diff --git a/media/libstagefright/OMXCodec.cpp b/media/libstagefright/OMXCodec.cpp index e71f16cb1f9960e4003f052fe5d137b601eb721e..79e767b9bc31369b2c96ec92b2e33f8bdc9f3ea6 100644 --- a/media/libstagefright/OMXCodec.cpp +++ b/media/libstagefright/OMXCodec.cpp @@ -200,6 +200,7 @@ static const CodecInfo kDecoderInfo[] = { { MEDIA_MIMETYPE_VIDEO_AVC, "OMX.qcom.video.decoder.avc" }, { MEDIA_MIMETYPE_VIDEO_AVC, "OMX.TI.Video.Decoder" }, { MEDIA_MIMETYPE_VIDEO_AVC, "OMX.SEC.AVC.Decoder" }, + { MEDIA_MIMETYPE_VIDEO_AVC, "OMX.google.h264.decoder" }, { MEDIA_MIMETYPE_VIDEO_AVC, "OMX.google.avc.decoder" }, { MEDIA_MIMETYPE_VIDEO_AVC, "AVCDecoder" }, { MEDIA_MIMETYPE_AUDIO_VORBIS, "OMX.google.vorbis.decoder" }, diff --git a/media/libstagefright/codecs/on2/h264dec/Android.mk b/media/libstagefright/codecs/on2/h264dec/Android.mk new file mode 100644 index 0000000000000000000000000000000000000000..5b3c87618749c6bcd68d8f5b92f085fffc64cfac --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/Android.mk @@ -0,0 +1,127 @@ +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_ARM_MODE := arm + +LOCAL_SRC_FILES := \ + ./source/h264bsd_transform.c \ + ./source/h264bsd_util.c \ + ./source/h264bsd_byte_stream.c \ + ./source/h264bsd_seq_param_set.c \ + ./source/h264bsd_pic_param_set.c \ + ./source/h264bsd_slice_header.c \ + ./source/h264bsd_slice_data.c \ + ./source/h264bsd_macroblock_layer.c \ + ./source/h264bsd_stream.c \ + ./source/h264bsd_vlc.c \ + ./source/h264bsd_cavlc.c \ + ./source/h264bsd_nal_unit.c \ + ./source/h264bsd_neighbour.c \ + ./source/h264bsd_storage.c \ + ./source/h264bsd_slice_group_map.c \ + ./source/h264bsd_intra_prediction.c \ + ./source/h264bsd_inter_prediction.c \ + ./source/h264bsd_reconstruct.c \ + ./source/h264bsd_dpb.c \ + ./source/h264bsd_image.c \ + ./source/h264bsd_deblocking.c \ + ./source/h264bsd_conceal.c \ + ./source/h264bsd_vui.c \ + ./source/h264bsd_pic_order_cnt.c \ + ./source/h264bsd_decoder.c \ + ./source/H264SwDecApi.c \ + 
SoftAVC.cpp \ + +LOCAL_C_INCLUDES := $(LOCAL_PATH)/./inc \ + frameworks/base/media/libstagefright/include \ + frameworks/base/include/media/stagefright/openmax \ + +MY_ASM := \ + ./source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S \ + ./source/arm_neon_asm_gcc/h264bsdClearMbLayer.S \ + ./source/arm_neon_asm_gcc/h264bsdFillRow7.S \ + ./source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S \ + ./source/arm_neon_asm_gcc/h264bsdFlushBits.S + + +MY_OMXDL_C_SRC := \ + ./omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c \ + ./omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c \ + ./omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c \ + ./omxdl/arm_neon/vc/m4p10/src/armVCM4P10_CAVLCTables.c \ + ./omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c \ + ./omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c \ + ./omxdl/arm_neon/src/armCOMM_Bitstream.c \ + ./omxdl/arm_neon/src/armCOMM.c + +MY_OMXDL_ASM_SRC := \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S \ + 
./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DequantTables_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_QuantTables_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S \ + ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S \ + + +ifeq ($(ARCH_ARM_HAVE_NEON),true) + LOCAL_ARM_NEON := true +# LOCAL_CFLAGS := -std=c99 -D._NEON -D._OMXDL + LOCAL_CFLAGS := -DH264DEC_NEON -DH264DEC_OMXDL + LOCAL_SRC_FILES += $(MY_ASM) $(MY_OMXDL_C_SRC) $(MY_OMXDL_ASM_SRC) + LOCAL_C_INCLUDES += $(LOCAL_PATH)/./source/arm_neon_asm_gcc + LOCAL_C_INCLUDES += $(LOCAL_PATH)/./omxdl/arm_neon/api \ + $(LOCAL_PATH)/./omxdl/arm_neon/vc/api \ + $(LOCAL_PATH)/./omxdl/arm_neon/vc/m4p10/api +endif + +LOCAL_SHARED_LIBRARIES := \ + libstagefright libstagefright_omx libstagefright_foundation libutils \ + +LOCAL_MODULE := libstagefright_soft_h264dec + +LOCAL_MODULE_TAGS := optional + +include $(BUILD_SHARED_LIBRARY) + +##################################################################### +# test utility: decoder +##################################################################### +## +## Test application +## 
+include $(CLEAR_VARS) + +LOCAL_SRC_FILES := ./source/DecTestBench.c + +LOCAL_C_INCLUDES := $(LOCAL_PATH)/inc + +LOCAL_SHARED_LIBRARIES := libstagefright_soft_h264dec + +LOCAL_MODULE_TAGS := debug + +LOCAL_MODULE := decoder + +include $(BUILD_EXECUTABLE) + diff --git a/media/libstagefright/codecs/on2/h264dec/SoftAVC.cpp b/media/libstagefright/codecs/on2/h264dec/SoftAVC.cpp new file mode 100644 index 0000000000000000000000000000000000000000..259fbc9a452a7155c0b3732142954ea380f97c69 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/SoftAVC.cpp @@ -0,0 +1,515 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +//#define LOG_NDEBUG 0 +#define LOG_TAG "SoftAVC" +#include + +#include "SoftAVC.h" + +#include +#include +#include + + +namespace android { + +template +static void InitOMXParams(T *params) { + params->nSize = sizeof(T); + params->nVersion.s.nVersionMajor = 1; + params->nVersion.s.nVersionMinor = 0; + params->nVersion.s.nRevision = 0; + params->nVersion.s.nStep = 0; +} + +SoftAVC::SoftAVC( + const char *name, + const OMX_CALLBACKTYPE *callbacks, + OMX_PTR appData, + OMX_COMPONENTTYPE **component) + : SimpleSoftOMXComponent(name, callbacks, appData, component), + mHandle(NULL), + mInputBufferCount(0), + mWidth(320), + mHeight(240), + mPictureSize(mWidth * mHeight * 3 / 2), + mCropLeft(0), + mCropTop(0), + mFirstPicture(NULL), + mFirstPictureId(-1), + mPicId(0), + mHeadersDecoded(false), + mEOSStatus(INPUT_DATA_AVAILABLE), + mOutputPortSettingsChange(NONE) { + initPorts(); + CHECK_EQ(initDecoder(), (status_t)OK); +} + +SoftAVC::~SoftAVC() { + H264SwDecRelease(mHandle); + mHandle = NULL; + + while (mPicToHeaderMap.size() != 0) { + OMX_BUFFERHEADERTYPE *header = mPicToHeaderMap.editValueAt(0); + mPicToHeaderMap.removeItemsAt(0); + delete header; + header = NULL; + } + List &outQueue = getPortQueue(kOutputPortIndex); + List &inQueue = getPortQueue(kInputPortIndex); + CHECK(outQueue.empty()); + CHECK(inQueue.empty()); + + delete[] mFirstPicture; +} + +void SoftAVC::initPorts() { + OMX_PARAM_PORTDEFINITIONTYPE def; + InitOMXParams(&def); + + def.nPortIndex = kInputPortIndex; + def.eDir = OMX_DirInput; + def.nBufferCountMin = kNumInputBuffers; + def.nBufferCountActual = def.nBufferCountMin; + def.nBufferSize = 8192; + def.bEnabled = OMX_TRUE; + def.bPopulated = OMX_FALSE; + def.eDomain = OMX_PortDomainVideo; + def.bBuffersContiguous = OMX_FALSE; + def.nBufferAlignment = 1; + + def.format.video.cMIMEType = const_cast(MEDIA_MIMETYPE_VIDEO_AVC); + def.format.video.pNativeRender = NULL; + def.format.video.nFrameWidth = mWidth; + def.format.video.nFrameHeight = 
mHeight; + def.format.video.nStride = def.format.video.nFrameWidth; + def.format.video.nSliceHeight = def.format.video.nFrameHeight; + def.format.video.nBitrate = 0; + def.format.video.xFramerate = 0; + def.format.video.bFlagErrorConcealment = OMX_FALSE; + def.format.video.eCompressionFormat = OMX_VIDEO_CodingAVC; + def.format.video.eColorFormat = OMX_COLOR_FormatUnused; + def.format.video.pNativeWindow = NULL; + + addPort(def); + + def.nPortIndex = kOutputPortIndex; + def.eDir = OMX_DirOutput; + def.nBufferCountMin = kNumOutputBuffers; + def.nBufferCountActual = def.nBufferCountMin; + def.bEnabled = OMX_TRUE; + def.bPopulated = OMX_FALSE; + def.eDomain = OMX_PortDomainVideo; + def.bBuffersContiguous = OMX_FALSE; + def.nBufferAlignment = 2; + + def.format.video.cMIMEType = const_cast(MEDIA_MIMETYPE_VIDEO_RAW); + def.format.video.pNativeRender = NULL; + def.format.video.nFrameWidth = mWidth; + def.format.video.nFrameHeight = mHeight; + def.format.video.nStride = def.format.video.nFrameWidth; + def.format.video.nSliceHeight = def.format.video.nFrameHeight; + def.format.video.nBitrate = 0; + def.format.video.xFramerate = 0; + def.format.video.bFlagErrorConcealment = OMX_FALSE; + def.format.video.eCompressionFormat = OMX_VIDEO_CodingUnused; + def.format.video.eColorFormat = OMX_COLOR_FormatYUV420Planar; + def.format.video.pNativeWindow = NULL; + + def.nBufferSize = + (def.format.video.nFrameWidth * def.format.video.nFrameHeight * 3) / 2; + + addPort(def); +} + +status_t SoftAVC::initDecoder() { + if (H264SwDecInit(&mHandle, 1) == H264SWDEC_OK) { + return OK; + } + return UNKNOWN_ERROR; +} + +OMX_ERRORTYPE SoftAVC::internalGetParameter( + OMX_INDEXTYPE index, OMX_PTR params) { + switch (index) { + case OMX_IndexParamVideoPortFormat: + { + OMX_VIDEO_PARAM_PORTFORMATTYPE *formatParams = + (OMX_VIDEO_PARAM_PORTFORMATTYPE *)params; + + if (formatParams->nPortIndex > kOutputPortIndex) { + return OMX_ErrorUndefined; + } + + if (formatParams->nIndex != 0) { + return 
OMX_ErrorNoMore; + } + + if (formatParams->nPortIndex == kInputPortIndex) { + formatParams->eCompressionFormat = OMX_VIDEO_CodingAVC; + formatParams->eColorFormat = OMX_COLOR_FormatUnused; + formatParams->xFramerate = 0; + } else { + CHECK(formatParams->nPortIndex == kOutputPortIndex); + + formatParams->eCompressionFormat = OMX_VIDEO_CodingUnused; + formatParams->eColorFormat = OMX_COLOR_FormatYUV420Planar; + formatParams->xFramerate = 0; + } + + return OMX_ErrorNone; + } + + default: + return SimpleSoftOMXComponent::internalGetParameter(index, params); + } +} + +OMX_ERRORTYPE SoftAVC::internalSetParameter( + OMX_INDEXTYPE index, const OMX_PTR params) { + switch (index) { + case OMX_IndexParamStandardComponentRole: + { + const OMX_PARAM_COMPONENTROLETYPE *roleParams = + (const OMX_PARAM_COMPONENTROLETYPE *)params; + + if (strncmp((const char *)roleParams->cRole, + "video_decoder.avc", + OMX_MAX_STRINGNAME_SIZE - 1)) { + return OMX_ErrorUndefined; + } + + return OMX_ErrorNone; + } + + case OMX_IndexParamVideoPortFormat: + { + OMX_VIDEO_PARAM_PORTFORMATTYPE *formatParams = + (OMX_VIDEO_PARAM_PORTFORMATTYPE *)params; + + if (formatParams->nPortIndex > kOutputPortIndex) { + return OMX_ErrorUndefined; + } + + if (formatParams->nIndex != 0) { + return OMX_ErrorNoMore; + } + + return OMX_ErrorNone; + } + + default: + return SimpleSoftOMXComponent::internalSetParameter(index, params); + } +} + +OMX_ERRORTYPE SoftAVC::getConfig( + OMX_INDEXTYPE index, OMX_PTR params) { + switch (index) { + case OMX_IndexConfigCommonOutputCrop: + { + OMX_CONFIG_RECTTYPE *rectParams = (OMX_CONFIG_RECTTYPE *)params; + + if (rectParams->nPortIndex != 1) { + return OMX_ErrorUndefined; + } + + rectParams->nLeft = mCropLeft; + rectParams->nTop = mCropTop; + rectParams->nWidth = mWidth; + rectParams->nHeight = mHeight; + + return OMX_ErrorNone; + } + + default: + return OMX_ErrorUnsupportedIndex; + } +} + +void SoftAVC::onQueueFilled(OMX_U32 portIndex) { + if (mOutputPortSettingsChange != NONE) { + 
return; + } + + if (mEOSStatus == OUTPUT_FRAMES_FLUSHED) { + return; + } + + List &inQueue = getPortQueue(kInputPortIndex); + List &outQueue = getPortQueue(kOutputPortIndex); + H264SwDecRet ret = H264SWDEC_PIC_RDY; + status_t err = OK; + bool portSettingsChanged = false; + while ((mEOSStatus != INPUT_DATA_AVAILABLE || !inQueue.empty()) + && outQueue.size() == kNumOutputBuffers) { + + if (mEOSStatus == INPUT_EOS_SEEN) { + drainAllOutputBuffers(); + return; + } + + BufferInfo *inInfo = *inQueue.begin(); + OMX_BUFFERHEADERTYPE *inHeader = inInfo->mHeader; + ++mPicId; + if (inHeader->nFlags & OMX_BUFFERFLAG_EOS) { + inQueue.erase(inQueue.begin()); + inInfo->mOwnedByUs = false; + notifyEmptyBufferDone(inHeader); + mEOSStatus = INPUT_EOS_SEEN; + continue; + } + + OMX_BUFFERHEADERTYPE *header = new OMX_BUFFERHEADERTYPE; + memset(header, 0, sizeof(OMX_BUFFERHEADERTYPE)); + header->nTimeStamp = inHeader->nTimeStamp; + header->nFlags = inHeader->nFlags; + mPicToHeaderMap.add(mPicId, header); + inQueue.erase(inQueue.begin()); + + H264SwDecInput inPicture; + H264SwDecOutput outPicture; + memset(&inPicture, 0, sizeof(inPicture)); + inPicture.dataLen = inHeader->nFilledLen; + inPicture.pStream = inHeader->pBuffer + inHeader->nOffset; + inPicture.picId = mPicId; + inPicture.intraConcealmentMethod = 1; + H264SwDecPicture decodedPicture; + + while (inPicture.dataLen > 0) { + ret = H264SwDecDecode(mHandle, &inPicture, &outPicture); + if (ret == H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY || + ret == H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY) { + inPicture.dataLen -= (u32)(outPicture.pStrmCurrPos - inPicture.pStream); + inPicture.pStream = outPicture.pStrmCurrPos; + if (ret == H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY) { + mHeadersDecoded = true; + H264SwDecInfo decoderInfo; + CHECK(H264SwDecGetInfo(mHandle, &decoderInfo) == H264SWDEC_OK); + + if (handlePortSettingChangeEvent(&decoderInfo)) { + portSettingsChanged = true; + } + + if (decoderInfo.croppingFlag && + handleCropRectEvent(&decoderInfo.cropParams)) { 
+ portSettingsChanged = true; + } + } + } else { + if (portSettingsChanged) { + if (H264SwDecNextPicture(mHandle, &decodedPicture, 0) + == H264SWDEC_PIC_RDY) { + + // Save this output buffer; otherwise, it will be + // lost during dynamic port reconfiguration because + // OpenMAX client will delete _all_ output buffers + // in the process. + saveFirstOutputBuffer( + decodedPicture.picId, + (uint8_t *)decodedPicture.pOutputPicture); + } + } + inPicture.dataLen = 0; + if (ret < 0) { + LOGE("Decoder failed: %d", ret); + err = ERROR_MALFORMED; + } + } + } + inInfo->mOwnedByUs = false; + notifyEmptyBufferDone(inHeader); + + if (portSettingsChanged) { + portSettingsChanged = false; + return; + } + + if (mFirstPicture && !outQueue.empty()) { + drainOneOutputBuffer(mFirstPictureId, mFirstPicture); + delete[] mFirstPicture; + mFirstPicture = NULL; + mFirstPictureId = -1; + } + + while (!outQueue.empty() && + mHeadersDecoded && + H264SwDecNextPicture(mHandle, &decodedPicture, 0) + == H264SWDEC_PIC_RDY) { + + int32_t picId = decodedPicture.picId; + uint8_t *data = (uint8_t *) decodedPicture.pOutputPicture; + drainOneOutputBuffer(picId, data); + } + + if (err != OK) { + notify(OMX_EventError, OMX_ErrorUndefined, err, NULL); + } + } +} + +bool SoftAVC::handlePortSettingChangeEvent(const H264SwDecInfo *info) { + if (mWidth != info->picWidth || mHeight != info->picHeight) { + mWidth = info->picWidth; + mHeight = info->picHeight; + mPictureSize = mWidth * mHeight * 3 / 2; + updatePortDefinitions(); + notify(OMX_EventPortSettingsChanged, 1, 0, NULL); + mOutputPortSettingsChange = AWAITING_DISABLED; + return true; + } + + return false; +} + +bool SoftAVC::handleCropRectEvent(const CropParams *crop) { + if (mCropLeft != crop->cropLeftOffset || + mCropTop != crop->cropTopOffset || + mWidth != crop->cropOutWidth || + mHeight != crop->cropOutHeight) { + + mCropLeft = crop->cropLeftOffset; + mCropTop = crop->cropTopOffset; + mWidth = crop->cropOutWidth; + mHeight = crop->cropOutHeight; + 
mPictureSize = mWidth * mHeight * 3 / 2; + + notify(OMX_EventPortSettingsChanged, 1, + OMX_IndexConfigCommonOutputCrop, NULL); + + return true; + } + return false; +} + +void SoftAVC::saveFirstOutputBuffer(int32_t picId, uint8_t *data) { + CHECK(mFirstPicture == NULL); + mFirstPictureId = picId; + + mFirstPicture = new uint8_t[mPictureSize]; + memcpy(mFirstPicture, data, mPictureSize); +} + +void SoftAVC::drainOneOutputBuffer(int32_t picId, uint8_t* data) { + List &outQueue = getPortQueue(kOutputPortIndex); + BufferInfo *outInfo = *outQueue.begin(); + outQueue.erase(outQueue.begin()); + OMX_BUFFERHEADERTYPE *outHeader = outInfo->mHeader; + OMX_BUFFERHEADERTYPE *header = mPicToHeaderMap.valueFor(picId); + outHeader->nTimeStamp = header->nTimeStamp; + outHeader->nFlags = header->nFlags; + outHeader->nFilledLen = mPictureSize; + memcpy(outHeader->pBuffer + outHeader->nOffset, + data, mPictureSize); + mPicToHeaderMap.removeItem(picId); + delete header; + outInfo->mOwnedByUs = false; + notifyFillBufferDone(outHeader); +} + +bool SoftAVC::drainAllOutputBuffers() { + List &outQueue = getPortQueue(kOutputPortIndex); + H264SwDecPicture decodedPicture; + + while (!outQueue.empty()) { + BufferInfo *outInfo = *outQueue.begin(); + outQueue.erase(outQueue.begin()); + OMX_BUFFERHEADERTYPE *outHeader = outInfo->mHeader; + if (mHeadersDecoded && + H264SWDEC_PIC_RDY == + H264SwDecNextPicture(mHandle, &decodedPicture, 1 /* flush */)) { + + int32_t picId = decodedPicture.picId; + CHECK(mPicToHeaderMap.indexOfKey(picId) >= 0); + + memcpy(outHeader->pBuffer + outHeader->nOffset, + decodedPicture.pOutputPicture, + mPictureSize); + + OMX_BUFFERHEADERTYPE *header = mPicToHeaderMap.valueFor(picId); + outHeader->nTimeStamp = header->nTimeStamp; + outHeader->nFlags = header->nFlags; + outHeader->nFilledLen = mPictureSize; + mPicToHeaderMap.removeItem(picId); + delete header; + } else { + outHeader->nTimeStamp = 0; + outHeader->nFilledLen = 0; + outHeader->nFlags = OMX_BUFFERFLAG_EOS; + 
mEOSStatus = OUTPUT_FRAMES_FLUSHED; + } + + outInfo->mOwnedByUs = false; + notifyFillBufferDone(outHeader); + } + + return true; +} + +void SoftAVC::onPortFlushCompleted(OMX_U32 portIndex) { + if (portIndex == kInputPortIndex) { + mEOSStatus = INPUT_DATA_AVAILABLE; + } +} + +void SoftAVC::onPortEnableCompleted(OMX_U32 portIndex, bool enabled) { + switch (mOutputPortSettingsChange) { + case NONE: + break; + + case AWAITING_DISABLED: + { + CHECK(!enabled); + mOutputPortSettingsChange = AWAITING_ENABLED; + break; + } + + default: + { + CHECK_EQ((int)mOutputPortSettingsChange, (int)AWAITING_ENABLED); + CHECK(enabled); + mOutputPortSettingsChange = NONE; + break; + } + } +} + +void SoftAVC::updatePortDefinitions() { + OMX_PARAM_PORTDEFINITIONTYPE *def = &editPortInfo(0)->mDef; + def->format.video.nFrameWidth = mWidth; + def->format.video.nFrameHeight = mHeight; + def->format.video.nStride = def->format.video.nFrameWidth; + def->format.video.nSliceHeight = def->format.video.nFrameHeight; + + def = &editPortInfo(1)->mDef; + def->format.video.nFrameWidth = mWidth; + def->format.video.nFrameHeight = mHeight; + def->format.video.nStride = def->format.video.nFrameWidth; + def->format.video.nSliceHeight = def->format.video.nFrameHeight; + + def->nBufferSize = + (def->format.video.nFrameWidth + * def->format.video.nFrameHeight * 3) / 2; +} + +} // namespace android + +android::SoftOMXComponent *createSoftOMXComponent( + const char *name, const OMX_CALLBACKTYPE *callbacks, + OMX_PTR appData, OMX_COMPONENTTYPE **component) { + return new android::SoftAVC(name, callbacks, appData, component); +} diff --git a/media/libstagefright/codecs/on2/h264dec/SoftAVC.h b/media/libstagefright/codecs/on2/h264dec/SoftAVC.h new file mode 100644 index 0000000000000000000000000000000000000000..a7340c0ad0761395951f2b3627d68e205da7d1f6 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/SoftAVC.h @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed 
under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SOFT_AVC_H_ + +#define SOFT_AVC_H_ + +#include "SimpleSoftOMXComponent.h" +#include + +#include "H264SwDecApi.h" +#include "basetype.h" + +namespace android { + +struct SoftAVC : public SimpleSoftOMXComponent { + SoftAVC(const char *name, + const OMX_CALLBACKTYPE *callbacks, + OMX_PTR appData, + OMX_COMPONENTTYPE **component); + +protected: + virtual ~SoftAVC(); + + virtual OMX_ERRORTYPE internalGetParameter( + OMX_INDEXTYPE index, OMX_PTR params); + + virtual OMX_ERRORTYPE internalSetParameter( + OMX_INDEXTYPE index, const OMX_PTR params); + + virtual OMX_ERRORTYPE getConfig(OMX_INDEXTYPE index, OMX_PTR params); + + virtual void onQueueFilled(OMX_U32 portIndex); + virtual void onPortFlushCompleted(OMX_U32 portIndex); + virtual void onPortEnableCompleted(OMX_U32 portIndex, bool enabled); + +private: + enum { + kInputPortIndex = 0, + kOutputPortIndex = 1, + kNumInputBuffers = 8, + kNumOutputBuffers = 16, + }; + + enum EOSStatus { + INPUT_DATA_AVAILABLE, + INPUT_EOS_SEEN, + OUTPUT_FRAMES_FLUSHED, + }; + + void *mHandle; + + size_t mInputBufferCount; + + uint32_t mWidth, mHeight, mPictureSize; + uint32_t mCropLeft, mCropTop; + + uint8_t *mFirstPicture; + int32_t mFirstPictureId; + + int32_t mPicId; // Which output picture is for which input buffer? + + // OMX_BUFFERHEADERTYPE may be overkill, but it is convenient + // for tracking the following fields: nFlags, nTimeStamp, etc. 
+ KeyedVector mPicToHeaderMap; + bool mHeadersDecoded; + + EOSStatus mEOSStatus; + + enum OutputPortSettingChange { + NONE, + AWAITING_DISABLED, + AWAITING_ENABLED + }; + OutputPortSettingChange mOutputPortSettingsChange; + + void initPorts(); + status_t initDecoder(); + void updatePortDefinitions(); + bool drainAllOutputBuffers(); + void drainOneOutputBuffer(int32_t picId, uint8_t *data); + void saveFirstOutputBuffer(int32_t pidId, uint8_t *data); + bool handleCropRectEvent(const CropParams* crop); + bool handlePortSettingChangeEvent(const H264SwDecInfo *info); + + DISALLOW_EVIL_CONSTRUCTORS(SoftAVC); +}; + +} // namespace android + +#endif // SOFT_AVC_H_ + diff --git a/media/libstagefright/codecs/on2/h264dec/inc/H264SwDecApi.h b/media/libstagefright/codecs/on2/h264dec/inc/H264SwDecApi.h new file mode 100755 index 0000000000000000000000000000000000000000..fe112bc35ae0fd4194e8543a812745cab4f43220 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/inc/H264SwDecApi.h @@ -0,0 +1,192 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include Headers + + 2. Enumerations used as a return value or a parameter. + 2.1. API's return value enumerations. + + 3. User Structures + 3.1. Structures for H264SwDecDecode() parameters. + 3.2. 
Structures for information interchange with + DEC API and user application. + + 4. Prototypes of Decoder API functions + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDECAPI_H +#define H264SWDECAPI_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +/*------------------------------------------------------------------------------ + 1. Include Headers +------------------------------------------------------------------------------*/ + + #include "basetype.h" + +/*------------------------------------------------------------------------------ + 2.1. API's return value enumerations. +------------------------------------------------------------------------------*/ + + typedef enum + { + H264SWDEC_OK = 0, + H264SWDEC_STRM_PROCESSED = 1, + H264SWDEC_PIC_RDY, + H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY, + H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY, + H264SWDEC_PARAM_ERR = -1, + H264SWDEC_STRM_ERR = -2, + H264SWDEC_NOT_INITIALIZED = -3, + H264SWDEC_MEMFAIL = -4, + H264SWDEC_INITFAIL = -5, + H264SWDEC_HDRS_NOT_RDY = -6, + H264SWDEC_EVALUATION_LIMIT_EXCEEDED = -7 + } H264SwDecRet; + +/*------------------------------------------------------------------------------ + 3.1. Structures for H264SwDecDecode() parameters. 
+------------------------------------------------------------------------------*/ + + /* typedef of the Decoder instance */ + typedef void *H264SwDecInst; + + /* Input structure */ + typedef struct + { + u8 *pStream; /* Pointer to stream to be decoded */ + u32 dataLen; /* Number of bytes to be decoded */ + u32 picId; /* Identifier for the picture to be decoded */ + u32 intraConcealmentMethod; /* 0 = Gray concealment for intra + 1 = Reference concealment for intra */ + + } H264SwDecInput; + + + /* Output structure */ + typedef struct + { + u8 *pStrmCurrPos; /* Pointer to stream position where decoder + ended up */ + } H264SwDecOutput; + + /* Output structure for H264SwDecNextPicture */ + typedef struct + { + u32 *pOutputPicture; /* Pointer to the picture, YUV format */ + u32 picId; /* Identifier of the picture to be displayed*/ + u32 isIdrPicture; /* Flag to indicate if the picture is an + IDR picture */ + u32 nbrOfErrMBs; /* Number of concealed MB's in the picture */ + } H264SwDecPicture; + +/*------------------------------------------------------------------------------ + 3.2. Structures for information interchange with DEC API + and user application. +------------------------------------------------------------------------------*/ + + typedef struct + { + u32 cropLeftOffset; + u32 cropOutWidth; + u32 cropTopOffset; + u32 cropOutHeight; + } CropParams; + + typedef struct + { + u32 profile; + u32 picWidth; + u32 picHeight; + u32 videoRange; + u32 matrixCoefficients; + u32 parWidth; + u32 parHeight; + u32 croppingFlag; + CropParams cropParams; + } H264SwDecInfo; + + /* Version information */ + typedef struct + { + u32 major; /* Decoder API major version */ + u32 minor; /* Dncoder API minor version */ + } H264SwDecApiVersion; + +/*------------------------------------------------------------------------------ + 4. 
Prototypes of Decoder API functions +------------------------------------------------------------------------------*/ + + H264SwDecRet H264SwDecDecode(H264SwDecInst decInst, + H264SwDecInput *pInput, + H264SwDecOutput *pOutput); + + H264SwDecRet H264SwDecInit(H264SwDecInst *decInst, + u32 noOutputReordering); + + H264SwDecRet H264SwDecNextPicture(H264SwDecInst decInst, + H264SwDecPicture *pOutput, + u32 endOfStream); + + H264SwDecRet H264SwDecGetInfo(H264SwDecInst decInst, + H264SwDecInfo *pDecInfo); + + void H264SwDecRelease(H264SwDecInst decInst); + + H264SwDecApiVersion H264SwDecGetAPIVersion(void); + + /* function prototype for API trace */ + void H264SwDecTrace(char *); + + /* function prototype for memory allocation */ + void* H264SwDecMalloc(u32 size); + + /* function prototype for memory free */ + void H264SwDecFree(void *ptr); + + /* function prototype for memory copy */ + void H264SwDecMemcpy(void *dest, void *src, u32 count); + + /* function prototype for memset */ + void H264SwDecMemset(void *ptr, i32 value, u32 count); + + +#ifdef __cplusplus +} +#endif + +#endif /* H264SWDECAPI_H */ + + + + + + + + + + + + diff --git a/media/libstagefright/codecs/on2/h264dec/inc/basetype.h b/media/libstagefright/codecs/on2/h264dec/inc/basetype.h new file mode 100755 index 0000000000000000000000000000000000000000..63d56539dc134bbbaa7c95f822363aac7898f7a4 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/inc/basetype.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef BASETYPE_H_INCLUDED +#define BASETYPE_H_INCLUDED + + +#ifdef __arm +#define VOLATILE volatile +#else +#define VOLATILE +#endif + +typedef unsigned char u8; +typedef signed char i8; +typedef unsigned short u16; +typedef signed short i16; +typedef unsigned int u32; +typedef signed int i32; + +#if defined(VC1SWDEC_16BIT) || defined(MP4ENC_ARM11) +typedef unsigned short u16x; +typedef signed short i16x; +#else +typedef unsigned int u16x; +typedef signed int i16x; +#endif + + +#ifndef NULL +#ifdef __cplusplus +#define NULL 0 +#else +#define NULL ((void *)0) +#endif +#endif + +#endif /* BASETYPE_H_INCLUDED */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT new file mode 100644 index 0000000000000000000000000000000000000000..5ce70cada2b7633395e39710ef28c16bfdabe4eb --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT @@ -0,0 +1,63 @@ +The contents of this transaction was created by Hedley Francis +of ARM on 19-Feb-2008. + +It contains the ARM data versions listed below. + +This data, unless otherwise stated, is ARM Proprietary and access to it +is subject to the agreements indicated below. + +If you experience problems with this data, please contact ARM support +quoting transaction reference <97413>. + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +- OX001-SW-98010-r0p0-00bet1 + Video codecs - optimised code + V6 optimized code release for Hantro (Ver 1.0.2) + internal access + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +This transaction contains deliverables which are designated as being of +beta release status (BET). + +Beta release status has a particular meaning to ARM of which the recipient +must be aware. 
Beta is a pre-release status indicating that the deliverable +so described is believed to robustly demonstrate specified behaviour, to be +consistent across its included aspects and be ready for general deployment. +But Beta also indicates that pre-release reliability trials are ongoing and +that it is possible residual defects or errors in operation, consistency +and documentation may still be encountered. The recipient should consider +this position when using this Beta material supplied. ARM will normally +attempt to provide fixes or a work-around for defects identified by the +recipient, but the provision or timeliness of this support cannot be +guaranteed. ARM shall not be responsible for direct or consequential +damages as a result of encountering one or more of these residual defects. +By accepting a Beta release, the recipient agrees to these constraints and +to providing reasonable information to ARM to enable the replication of the +defects identified by the recipient. The specific Beta version supplied +will not be supported after release of a later or higher status version. +It should be noted that Support for the Beta release of the deliverable +will only be provided by ARM to a recipient who has a current support and +maintenance contract for the deliverable. + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +In addition to the data versions listed above, this transaction contains +two additional files at the top level. + +The first is this file, ARM_DELIVERY_97413.TXT, which is the delivery +note. + +The second is ARM_MANIFEST_97413.TXT which contains a manifest of all the +files included in this transaction, together with their checksums. + +The checksums provided are calculated using the RSA Data Security, Inc. +MD5 Message-Digest Algorithm. 
+ +The checksums can be used to verify the integrity of this data using the +"md5sum" tool (which is part of the GNU "textutils" package) by running: + + % md5sum --check ARM_MANIFEST_97413.TXT + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT new file mode 100644 index 0000000000000000000000000000000000000000..9b2238bab5c47b5820fc403eee6f3bdf14f55f28 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT @@ -0,0 +1,91 @@ + OX001-SW-98010-r0p0-00bet1/ + OX001-SW-98010-r0p0-00bet1/api/ +e049791cfab6060a08cbac7b3ad767d6 OX001-SW-98010-r0p0-00bet1/api/armCOMM_s.h +ed798face25497b2703ede736d6d52b6 OX001-SW-98010-r0p0-00bet1/api/omxtypes_s.h +4eebd63af087376811d6749f0646b864 OX001-SW-98010-r0p0-00bet1/api/armCOMM_BitDec_s.h +43cf46c2cf2fe1f93c615b57bcbe4809 OX001-SW-98010-r0p0-00bet1/api/armCOMM.h +8f248ceaac8f602e277a521b679dcbbe OX001-SW-98010-r0p0-00bet1/api/armCOMM_IDCTTable.h +53f2ae8a98495f05e26a4cf862a7f750 OX001-SW-98010-r0p0-00bet1/api/armCOMM_Version.h +3a2f420ddf6a1b950470bd0f5ebd5c62 OX001-SW-98010-r0p0-00bet1/api/armCOMM_IDCT_s.h +511c0bb534fe223599e2c84eff24c9ed OX001-SW-98010-r0p0-00bet1/api/armCOMM_MaskTable.h +8971932d56eed6b1ad1ba507f0bff5f0 OX001-SW-98010-r0p0-00bet1/api/armCOMM_Bitstream.h +f87fedd9ca432fefa757008176864ef8 OX001-SW-98010-r0p0-00bet1/api/armOMX.h +8e49899a428822c36ef9dd94e0e05f18 OX001-SW-98010-r0p0-00bet1/api/omxtypes.h +694281d11af52f88e6f9d4cb226ac8a7 OX001-SW-98010-r0p0-00bet1/build_vc.pl +e72d96c0a415459748df9807f3dae72f OX001-SW-98010-r0p0-00bet1/filelist_vc.txt + OX001-SW-98010-r0p0-00bet1/src/ +5eeae659a29477f5c52296d24afffd3c OX001-SW-98010-r0p0-00bet1/src/armCOMM_IDCTTable.c +d64cdcf38f7749dc7f77465e5b7d356d OX001-SW-98010-r0p0-00bet1/src/armCOMM_MaskTable.c + OX001-SW-98010-r0p0-00bet1/vc/ + OX001-SW-98010-r0p0-00bet1/vc/m4p10/ + OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/ 
+e7e0c320978564a7c9b2c723749a98d6 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_CAVLCTables.c +4adcd0df081990bdfc4729041a2a9152 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c +852e0404142965dc1f3aa7f00ee5127b OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s +7054151c5bfea6b5e74feee86b2d7b01 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c +38944c5e0bba01e32ff349c2c87c71b2 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DequantTables_s.s +32ff4b8be62e2f0f3e764b83c1e5e2fd OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c +90b0e6a04e764902c0a0903640c10b32 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s +28a19ae4fe2258628080d6a89bb54b91 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s +98e196b9e1ffebaf91f62ea9d17fb97d OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s +01ba60eff66ea49a4f833ce6279f8e2f OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c +f301d5a95e07354f593ea5747c01cb0a OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s +44c9ef21e840a100301f7d7a4189957c OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s +a33b03bbd3352d24ed744769e12bb87d OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s +00c20bfda67bb86096b615fc17c94b35 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s +2ddcaf60a8ea1e6e6b77737f768bfb9d OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_QuantTables_s.s +c3002aad5600f872b70a5d7fe3915846 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s +a2900f2c47f1c61d20bd6c1eda33d6d4 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s +e4fecd66bc47f07539bc308935e84a1f OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s 
+78815c9df50ba53131bb22d2b829e3c3 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s +1909ae312ac79a03a5fac1d1e8bc0291 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s +3d2c48580655928065de7839866d9bc4 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s +23aa2fdf155d4fa6ff745eab6e01f32b OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s +97f20a93c481d7f6173d919f41e415bd OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s +becd512da202436286811b6aec061f47 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s +dd24a99ae3cd842dcacaf31d47de88b3 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s +c2d995f787b6f44ef10c751c12d1935f OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s +3628fbdf0cd217c287b6ccc94135d06e OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s +4a52b3e9e268b8a8f07829bf500d03af OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s +11249f8a98c5d4b84cb5575b0e37ca9c OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s +3599b1074330965c8ca285d164efccff OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s +3339e026c7de655d9400949eb5e51451 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s +cc4a6f32db0b72a91d3f278f6855df69 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c + OX001-SW-98010-r0p0-00bet1/vc/m4p10/api/ +6e530ddaa7c2b57ffe88162c020cb662 OX001-SW-98010-r0p0-00bet1/vc/m4p10/api/armVCM4P10_CAVLCTables.h + OX001-SW-98010-r0p0-00bet1/vc/m4p2/ + OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/ +cdf412920c2037a725d0420002b6752e OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Clip8_s.s +dba9824e959b21d401cac925e68a11a6 
OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s +b559b71d5c94627f10e616fb72c0cefc OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s +4fba4c431a783a78a2eb6497a94ac967 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c +1e4c3be8c5eddc00c9f05e83bcf315ef OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s +1b0b2990c2669dfb87cf6b810611c01b OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c +1c9b87abf3283e957816b3937c680701 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s +4fe1afca659a9055fc1172e58f78a506 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c +2ea067f0436f91ba1351edaf411cb4ea OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Lookup_Tables.c +acb92be1dbcdb3ebe824cbe9e28d03bf OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s +a6b41f01b1df7dd656ebdba3084bfa2a OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s +293a48a648a3085456e6665bb7366fad OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_SetPredDir_s.s +ffe6b96c74d4881f4d3c8de8cc737797 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s +437dfa204508850d61d4b87091446e9f OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s +ff5915d181bfd2cd2f0bd588bd2300dc OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s +6775eb0c561dbab965c60f85b08c96fd OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s +a0d85f4f517c945a4c9317ac021f2d08 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s +386020dee8b725c7fe2526f1fc211d7d OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c + OX001-SW-98010-r0p0-00bet1/vc/m4p2/api/ +4624e7c838e10a249abcc3d3f4f40748 OX001-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h +65e1057d04e2cb844559dc9f6e09795a OX001-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h + 
OX001-SW-98010-r0p0-00bet1/vc/src/ +e627b3346b0dc9aff14446005ce0fa43 OX001-SW-98010-r0p0-00bet1/vc/src/armVC_Version.c + OX001-SW-98010-r0p0-00bet1/vc/api/ +7ca94b1c33ac0211e17d38baadd7d1dd OX001-SW-98010-r0p0-00bet1/vc/api/armVC.h +12cf7596edbbf6048b626d15e8d0ed48 OX001-SW-98010-r0p0-00bet1/vc/api/omxVC.h +11726e286a81257cb45f5547fb4d374c OX001-SW-98010-r0p0-00bet1/vc/api/omxVC_s.h +a5b2af605c319cd2491319e430741377 OX001-SW-98010-r0p0-00bet1/vc/api/armVCCOMM_s.h + OX001-SW-98010-r0p0-00bet1/vc/comm/ + OX001-SW-98010-r0p0-00bet1/vc/comm/src/ +50cca6954c447b012ab39ca7872e5e8f OX001-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy16x16_s.s +d1c3bce77fc5774c899b447d13f02cd0 OX001-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy8x8_s.s +fdac1d1bad3fd23c880beb39bc2e89aa OX001-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s +6d9adc2be5bd0311591030d0c6df771c ARM_DELIVERY_97413.TXT diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h new file mode 100644 index 0000000000000000000000000000000000000000..2ed86a466390016f8c431bd255030e4fec86aa71 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h @@ -0,0 +1,785 @@ +/** + * + * File Name: armCOMM.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armCOMM.h + * Brief: Declares Common APIs/Data Types used across OpenMAX API's + * + */ + + +#ifndef _armCommon_H_ +#define _armCommon_H_ + +#include "omxtypes.h" + +typedef struct +{ + OMX_F32 Re; /** Real part */ + OMX_F32 Im; /** Imaginary part */ + +} OMX_FC32; /** single precision floating point complex number */ + +typedef struct +{ + OMX_F64 Re; /** Real part */ + OMX_F64 Im; /** Imaginary part */ + +} OMX_FC64; /** double precision floating point complex number */ + + +/* Used by both IP and IC domains for 8x8 JPEG blocks. 
*/ +typedef OMX_S16 ARM_BLOCK8x8[64]; + + +#include "armOMX.h" + +#define armPI (OMX_F64)(3.1415926535897932384626433832795) + +/***********************************************************************/ + +/* Compiler extensions */ +#ifdef ARM_DEBUG +/* debug version */ +#include +#include +#include +#define armError(str) {printf((str)); printf("\n"); exit(-1);} +#define armWarn(str) {printf((str)); printf("\n");} +#define armIgnore(a) ((void)a) +#define armAssert(a) assert(a) +#else +/* release version */ +#define armError(str) ((void) (str)) +#define armWarn(str) ((void) (str)) +#define armIgnore(a) ((void) (a)) +#define armAssert(a) ((void) (a)) +#endif /* ARM_DEBUG */ + +/* Arithmetic operations */ + +#define armMin(a,b) ( (a) > (b) ? (b):(a) ) +#define armMax(a,b) ( (a) > (b) ? (a):(b) ) +#define armAbs(a) ( (a) < 0 ? -(a):(a) ) + +/* Alignment operation */ + +#define armAlignToBytes(Ptr,N) (Ptr + ( ((N-(int)Ptr)&(N-1)) / sizeof(*Ptr) )) +#define armAlignTo2Bytes(Ptr) armAlignToBytes(Ptr,2) +#define armAlignTo4Bytes(Ptr) armAlignToBytes(Ptr,4) +#define armAlignTo8Bytes(Ptr) armAlignToBytes(Ptr,8) +#define armAlignTo16Bytes(Ptr) armAlignToBytes(Ptr,16) + +/* Error and Alignment check */ + +#define armRetArgErrIf(condition, code) if(condition) { return (code); } +#define armRetDataErrIf(condition, code) if(condition) { return (code); } + +#ifndef ALIGNMENT_DOESNT_MATTER +#define armIsByteAligned(Ptr,N) ((((int)(Ptr)) % N)==0) +#define armNotByteAligned(Ptr,N) ((((int)(Ptr)) % N)!=0) +#else +#define armIsByteAligned(Ptr,N) (1) +#define armNotByteAligned(Ptr,N) (0) +#endif + +#define armIs2ByteAligned(Ptr) armIsByteAligned(Ptr,2) +#define armIs4ByteAligned(Ptr) armIsByteAligned(Ptr,4) +#define armIs8ByteAligned(Ptr) armIsByteAligned(Ptr,8) +#define armIs16ByteAligned(Ptr) armIsByteAligned(Ptr,16) + +#define armNot2ByteAligned(Ptr) armNotByteAligned(Ptr,2) +#define armNot4ByteAligned(Ptr) armNotByteAligned(Ptr,4) +#define armNot8ByteAligned(Ptr) 
armNotByteAligned(Ptr,8) +#define armNot16ByteAligned(Ptr) armNotByteAligned(Ptr,16) +#define armNot32ByteAligned(Ptr) armNotByteAligned(Ptr,32) + +/** + * Function: armRoundFloatToS16/armRoundFloatToS32/armRoundFloatToS64 + * + * Description: + * Converts a double precision value into a short int/int/64-bit int after rounding + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16/OMX_S32/OMX_S64 format + * + */ + +OMX_S16 armRoundFloatToS16 (OMX_F64 Value); +OMX_S32 armRoundFloatToS32 (OMX_F64 Value); +OMX_S64 armRoundFloatToS64 (OMX_F64 Value); + +/** + * Function: armSatRoundFloatToS16/armSatRoundFloatToS32 + * + * Description: + * Converts a double precision value into a short int/int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16/OMX_S32 format + * + */ + +OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value); +OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value); + +/** + * Function: armSatRoundFloatToU16/armSatRoundFloatToU32 + * + * Description: + * Converts a double precision value into an unsigned short int/int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_U16/OMX_U32 format + * + */ + +OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value); +OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value); + +/** + * Function: armSignCheck + * + * Description: + * Checks the sign of a variable: + * returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + * + * Remarks: + * + * Parameters: + * [in] var Variable to be checked + * + * Return Value: + * OMX_INT -- returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + */ + +OMX_INT armSignCheck (OMX_S16 var); + +/** + * Function: armClip + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: +
* + * Parameters: + * [in] min lower bound + * [in] max upper bound + * [in] src variable to be clipped + * + * Return Value: + * OMX_S32 -- returns clipped value + */ + +OMX_S32 armClip ( + OMX_INT min, + OMX_INT max, + OMX_S32 src + ); + +/** + * Function: armClip_F32 + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: + * + * Parameters: + * [in] min lower bound + * [in] max upper bound + * [in] src variable to be clipped + * + * Return Value: + * OMX_F32 -- returns clipped value + */ + +OMX_F32 armClip_F32 ( + OMX_F32 min, + OMX_F32 max, + OMX_F32 src + ); + +/** + * Function: armShiftSat_F32 + * + * Description: Divides a float value by 2^shift and + * saturates it for unsigned value range for satBits. + * Second parameter is like "shifting" the corresponding + * integer value. Takes care of rounding while clipping the final + * value. + * + * Parameters: + * [in] v Number to be operated upon + * [in] shift Divides the input "v" by "2^shift" + * [in] satBits Final range is [0, 2^satBits) + * + * Return Value: + * OMX_U32 -- returns "shifted" saturated value + */ + +OMX_U32 armShiftSat_F32( + OMX_F32 v, + OMX_INT shift, + OMX_INT satBits + ); + +/** + * Function: armSwapElem + * + * Description: + * This function swaps two elements at the specified pointer locations.
+ * The size of each element could be anything as specified by elemSize + * + * Return Value: + * OMXResult -- Error status from the function + */ +OMXResult armSwapElem(OMX_U8 *pBuf1, OMX_U8 *pBuf2, OMX_INT elemSize); + + +/** + * Function: armMedianOf3 + * + * Description: Finds the median of three numbers + * + * Remarks: + * + * Parameters: + * [in] fEntry First entry + * [in] sEntry Second entry + * [in] tEntry Third entry + * + * Return Value: + * OMX_S32 -- returns the median value + */ + +OMX_S32 armMedianOf3 ( + OMX_S32 fEntry, + OMX_S32 sEntry, + OMX_S32 tEntry + ); + +/** + * Function: armLogSize + * + * Description: Finds the size of a positive value and returns the same + * + * Remarks: + * + * Parameters: + * [in] value Positive value + * + * Return Value: + * OMX_U8 -- returns the size of the positive value + */ + +OMX_U8 armLogSize ( + OMX_U16 value + ); + +/***********************************************************************/ + /* Saturating Arithmetic operations */ + +/** + * Function :armSatAdd_S32() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S32 armSatAdd_S32( + OMX_S32 Value1, + OMX_S32 Value2 + ); + +/** + * Function :armSatAdd_S64() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S64 armSatAdd_S64( + OMX_S64 Value1, + OMX_S64 Value2 + ); + +/** Function :armSatSub_S32() + * + * Description : + * Returns the result of saturated subtraction of the two inputs Value1, Value2 + * + * Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatSub_S32( + OMX_S32 Value1, +
OMX_S32 Value2 + ); + +/** + * Function :armSatMac_S32() + * + * Description : + * Returns the result of Multiplication of Value1 and Value2 and subsequent saturated + * accumulation with Mac + * + * Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * [in] Mac Accumulator + * + * Return: + * [out] Result of operation + **/ + +OMX_S32 armSatMac_S32( + OMX_S32 Mac, + OMX_S16 Value1, + OMX_S16 Value2 + ); + +/** + * Function :armSatMac_S16S32_S32 + * + * Description : + * Returns the result of saturated MAC operation of the three inputs delayElem, filTap, mac + * + * mac = mac + Saturate_in_32Bits(delayElem * filTap) + * + * Parameters: + * [in] delayElem First 32 bit Operand + * [in] filTap Second 16 bit Operand + * [in] mac Result of MAC operation + * + * Return: + * [out] mac Result of operation + * + **/ + +OMX_S32 armSatMac_S16S32_S32( + OMX_S32 mac, + OMX_S32 delayElem, + OMX_S16 filTap ); + +/** + * Function :armSatRoundRightShift_S32_S16 + * + * Description : + * Returns the result of rounded right shift operation of input by the scalefactor + * + * output = Saturate_in_16Bits( ( RightShift( (Round(input) , scaleFactor ) ) + * + * Parameters: + * [in] input The input to be operated on + * [in] scaleFactor The shift number + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S16 armSatRoundRightShift_S32_S16( + OMX_S32 input, + OMX_INT scaleFactor); + +/** + * Function :armSatRoundLeftShift_S32() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative.
+ * + * Parameters: + * [in] Value Operand + * [in] shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatRoundLeftShift_S32( + OMX_S32 Value, + OMX_INT shift + ); + +/** + * Function :armSatRoundLeftShift_S64() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. + * + * Parameters: + * [in] Value Operand + * [in] shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S64 armSatRoundLeftShift_S64( + OMX_S64 Value, + OMX_INT shift + ); + +/** + * Function :armSatMulS16S32_S32() + * + * Description : + * Returns the result of an S16 data type multiplied with an S32 data type + * in an S32 container + * + * Parameters: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S32 armSatMulS16S32_S32( + OMX_S16 input1, + OMX_S32 input2); + +/** + * Function :armSatMulS32S32_S32() + * + * Description : + * Returns the result of an S32 data type multiplied with an S32 data type + * in an S32 container + * + * Parameters: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatMulS32S32_S32( + OMX_S32 input1, + OMX_S32 input2); + + +/** + * Function :armIntDivAwayFromZero() + * + * Description : Integer division with rounding to the nearest integer. + * Half-integer values are rounded away from zero + * unless otherwise specified. For example 3//2 is rounded + * to 2, and -3//2 is rounded to -2.
+ * + * Parameters: + * [in] Num Operand 1 + * [in] Deno Operand 2 + * + * Return: + * [out] Result of operation Num//Deno + * + **/ + +OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno); + + +/***********************************************************************/ +/* + * Debugging macros + * + */ + + +/* + * Definition of output stream - change to stderr if necessary + */ +#define DEBUG_STREAM stdout + +/* + * Debug printf macros, one for each argument count (format string plus 0 to 14 values). + * Each forwards to fprintf on DEBUG_STREAM when DEBUG_ON is defined and expands to nothing otherwise. + */ +#ifdef DEBUG_ON +#include <stdio.h> + +#define DEBUG_PRINTF_0(a) fprintf(DEBUG_STREAM, a) +#define DEBUG_PRINTF_1(a, b) fprintf(DEBUG_STREAM, a, b) +#define DEBUG_PRINTF_2(a, b, c) fprintf(DEBUG_STREAM, a, b, c) +#define DEBUG_PRINTF_3(a, b, c, d) fprintf(DEBUG_STREAM, a, b, c, d) +#define DEBUG_PRINTF_4(a, b, c, d, e) fprintf(DEBUG_STREAM, a, b, c, d, e) +#define DEBUG_PRINTF_5(a, b, c, d, e, f) fprintf(DEBUG_STREAM, a, b, c, d, e, f) +#define DEBUG_PRINTF_6(a, b, c, d, e, f, g) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g) +#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h) +#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i) +#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j) +#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k) +#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l) +#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m) +#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n) +#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) +#else /* DEBUG_ON */ +#define DEBUG_PRINTF_0(a)
+#define DEBUG_PRINTF_1(a, b) +#define DEBUG_PRINTF_2(a, b, c) +#define DEBUG_PRINTF_3(a, b, c, d) +#define DEBUG_PRINTF_4(a, b, c, d, e) +#define DEBUG_PRINTF_5(a, b, c, d, e, f) +#define DEBUG_PRINTF_6(a, b, c, d, e, f, g) +#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h) +#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i) +#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j) +#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k) +#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l) +#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m) +#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n) +#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) +#endif /* DEBUG_ON */ + + +/* + * Domain and sub domain definitions + * + * In order to turn on debug for an entire domain or sub-domain + * at compile time, one of the DEBUG_DOMAIN_* below may be defined, + * which will activate debug in all of the defines it contains. + */ + +#ifdef DEBUG_DOMAIN_AC +#define DEBUG_OMXACAAC_DECODECHANPAIRELT_MPEG4 +#define DEBUG_OMXACAAC_DECODECHANPAIRELT +#define DEBUG_OMXACAAC_DECODEDATSTRELT +#define DEBUG_OMXACAAC_DECODEFILLELT +#define DEBUG_OMXACAAC_DECODEISSTEREO_S32 +#define DEBUG_OMXACAAC_DECODEMSPNS_S32 +#define DEBUG_OMXACAAC_DECODEMSSTEREO_S32_I +#define DEBUG_OMXACAAC_DECODEPRGCFGELT +#define DEBUG_OMXACAAC_DECODETNS_S32_I +#define DEBUG_OMXACAAC_DEINTERLEAVESPECTRUM_S32 +#define DEBUG_OMXACAAC_ENCODETNS_S32_I +#define DEBUG_OMXACAAC_LONGTERMPREDICT_S32 +#define DEBUG_OMXACAAC_LONGTERMRECONSTRUCT_S32 +#define DEBUG_OMXACAAC_MDCTFWD_S32 +#define DEBUG_OMXACAAC_MDCTINV_S32_S16 +#define DEBUG_OMXACAAC_NOISELESSDECODE +#define DEBUG_OMXACAAC_QUANTINV_S32_I +#define DEBUG_OMXACAAC_UNPACKADIFHEADER +#define DEBUG_OMXACAAC_UNPACKADTSFRAMEHEADER +#define DEBUG_OMXACMP3_HUFFMANDECODESFBMBP_S32 +#define DEBUG_OMXACMP3_HUFFMANDECODESFB_S32 +#define DEBUG_OMXACMP3_HUFFMANDECODE_S32 +#define DEBUG_OMXACMP3_MDCTINV_S32 +#define 
DEBUG_OMXACMP3_REQUANTIZESFB_S32_I +#define DEBUG_OMXACMP3_REQUANTIZE_S32_I +#define DEBUG_OMXACMP3_SYNTHPQMF_S32_S16 +#define DEBUG_OMXACMP3_UNPACKFRAMEHEADER +#define DEBUG_OMXACMP3_UNPACKSCALEFACTORS_S8 +#define DEBUG_OMXACMP3_UNPACKSIDEINFO +#endif /* DEBUG_DOMAIN_AC */ + + +#ifdef DEBUG_DOMAIN_VC +#define DEBUG_OMXVCM4P10_AVERAGE_16X +#define DEBUG_OMXVCM4P10_AVERAGE_4X +#define DEBUG_OMXVCM4P10_AVERAGE_8X +#define DEBUG_OMXVCM4P10_DEBLOCKCHROMA_U8_C1IR +#define DEBUG_OMXVCM4P10_DEBLOCKLUMA_U8_C1IR +#define DEBUG_OMXVCM4P10_DECODECHROMADCCOEFFSTOPAIRCAVLC_U8 +#define DEBUG_OMXVCM4P10_DECODECOEFFSTOPAIRCAVLC_U8 +#define DEBUG_OMXVCM4P10_DEQUANTTRANSFORMACFROMPAIR_U8_S16_C1_DLX +#define DEBUG_OMXVCM4P10_EXPANDFRAME +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_HOREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_VEREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_HOREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_VEREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_PREDICTINTRACHROMA8X8_U8_C1R +#define DEBUG_OMXVCM4P10_PREDICTINTRA_16X16_U8_C1R +#define DEBUG_OMXVCM4P10_PREDICTINTRA_4X4_U8_C1R +#define DEBUG_OMXVCM4P10_SADQUAR_16X +#define DEBUG_OMXVCM4P10_SADQUAR_4X +#define DEBUG_OMXVCM4P10_SADQUAR_8X +#define DEBUG_OMXVCM4P10_SAD_16X +#define DEBUG_OMXVCM4P10_SAD_4X +#define DEBUG_OMXVCM4P10_SAD_8X +#define DEBUG_OMXVCM4P10_SATD_4X4 +#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTCHROMADCFROMPAIR_U8_S16_C1 +#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTLUMADCFROMPAIR_U8_S16_C1 +#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_CHROMADC +#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_LUMADC +#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_16X16 +#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_8X8 +#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_16X16 +#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_8X8 +#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_SAD_U8_S16 +#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_U8_S16 +#define DEBUG_OMXVCM4P2_DCT8X8BLKDLX +#define 
DEBUG_OMXVCM4P2_DECODEBLOCKCOEF_INTER_S16 +#define DEBUG_OMXVCM4P2_DECODEPADMV_PVOP +#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTER_S16 +#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRAACVLC_S16 +#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRADCVLC_S16 +#define DEBUG_OMXVCM4P2_ENCODEMV_U8_S16 +#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTER_S16 +#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRAACVLC_S16 +#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRADCVLC_S16 +#define DEBUG_OMXVCM4P2_FINDMVPRED +#define DEBUG_OMXVCM4P2_IDCT8X8BLKDLX +#define DEBUG_OMXVCM4P2_LIMITMVTORECT +#define DEBUG_OMXVCM4P2_MOTIONESTIMATIONMB +#define DEBUG_OMXVCM4P2_PADMBGRAY_U8 +#define DEBUG_OMXVCM4P2_PADMBHORIZONTAL_U8 +#define DEBUG_OMXVCM4P2_PADMBVERTICAL_U8 +#define DEBUG_OMXVCM4P2_PADMV +#define DEBUG_OMXVCM4P2_QUANTINTER_S16_I +#define DEBUG_OMXVCM4P2_QUANTINTRA_S16_I +#define DEBUG_OMXVCM4P2_QUANTINVINTER_S16_I +#define DEBUG_OMXVCM4P2_QUANTINVINTRA_S16_I +#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTER +#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTRA +#endif /* DEBUG_DOMAIN_VC */ + + +#ifdef DEBUG_DOMAIN_IC +/* To be filled in */ +#endif /* DEBUG_DOMAIN_IC */ + + +#ifdef DEBUG_DOMAIN_SP +#define DEBUG_OMXACSP_DOTPROD_S16 +#define DEBUG_OMXACSP_BLOCKEXP_S16 +#define DEBUG_OMXACSP_BLOCKEXP_S32 +#define DEBUG_OMXACSP_COPY_S16 +#define DEBUG_OMXACSP_DOTPROD_S16 +#define DEBUG_OMXACSP_DOTPROD_S16_SFS +#define DEBUG_OMXACSP_FFTFWD_CTOC_SC16_SFS +#define DEBUG_OMXACSP_FFTFWD_CTOC_SC32_SFS +#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S16S32_SFS +#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S32_SFS +#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC16 +#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC32 +#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S16_S32 +#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S32 +#define DEBUG_OMXACSP_FFTINIT_C_SC16 +#define DEBUG_OMXACSP_FFTINIT_C_SC32 +#define DEBUG_OMXACSP_FFTINIT_R_S16_S32 +#define DEBUG_OMXACSP_FFTINIT_R_S32 +#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32S16_SFS +#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32_SFS 
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC16_SFS +#define DEBUG_OMXACSP_FFTINV_CTOC_SC32_SFS +#define DEBUG_OMXACSP_FILTERMEDIAN_S32_I +#define DEBUG_OMXACSP_FILTERMEDIAN_S32 +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_ISFS +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_I +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16 +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_SFS +#define DEBUG_OMXACSP_FIR_DIRECT_S16_ISFS +#define DEBUG_OMXACSP_FIR_DIRECT_S16_I +#define DEBUG_OMXACSP_FIR_DIRECT_S16 +#define DEBUG_OMXACSP_FIR_DIRECT_S16_SFS +#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16_I +#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16 +#define DEBUG_OMXACSP_IIRONE_DIRECT_S16_I +#define DEBUG_OMXACSP_IIRONE_DIRECT_S16 +#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16_I +#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16 +#define DEBUG_OMXACSP_IIR_DIRECT_S16_I +#define DEBUG_OMXACSP_IIR_DIRECT_S16 +#endif /* DEBUG_DOMAIN_SP */ + + +#ifdef DEBUG_DOMAIN_IP +#define DEBUG_OMXIPBM_ADDC_U8_C1R_SFS +#define DEBUG_OMXIPBM_COPY_U8_C1R +#define DEBUG_OMXIPBM_COPY_U8_C3R +#define DEBUG_OMXIPBM_MIRROR_U8_C1R +#define DEBUG_OMXIPBM_MULC_U8_C1R_SFS +#define DEBUG_OMXIPCS_COLORTWISTQ14_U8_C3R +#define DEBUG_OMXIPCS_RGB565TOYCBCR420LS_MCU_U16_S16_C3P3R +#define DEBUG_OMXIPCS_RGB565TOYCBCR422LS_MCU_U16_S16_C3P3R +#define DEBUG_OMXIPCS_RGB565TOYCBCR444LS_MCU_U16_S16_C3P3R +#define DEBUG_OMXIPCS_RGBTOYCBCR420LS_MCU_U8_S16_C3P3R +#define DEBUG_OMXIPCS_RGBTOYCBCR422LS_MCU_U8_S16_C3P3R +#define DEBUG_OMXIPCS_RGBTOYCBCR444LS_MCU_U8_S16_C3P3R +#define DEBUG_OMXIPCS_YCBCR420RSZROT_U8_P3R +#define DEBUG_OMXIPCS_YCBCR420TORGB565LS_MCU_S16_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR420TORGB565_U8_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR420TORGBLS_MCU_S16_U8_P3C3R +#define DEBUG_OMXIPCS_YCBCR422RSZCSCROTRGB_U8_C2R +#define DEBUG_OMXIPCS_YCBCR422RSZROT_U8_P3R +#define DEBUG_OMXIPCS_YCBCR422TORGB565LS_MCU_S16_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR422TORGB565_U8_U16_C2C3R +#define DEBUG_OMXIPCS_YCBCR422TORGBLS_MCU_S16_U8_P3C3R +#define 
DEBUG_OMXIPCS_YCBCR422TORGB_U8_C2C3R +#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_C2P3R +#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_P3R +#define DEBUG_OMXIPCS_YCBCR444TORGB565LS_MCU_S16_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR444TORGBLS_MCU_S16_U8_P3C3R +#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_C3R +#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCRTORGB_U8_C3R +#define DEBUG_OMXIPPP_GETCENTRALMOMENT_S64 +#define DEBUG_OMXIPPP_GETSPATIALMOMENT_S64 +#define DEBUG_OMXIPPP_MOMENTGETSTATESIZE_S64 +#define DEBUG_OMXIPPP_MOMENTINIT_S64 +#define DEBUG_OMXIPPP_MOMENTS64S_U8_C1R +#define DEBUG_OMXIPPP_MOMENTS64S_U8_C3R +#endif /* DEBUG_DOMAIN_IP */ + + +#endif /* _armCommon_H_ */ + +/*End of File*/ + + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h new file mode 100644 index 0000000000000000000000000000000000000000..abb98fcc1af794edbcae5e19425beeeab8ecbe79 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h @@ -0,0 +1,670 @@ +;// +;// +;// File Name: armCOMM_BitDec_s.h +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// OpenMAX optimized bitstream decode module +;// +;// You must include armCOMM_s.h before including this file +;// +;// This module provides macros to perform assembly optimized fixed and +;// variable length decoding from a read-only bitstream. The variable +;// length decode modules take as input a pointer to a table of 16-bit +;// entries of the following format. 
+;// +;// VLD Table Entry format +;// +;// 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +;// +------------------------------------------------+ +;// | Len | Symbol | 1 | +;// +------------------------------------------------+ +;// | Offset | 0 | +;// +------------------------------------------------+ +;// +;// If the table entry is a leaf entry then bit 0 set: +;// Len = Number of bits overread (0 to 7) +;// Symbol = Symbol payload (unsigned 12 bits) +;// +;// If the table entry is an internal node then bit 0 is clear: +;// Offset = Number of (16-bit) half words from the table +;// start to the next table node +;// +;// The table is accessed by successive lookup up on the +;// next Step bits of the input bitstream until a leaf node +;// is obtained. The Step sizes are supplied to the VLD macro. +;// +;// USAGE: +;// +;// To use any of the macros in this package, first call: +;// +;// M_BD_INIT ppBitStream, pBitOffset, pBitStream, RBitBuffer, RBitCount, Tmp +;// +;// This caches the current bitstream position and next available +;// bits in registers pBitStream, RBitBuffer, RBitCount. These registers +;// are reserved for use by the bitstream decode package until you +;// call M_BD_FINI. +;// +;// Next call the following macro(s) as many times as you need: +;// +;// M_BD_LOOK8 - Look ahead constant 1<=N<=8 bits into the bitstream +;// M_BD_LOOK16 - Look ahead constant 1<=N<=16 bits into the bitstream +;// M_BD_READ8 - Read constant 1<=N<=8 bits from the bitstream +;// M_BD_READ16 - Read constant 1<=N<=16 bits from the bitstream +;// M_BD_VREAD8 - Read variable 1<=N<=8 bits from the bitstream +;// M_BD_VREAD16 - Read variable 1<=N<=16 bits from the bitstream +;// M_BD_VLD - Perform variable length decode using lookup table +;// +;// Finally call the macro: +;// +;// M_BD_FINI ppBitStream, pBitOffset +;// +;// This writes the bitstream state back to memory. 
+;// +;// The three bitstream cache register names are assigned to the following global +;// variables: +;// + + GBLS pBitStream ;// Register name for pBitStream + GBLS BitBuffer ;// Register name for BitBuffer + GBLS BitCount ;// Register name for BitCount + +;// +;// These register variables must have a certain defined state on entry to every bitstream +;// macro (except M_BD_INIT) and on exit from every bitstream macro (except M_BD_FINI). +;// The state may depend on implementation. +;// +;// For the default (ARM11) implementation the following hold: +;// pBitStream - points to the first byte not held in the BitBuffer +;// BitBuffer - is a cache of (4 bytes) 32 bits, bit 31 the first bit +;// BitCount - is offset (from the top bit) to the next unused bitstream bit +;// 0<=BitCount<=15 (so BitBuffer holds at least 17 unused bits) +;// +;// + + ;// Bitstream Decode initialise + ;// + ;// Initialises the bitstream decode global registers from + ;// bitstream pointers. This macro is split into 3 parts to enable + ;// scheduling. + ;// + ;// Input Registers: + ;// + ;// $ppBitStream - pointer to pointer to the next bitstream byte + ;// $pBitOffset - pointer to the number of bits used in the current byte (0..7) + ;// $RBitStream - register to use for pBitStream (can be $ppBitStream) + ;// $RBitBuffer - register to use for BitBuffer + ;// $RBitCount - register to use for BitCount (can be $pBitOffset) + ;// + ;// Output Registers: + ;// + ;// $T1,$T2,$T3 - registers that must be preserved between calls to + ;// M_BD_INIT1 and M_BD_INIT2 + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_INIT0 $ppBitStream, $pBitOffset, $RBitStream, $RBitBuffer, $RBitCount + +pBitStream SETS "$RBitStream" +BitBuffer SETS "$RBitBuffer" +BitCount SETS "$RBitCount" + + ;// load inputs + LDR $pBitStream, [$ppBitStream] + LDR $BitCount, [$pBitOffset] + MEND + + MACRO + M_BD_INIT1 $T1, $T2, $T3 + LDRB $T2, [$pBitStream, #2] + LDRB $T1, [$pBitStream, #1] + LDRB $BitBuffer, [$pBitStream], #3 + ADD $BitCount, $BitCount, #8 + MEND + + MACRO + M_BD_INIT2 $T1, $T2, $T3 + ORR $T2, $T2, $T1, LSL #8 + ORR $BitBuffer, $T2, $BitBuffer, LSL #16 + MEND + + ;// + ;// Look ahead fixed 1<=N<=8 bits without consuming any bits + ;// The next bits will be placed at bit 31..24 of destination register + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to look + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_LOOK8 $Symbol, $N + ASSERT ($N>=1):LAND:($N<=8) + MOV $Symbol, $BitBuffer, LSL $BitCount + MEND + + ;// + ;// Look ahead fixed 1<=N<=16 bits without consuming any bits + ;// The next bits will be placed at bit 31..16 of destination register + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to look + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_LOOK16 $Symbol, $N, $T1 + ASSERT ($N >= 1):LAND:($N <= 16) + MOV $Symbol, $BitBuffer, LSL $BitCount + MEND + + ;// + ;// Skips fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer + ;// + ;// Input Registers: + ;// + ;// $N - number of bits + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_SKIP8 $N, $T1 + ASSERT ($N>=1):LAND:($N<=8) + SUBS $BitCount, $BitCount, #(8-$N) + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Read fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_READ8 $Symbol, $N, $T1 + ASSERT ($N>=1):LAND:($N<=8) + MOVS $Symbol, $BitBuffer, LSL $BitCount + SUBS $BitCount, $BitCount, #(8-$N) + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + MOV $Symbol, $Symbol, LSR #(32-$N) + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Read fixed 1<=N<=16 bits from the bitstream, advancing the bitstream pointer + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_READ16 $Symbol, $N, $T1, $T2 + ASSERT ($N>=1):LAND:($N<=16) + ASSERT $Symbol<>$T1 + IF ($N<=8) + M_BD_READ8 $Symbol, $N, $T1 + ELSE + ;// N>8 so we will be able to refill at least one byte + LDRB $T1, [$pBitStream], #1 + MOVS $Symbol, $BitBuffer, LSL $BitCount + ORR $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBS $BitCount, $BitCount, #(16-$N) + LDRCSB $T1, [$pBitStream], #1 + MOV $Symbol, $Symbol, LSR #(32-$N) + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + ENDIF + MEND + + ;// + ;// Skip variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits. 1<=N<=8 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_VSKIP8 $N, $T1 + ADD $BitCount, $BitCount, $N + SUBS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Skip variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits. 1<=N<=16 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_VSKIP16 $N, $T1, $T2 + ADD $BitCount, $BitCount, $N + SUBS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Read variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read. 1<=N<=8 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_VREAD8 $Symbol, $N, $T1, $T2 + MOV $Symbol, $BitBuffer, LSL $BitCount + ADD $BitCount, $BitCount, $N + SUBS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + RSB $T2, $N, #32 + ADDCC $BitCount, $BitCount, #8 + MOV $Symbol, $Symbol, LSR $T2 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Read variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read. 1<=N<=16 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_VREAD16 $Symbol, $N, $T1, $T2 + MOV $Symbol, $BitBuffer, LSL $BitCount + ADD $BitCount, $BitCount, $N + SUBS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + RSB $T2, $N, #32 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + MOV $Symbol, $Symbol, LSR $T2 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Decode a code of the form 0000...001 where there + ;// are N zeros before the 1 and N<=15 (code length<=16) + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the number of zeros before the next 1 + ;// >=16 is an illegal code + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_CLZ16 $Symbol, $T1, $T2 + MOVS $Symbol, $BitBuffer, LSL $BitCount + CLZ $Symbol, $Symbol + ADD $BitCount, $BitCount, $Symbol + SUBS $BitCount, $BitCount, #7 ;// length is Symbol+1 + LDRCSB $T1, [$pBitStream], #1 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Decode a code of the form 1111...110 where there + ;// are N ones before the 0 and N<=15 (code length<=16) + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the number of ones before the next 0 + ;// >=16 is an illegal code + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_CLO16 $Symbol, $T1, $T2 + MOV $Symbol, $BitBuffer, LSL $BitCount + MVN $Symbol, $Symbol + CLZ $Symbol, $Symbol + ADD $BitCount, $BitCount, $Symbol + SUBS $BitCount, $BitCount, #7 ;// length is Symbol+1 + LDRCSB $T1, [$pBitStream], #1 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Variable Length Decode module + ;// + ;// Decodes one VLD Symbol from a bitstream and refill the bitstream + ;// buffer. + ;// + ;// Input Registers: + ;// + ;// $pVLDTable - pointer to VLD decode table of 16-bit entries. + ;// The format is described above at the start of + ;// this file. + ;// $S0 - The number of bits to look up for the first step + ;// 1<=$S0<=8 + ;// $S1 - The number of bits to look up for each subsequent + ;// step 1<=$S1<=$S0. + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - decoded VLD symbol value + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_VLD $Symbol, $T1, $T2, $pVLDTable, $S0, $S1 + ASSERT (1<=$S0):LAND:($S0<=8) + ASSERT (1<=$S1):LAND:($S1<=$S0) + + ;// Note 0<=BitCount<=15 on entry and exit + + MOVS $T1, $BitBuffer, LSL $BitCount ;// left align next bits + MOVS $Symbol, #(2<<$S0)-2 ;// create mask + AND $Symbol, $Symbol, $T1, LSR #(31-$S0) ;// 2*(next $S0 bits) + SUBS $BitCount, $BitCount, #8 ;// CS if buffer can be filled +01 + LDRCSB $T1, [$pBitStream], #1 ;// load refill byte + LDRH $Symbol, [$pVLDTable, $Symbol] ;// load table entry + ADDCC $BitCount, $BitCount, #8 ;// refill not possible + ADD $BitCount, $BitCount, #$S0 ;// assume $S0 bits used + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in refill byte + MOVS $T1, $Symbol, LSR #1 ;// CS=leaf entry + BCS %FT02 + + MOVS $T1, $BitBuffer, LSL $BitCount ;// left align next bit + IF (2*$S0-$S1<=8) + ;// Can combine refill check and -S0+S1 and keep $BitCount<=15 + SUBS $BitCount, $BitCount, #8+($S0-$S1) + ELSE + ;// Separate refill check and -S0+S1 offset + SUBS $BitCount, $BitCount, #8 + SUB $BitCount, $BitCount, #($S0-$S1) + ENDIF + ADD $Symbol, $Symbol, $T1, LSR #(31-$S1) ;// add 2*(next $S1 bits) to + BIC $Symbol, $Symbol, #1 ;// table offset + B %BT01 ;// load next table entry +02 + ;// BitCount range now depend on the route here + ;// if (first step) S0 <= BitCount <= 7+S0 <=15 + ;// else if (2*S0-S1<=8) S0 <= BitCount <= 7+(2*S0-S1) <=15 + ;// else S1 <= BitCount <= 7+S1 <=15 + + SUB $BitCount, $BitCount, $Symbol, LSR#13 + BIC $Symbol, $T1, #0xF000 + MEND + + + ;// Add an offset number of bits + ;// + ;// Outputs destination byte and bit index values which corresponds to an offset number of bits + ;// from the current location. This is used to compare bitstream positions using. M_BD_CMP. + ;// + ;// Input Registers: + ;// + ;// $Offset - Offset to be added in bits. + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $ByteIndex - Destination pBitStream pointer after adding the Offset. + ;// This value will be 4 byte ahead and needs to subtract by 4 to get exact + ;// pointer (as in M_BD_FINI). But for using with M_BD_CMP subtract is not needed. + ;// $BitIndex - Destination BitCount after the addition of Offset number of bits + ;// + MACRO + M_BD_ADD $ByteIndex, $BitIndex, $Offset + + ;// ($ByteIndex,$BitIndex) = Current position + $Offset bits + ADD $Offset, $Offset, $BitCount + AND $BitIndex, $Offset, #7 + ADD $ByteIndex, $pBitStream, $Offset, ASR #3 + MEND + + ;// Move bitstream pointers to the location given + ;// + ;// Outputs destination byte and bit index values which corresponds to + ;// the current location given (calculated using M_BD_ADD). + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// $ByteIndex - Destination pBitStream pointer after move. + ;// This value will be 4 byte ahead and needs to subtract by 4 to get exact + ;// pointer (as in M_BD_FINI). + ;// $BitIndex - Destination BitCount after the move + ;// + ;// Output Registers: + ;// + ;// $pBitStream \ + ;// } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_MOV $ByteIndex, $BitIndex + + ;// ($pBitStream, $Offset) = ($ByteIndex,$BitIndex) + MOV $BitCount, $BitIndex + MOV $pBitStream, $ByteIndex + MEND + + ;// Bitstream Compare + ;// + ;// Compares bitstream position with that of a destination position. Destination position + ;// is held in two input registers which are calculated using M_BD_ADD macro + ;// + ;// Input Registers: + ;// + ;// $ByteIndex - Destination pBitStream pointer, (4 byte ahead as described in M_BD_ADD) + ;// $BitIndex - Destination BitCount + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// FLAGS - GE if destination is reached, LT if destination is ahead + ;// $T1 - corrupted temp/scratch register + ;// + MACRO + M_BD_CMP $ByteIndex, $BitIndex, $T1 + + ;// Return flags set by (current position)-($ByteIndex,$BitIndex) + ;// so GE means that we have reached the indicated position + + ADD $T1, $pBitStream, $BitCount, LSR #3 + CMP $T1, $ByteIndex + AND $T1, $BitCount, #7 + CMPEQ $T1, $BitIndex + MEND + + + ;// Bitstream Decode finalise + ;// + ;// Writes back the bitstream state to the bitstream pointers + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $ppBitStream - pointer to pointer to the next bitstream byte + ;// $pBitOffset - pointer to the number of bits used in the current byte (0..7) + ;// $pBitStream \ + ;// $BitBuffer } these registers are corrupted + ;// $BitCount / + ;// + MACRO + M_BD_FINI $ppBitStream, $pBitOffset + + ;// Advance pointer by the number of free bits in the buffer + ADD $pBitStream, $pBitStream, $BitCount, LSR#3 + AND $BitCount, $BitCount, #7 + + ;// Now move back 32 bits to reach the first unused bit + SUB $pBitStream, $pBitStream, #4 + + ;// Store out bitstream state + STR $BitCount, [$pBitOffset] + STR $pBitStream, [$ppBitStream] + MEND + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h new file mode 100644 index 0000000000000000000000000000000000000000..4f9bc3bee8b79fd63e8958e60d35e17211d60229 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h @@ -0,0 +1,212 @@ +/** + * + * File Name: armCOMM_Bitstream.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * File: armCOMM_Bitstream.h + * Brief: Declares common API's/Data types used across the OpenMax Encoders/Decoders. + * + */ + +#ifndef _armCodec_H_ +#define _armCodec_H_ + +#include "omxtypes.h" + +typedef struct { + OMX_U8 codeLen; + OMX_U32 codeWord; +} ARM_VLC32; + +/* The above should be renamed as "ARM_VLC32" */ + +/** + * Function: armLookAheadBits() + * + * Description: + * Get the next N bits from the bitstream without advancing the bitstream pointer + * + * Parameters: + * [in] **ppBitStream + * [in] *pOffset + * [in] N=1...32 + * + * Returns Value + */ + +OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N); + +/** + * Function: armGetBits() + * + * Description: + * Read N bits from the bitstream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * [in] N=1..32 + * + * [out] *ppBitStream + * [out] *pOffset + * Returns Value + */ + +OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N); + +/** + * Function: armByteAlign() + * + * Description: + * Align the pointer *ppBitStream to the next byte boundary + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + +OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset); + +/** + * Function: armSkipBits() + * + * Description: + * Skip N bits from the value at *ppBitStream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * [in] N + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + +OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N); + +/*************************************** + * Variable bit length Decode + ***************************************/ + +/** + * Function: armUnPackVLC32() + * + * Description: + * Variable length decode of variable length symbol (max size 32 bits) read from + * the bit stream pointed by *ppBitStream at *pOffset by using the table + * pointed by pCodeBook + * + * Parameters: + * [in] **ppBitStream 
+ * [in] *pOffset + * [in] pCodeBook + * + * [out] **ppBitStream + * [out] *pOffset + * + * Returns : Code Book Index if successful. + * : "ARM_NO_CODEBOOK_INDEX = 0xFFFF" if search fails. + **/ + +#define ARM_NO_CODEBOOK_INDEX (OMX_U16)(0xFFFF) + +OMX_U16 armUnPackVLC32( + const OMX_U8 **ppBitStream, + OMX_INT *pOffset, + const ARM_VLC32 *pCodeBook +); + +/*************************************** + * Fixed bit length Encode + ***************************************/ + +/** + * Function: armPackBits + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. + * [in] codeWord Code word that needs to be inserted into the + * bitstream + * [in] codeLength Length of the code word valid range 1...32 + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pOffset *pOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. + * + */ + +OMXResult armPackBits ( + OMX_U8 **ppBitStream, + OMX_INT *pOffset, + OMX_U32 codeWord, + OMX_INT codeLength +); + +/*************************************** + * Variable bit length Encode + ***************************************/ + +/** + * Function: armPackVLC32 + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pBitOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. 
+ * [in] code VLC code word that need to be inserted in to the + * bitstream + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. + * + */ + +OMXResult armPackVLC32 ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + ARM_VLC32 code +); + +#endif /*_armCodec_H_*/ + +/*End of File*/ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h new file mode 100644 index 0000000000000000000000000000000000000000..d5db32ff4d3565709a6ab59599bc30f29b998e7b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h @@ -0,0 +1,40 @@ +/** + * + * + * File Name: armCOMM_IDCTTable.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File : armCOMM_IDCTTable.h + * Description : Contains declarations of tables for IDCT calculation. 
+ * + */ + +#ifndef _armCOMM_IDCTTable_H_ +#define _armCOMM_IDCTTable_H_ + +#include "omxtypes.h" + + /* Table of s(u)*A(u)*A(v)/16 at Q15 + * s(u)=1.0 0 <= u <= 5 + * s(6)=2.0 + * s(7)=4.0 + * A(0) = 2*sqrt(2) + * A(u) = 4*cos(u*pi/16) for (u!=0) + */ +extern const OMX_U16 armCOMM_IDCTPreScale [64]; +extern const OMX_U16 armCOMM_IDCTCoef [4]; + +#endif /* _armCOMM_IDCTTable_H_ */ + + +/* End of File */ + + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h new file mode 100644 index 0000000000000000000000000000000000000000..03f713774db4acb55c2b45523cc718a529dc1621 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h @@ -0,0 +1,1445 @@ +;// +;// This confidential and proprietary software may be used only as +;// authorised by a licensing agreement from ARM Limited +;// (C) COPYRIGHT 2004 ARM Limited +;// ALL RIGHTS RESERVED +;// The entire notice above must be reproduced on all authorised +;// copies and copies may only be made to the extent permitted +;// by a licensing agreement from ARM Limited. +;// +;// IDCT_s.s +;// +;// Inverse DCT module +;// +;// +;// ALGORITHM DESCRIPTION +;// +;// The 8x8 2D IDCT is performed by calculating a 1D IDCT for each +;// column and then a 1D IDCT for each row. +;// +;// The 8-point 1D IDCT is defined by +;// f(x) = (C(0)*T(0)*c(0,x) + ... + C(7)*T(7)*c(7,x))/2 +;// +;// C(u) = 1/sqrt(2) if u=0 or 1 if u!=0 +;// c(u,x) = cos( (2x+1)*u*pi/16 ) +;// +;// We compute the 8-point 1D IDCT using the reverse of +;// the Arai-Agui-Nakajima flow graph which we split into +;// 5 stages named in reverse order to identify with the +;// forward DCT. 
Direct inversion of the forward formulae +;// in file FDCT_s.s gives: +;// +;// IStage 5: j(u) = T(u)*A(u) [ A(u)=4*C(u)*c(u,0) ] +;// [ A(0) = 2*sqrt(2) +;// A(u) = 4*cos(u*pi/16) for (u!=0) ] +;// +;// IStage 4: i0 = j0 i1 = j4 +;// i3 = (j2+j6)/2 i2 = (j2-j6)/2 +;// i7 = (j5+j3)/2 i4 = (j5-j3)/2 +;// i5 = (j1+j7)/2 i6 = (j1-j7)/2 +;// +;// IStage 3: h0 = (i0+i1)/2 h1 = (i0-i1)/2 +;// h2 = (i2*sqrt2)-i3 h3 = i3 +;// h4 = cos(pi/8)*i4 + sin(pi/8)*i6 +;// h6 = -sin(pi/8)*i4 + cos(pi/8)*i6 +;// [ The above two lines rotate by -(pi/8) ] +;// h5 = (i5-i7)/sqrt2 h7 = (i5+i7)/2 +;// +;// IStage 2: g0 = (h0+h3)/2 g3 = (h0-h3)/2 +;// g1 = (h1+h2)/2 g2 = (h1-h2)/2 +;// g7 = h7 g6 = h6 - h7 +;// g5 = h5 - g6 g4 = h4 - g5 +;// +;// IStage 1: f0 = (g0+g7)/2 f7 = (g0-g7)/2 +;// f1 = (g1+g6)/2 f6 = (g1-g6)/2 +;// f2 = (g2+g5)/2 f5 = (g2-g5)/2 +;// f3 = (g3+g4)/2 f4 = (g3-g4)/2 +;// +;// Note that most coefficients are halved 3 times during the +;// above calculation. We can rescale the algorithm dividing +;// the input by 8 to remove the halvings. +;// +;// IStage 5: j(u) = T(u)*A(u)/8 +;// +;// IStage 4: i0 = j0 i1 = j4 +;// i3 = j2 + j6 i2 = j2 - j6 +;// i7 = j5 + j3 i4 = j5 - j3 +;// i5 = j1 + j7 i6 = j1 - j7 +;// +;// IStage 3: h0 = i0 + i1 h1 = i0 - i1 +;// h2 = (i2*sqrt2)-i3 h3 = i3 +;// h4 = 2*( cos(pi/8)*i4 + sin(pi/8)*i6) +;// h6 = 2*(-sin(pi/8)*i4 + cos(pi/8)*i6) +;// h5 = (i5-i7)*sqrt2 h7 = i5 + i7 +;// +;// IStage 2: g0 = h0 + h3 g3 = h0 - h3 +;// g1 = h1 + h2 g2 = h1 - h2 +;// g7 = h7 g6 = h6 - h7 +;// g5 = h5 - g6 g4 = h4 - g5 +;// +;// IStage 1: f0 = g0 + g7 f7 = g0 - g7 +;// f1 = g1 + g6 f6 = g1 - g6 +;// f2 = g2 + g5 f5 = g2 - g5 +;// f3 = g3 + g4 f4 = g3 - g4 +;// +;// Note: +;// 1. The scaling by A(u)/8 can often be combined with inverse +;// quantization. The column and row scalings can be combined. +;// 2. The flowgraph in the AAN paper has h4,g6 negated compared +;// to the above code but is otherwise identical. +;// 3. 
The rotation by -pi/8 can be performed using three multiplies +;// Eg c*i4+s*i6 = (i6-i4)*s + (c+s)*i4 +;// -s*i4+c*i6 = (i6-i4)*s + (c-s)*i6 +;// 4. If |T(u)|<=1 then from the IDCT definition, +;// |f(x)| <= ((1/sqrt2) + |c(1,x)| + .. + |c(7,x)|)/2 +;// = ((1/sqrt2) + cos(pi/16) + ... + cos(7*pi/16))/2 +;// = ((1/sqrt2) + (cot(pi/32)-1)/2)/2 +;// = (1 + cos(pi/16) + cos(2pi/16) + cos(3pi/16))/sqrt(2) +;// = (approx)2.64 +;// So the max gain of the 2D IDCT is ~x7.0 = 3 bits. +;// The table below shows input patterns generating the maximum +;// value of |f(x)| for input in the range |T(u)|<=1. M=-1, P=+1 +;// InputPattern Max |f(x)| +;// PPPPPPPP |f0| = 2.64 +;// PPPMMMMM |f1| = 2.64 +;// PPMMMPPP |f2| = 2.64 +;// PPMMPPMM |f3| = 2.64 +;// PMMPPMMP |f4| = 2.64 +;// PMMPMMPM |f5| = 2.64 +;// PMPPMPMP |f6| = 2.64 +;// PMPMPMPM |f7| = 2.64 +;// Note that this input pattern is the transpose of the +;// corresponding max input pattern for the FDCT. + +;// Arguments + +pSrc RN 0 ;// source data buffer +Stride RN 1 ;// destination stride in bytes +pDest RN 2 ;// destination data buffer +pScale RN 3 ;// pointer to scaling table + + + ;// DCT Inverse Macro + ;// The DCT code should be parametrized according + ;// to the following inputs: + ;// $outsize = "u8" : 8-bit unsigned data saturated (0 to +255) + ;// "s9" : 16-bit signed data saturated to 9-bit (-256 to +255) + ;// "s16" : 16-bit signed data not saturated (max size ~+/-14273) + ;// $inscale = "s16" : signed 16-bit aan-scale table, Q15 format, with 4 byte alignment + ;// "s32" : signed 32-bit aan-scale table, Q23 format, with 4 byte alignment + ;// + ;// Inputs: + ;// pSrc = r0 = Pointer to input data + ;// Range is -256 to +255 (9-bit) + ;// Stride = r1 = Stride between input lines + ;// pDest = r2 = Pointer to output data + ;// pScale = r3 = Pointer to aan-scale table in the format defined by $inscale + + + + MACRO + M_IDCT $outsize, $inscale, $stride + LCLA SHIFT + + + IF ARM1136JS + +;// REGISTER ALLOCATION +;// 
This is hard since we have 8 values, 9 free registers and each +;// butterfly requires a temporary register. We also want to +;// maintain register order so we can use LDM/STM. The table below +;// summarises the register allocation that meets all these criteria. +;// a=1stcol, b=2ndcol, f,g,h,i are dataflow points described above. +;// +;// r1 a01 g0 h0 +;// r4 b01 f0 g1 h1 i0 +;// r5 a23 f1 g2 i1 +;// r6 b23 f2 g3 h2 i2 +;// r7 a45 f3 h3 i3 +;// r8 b45 f4 g4 h4 i4 +;// r9 a67 f5 g5 h5 i5 +;// r10 b67 f6 g6 h6 i6 +;// r11 f7 g7 h7 i7 +;// +ra01 RN 1 +rb01 RN 4 +ra23 RN 5 +rb23 RN 6 +ra45 RN 7 +rb45 RN 8 +ra67 RN 9 +rb67 RN 10 +rtmp RN 11 +csPiBy8 RN 12 ;// [ (Sin(pi/8)@Q15), (Cos(pi/8)@Q15) ] +LoopRR2 RN 14 ;// [ LoopNumber<<13 , (1/Sqrt(2))@Q15 ] +;// Transpose allocation +xft RN ra01 +xf0 RN rb01 +xf1 RN ra23 +xf2 RN rb23 +xf3 RN ra45 +xf4 RN rb45 +xf5 RN ra67 +xf6 RN rb67 +xf7 RN rtmp +;// IStage 1 allocation +xg0 RN xft +xg1 RN xf0 +xg2 RN xf1 +xg3 RN xf2 +xgt RN xf3 +xg4 RN xf4 +xg5 RN xf5 +xg6 RN xf6 +xg7 RN xf7 +;// IStage 2 allocation +xh0 RN xg0 +xh1 RN xg1 +xht RN xg2 +xh2 RN xg3 +xh3 RN xgt +xh4 RN xg4 +xh5 RN xg5 +xh6 RN xg6 +xh7 RN xg7 +;// IStage 3,4 allocation +xit RN xh0 +xi0 RN xh1 +xi1 RN xht +xi2 RN xh2 +xi3 RN xh3 +xi4 RN xh4 +xi5 RN xh5 +xi6 RN xh6 +xi7 RN xh7 + + M_STR pDest, ppDest + IF "$stride"="s" + M_STR Stride, pStride + ENDIF + M_ADR pDest, pBlk + LDR csPiBy8, =0x30fc7642 + LDR LoopRR2, =0x00005a82 + +v6_idct_col$_F + ;// Load even values + LDR xi4, [pSrc], #4 ;// j0 + LDR xi5, [pSrc, #4*16-4] ;// j4 + LDR xi6, [pSrc, #2*16-4] ;// j2 + LDR xi7, [pSrc, #6*16-4] ;// j6 + + ;// Scale Even Values + IF "$inscale"="s16" ;// 16x16 mul +SHIFT SETA 12 + LDR xi0, [pScale], #4 + LDR xi1, [pScale, #4*16-4] + LDR xi2, [pScale, #2*16-4] + MOV xit, #1<<(SHIFT-1) + SMLABB xi3, xi0, xi4, xit + SMLATT xi4, xi0, xi4, xit + SMLABB xi0, xi1, xi5, xit + SMLATT xi5, xi1, xi5, xit + MOV xi3, xi3, ASR #SHIFT + PKHBT xi4, xi3, xi4, LSL #(16-SHIFT) + LDR xi3, 
[pScale, #6*16-4] + SMLABB xi1, xi2, xi6, xit + SMLATT xi6, xi2, xi6, xit + MOV xi0, xi0, ASR #SHIFT + PKHBT xi5, xi0, xi5, LSL #(16-SHIFT) + SMLABB xi2, xi3, xi7, xit + SMLATT xi7, xi3, xi7, xit + MOV xi1, xi1, ASR #SHIFT + PKHBT xi6, xi1, xi6, LSL #(16-SHIFT) + MOV xi2, xi2, ASR #SHIFT + PKHBT xi7, xi2, xi7, LSL #(16-SHIFT) + ENDIF + IF "$inscale"="s32" ;// 32x16 mul +SHIFT SETA (12+8-16) + MOV xit, #1<<(SHIFT-1) + LDR xi0, [pScale], #8 + LDR xi1, [pScale, #0*32+4-8] + LDR xi2, [pScale, #4*32-8] + LDR xi3, [pScale, #4*32+4-8] + SMLAWB xi0, xi0, xi4, xit + SMLAWT xi1, xi1, xi4, xit + SMLAWB xi2, xi2, xi5, xit + SMLAWT xi3, xi3, xi5, xit + MOV xi0, xi0, ASR #SHIFT + PKHBT xi4, xi0, xi1, LSL #(16-SHIFT) + MOV xi2, xi2, ASR #SHIFT + PKHBT xi5, xi2, xi3, LSL #(16-SHIFT) + LDR xi0, [pScale, #2*32-8] + LDR xi1, [pScale, #2*32+4-8] + LDR xi2, [pScale, #6*32-8] + LDR xi3, [pScale, #6*32+4-8] + SMLAWB xi0, xi0, xi6, xit + SMLAWT xi1, xi1, xi6, xit + SMLAWB xi2, xi2, xi7, xit + SMLAWT xi3, xi3, xi7, xit + MOV xi0, xi0, ASR #SHIFT + PKHBT xi6, xi0, xi1, LSL #(16-SHIFT) + MOV xi2, xi2, ASR #SHIFT + PKHBT xi7, xi2, xi3, LSL #(16-SHIFT) + ENDIF + + ;// Load odd values + LDR xi0, [pSrc, #1*16-4] ;// j1 + LDR xi1, [pSrc, #7*16-4] ;// j7 + LDR xi2, [pSrc, #5*16-4] ;// j5 + LDR xi3, [pSrc, #3*16-4] ;// j3 + + IF {TRUE} + ;// shortcut if odd values 0 + TEQ xi0, #0 + TEQEQ xi1, #0 + TEQEQ xi2, #0 + TEQEQ xi3, #0 + BEQ v6OddZero$_F + ENDIF + + ;// Store scaled even values + STMIA pDest, {xi4, xi5, xi6, xi7} + + ;// Scale odd values + IF "$inscale"="s16" + ;// Perform AAN Scale + LDR xi4, [pScale, #1*16-4] + LDR xi5, [pScale, #7*16-4] + LDR xi6, [pScale, #5*16-4] + SMLABB xi7, xi0, xi4, xit + SMLATT xi0, xi0, xi4, xit + SMLABB xi4, xi1, xi5, xit + SMLATT xi1, xi1, xi5, xit + MOV xi7, xi7, ASR #SHIFT + PKHBT xi0, xi7, xi0, LSL #(16-SHIFT) + LDR xi7, [pScale, #3*16-4] + SMLABB xi5, xi2, xi6, xit + SMLATT xi2, xi2, xi6, xit + MOV xi4, xi4, ASR #SHIFT + PKHBT xi1, xi4, xi1, LSL #(16-SHIFT) 
+ SMLABB xi6, xi3, xi7, xit + SMLATT xi3, xi3, xi7, xit + MOV xi5, xi5, ASR #SHIFT + PKHBT xi2, xi5, xi2, LSL #(16-SHIFT) + MOV xi6, xi6, ASR #SHIFT + PKHBT xi3, xi6, xi3, LSL #(16-SHIFT) + ENDIF + IF "$inscale"="s32" ;// 32x16 mul + LDR xi4, [pScale, #1*32-8] + LDR xi5, [pScale, #1*32+4-8] + LDR xi6, [pScale, #7*32-8] + LDR xi7, [pScale, #7*32+4-8] + SMLAWB xi4, xi4, xi0, xit + SMLAWT xi5, xi5, xi0, xit + SMLAWB xi6, xi6, xi1, xit + SMLAWT xi7, xi7, xi1, xit + MOV xi4, xi4, ASR #SHIFT + PKHBT xi0, xi4, xi5, LSL #(16-SHIFT) + MOV xi6, xi6, ASR #SHIFT + PKHBT xi1, xi6, xi7, LSL #(16-SHIFT) + LDR xi4, [pScale, #5*32-8] + LDR xi5, [pScale, #5*32+4-8] + LDR xi6, [pScale, #3*32-8] + LDR xi7, [pScale, #3*32+4-8] + SMLAWB xi4, xi4, xi2, xit + SMLAWT xi5, xi5, xi2, xit + SMLAWB xi6, xi6, xi3, xit + SMLAWT xi7, xi7, xi3, xit + MOV xi4, xi4, ASR #SHIFT + PKHBT xi2, xi4, xi5, LSL #(16-SHIFT) + MOV xi6, xi6, ASR #SHIFT + PKHBT xi3, xi6, xi7, LSL #(16-SHIFT) + ENDIF + + SHADD16 xi5, xi0, xi1 ;// (j1+j7)/2 + SSUB16 xi6, xi0, xi1 ;// j1-j7 + SHADD16 xi7, xi2, xi3 ;// (j5+j3)/2 + SSUB16 xi4, xi2, xi3 ;// j5-j3 + + SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2 + + PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a + PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b + + SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s] + SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s] + SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c] + SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c] + + SMULBB xi1, xi3, LoopRR2 + SMULTB xi3, xi3, LoopRR2 + + PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4 + PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4 + SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4 + + ;// xi0,xi1,xi2,xi3 now free + ;// IStage 4,3, rows 2to3 x1/2 + + MOV xi3, xi3, LSL #1 + PKHTB xh5, xi3, xi1, ASR#15 ;// h5/4 + LDRD xi0, [pDest, #8] ;// j2,j6 scaled + + ;// IStage 2, rows4to7 + SSUB16 xg6, xh6, xh7 + SSUB16 xg5, xh5, xg6 + SSUB16 xg4, xh4, xg5 + + SSUB16 xi2, xi0, xi1 ;// (j2-j6) + SHADD16 xi3, xi0, xi1 ;// (j2+j6)/2 + + SMULBB xi0, xi2, LoopRR2 + SMULTB xi2, xi2, 
LoopRR2 + + MOV xi2, xi2, LSL #1 + PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4 + + ;// xi0, xi1 now free + ;// IStage 4,3 rows 0to1 x 1/2 + LDRD xi0, [pDest] ;// j0, j4 scaled + SSUB16 xh2, xh2, xi3 + ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows + + SHADD16 xh0, xi0, xi1 + SHSUB16 xh1, xi0, xi1 + + ;// IStage 2 rows 0to3 x 1/2 + SHSUB16 xg2, xh1, xh2 + SHADD16 xg1, xh1, xh2 + SHSUB16 xg3, xh0, xh3 + SHADD16 xg0, xh0, xh3 + + ;// IStage 1 all rows + SADD16 xf3, xg3, xg4 + SSUB16 xf4, xg3, xg4 + SADD16 xf2, xg2, xg5 + SSUB16 xf5, xg2, xg5 + SADD16 xf1, xg1, xg6 + SSUB16 xf6, xg1, xg6 + SADD16 xf0, xg0, xg7 + SSUB16 xf7, xg0, xg7 + + ;// Transpose, store and loop + PKHBT ra01, xf0, xf1, LSL #16 + PKHTB rb01, xf1, xf0, ASR #16 + + PKHBT ra23, xf2, xf3, LSL #16 + PKHTB rb23, xf3, xf2, ASR #16 + + PKHBT ra45, xf4, xf5, LSL #16 + PKHTB rb45, xf5, xf4, ASR #16 + + PKHBT ra67, xf6, xf7, LSL #16 + STMIA pDest!, {ra01, ra23, ra45, ra67} + PKHTB rb67, xf7, xf6, ASR #16 + STMIA pDest!, {rb01, rb23, rb45, rb67} + BCC v6_idct_col$_F + + SUB pSrc, pDest, #(64*2) + M_LDR pDest, ppDest + IF "$stride"="s" + M_LDR pScale, pStride + ENDIF + B v6_idct_row$_F + +v6OddZero$_F + SSUB16 xi2, xi6, xi7 ;// (j2-j6) + SHADD16 xi3, xi6, xi7 ;// (j2+j6)/2 + + SMULBB xi0, xi2, LoopRR2 + SMULTB xi2, xi2, LoopRR2 + + MOV xi2, xi2, LSL #1 + PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4 + SSUB16 xh2, xh2, xi3 + + ;// xi0, xi1 now free + ;// IStage 4,3 rows 0to1 x 1/2 + + SHADD16 xh0, xi4, xi5 + SHSUB16 xh1, xi4, xi5 + + ;// IStage 2 rows 0to3 x 1/2 + SHSUB16 xg2, xh1, xh2 + SHADD16 xg1, xh1, xh2 + SHSUB16 xg3, xh0, xh3 + SHADD16 xg0, xh0, xh3 + + ;// IStage 1 all rows + MOV xf3, xg3 + MOV xf4, xg3 + MOV xf2, xg2 + MOV xf5, xg2 + MOV xf1, xg1 + MOV xf6, xg1 + MOV xf0, xg0 + MOV xf7, xg0 + + ;// Transpose + PKHBT ra01, xf0, xf1, LSL #16 + PKHTB rb01, xf1, xf0, ASR #16 + + PKHBT ra23, xf2, xf3, LSL #16 + PKHTB rb23, xf3, xf2, ASR #16 + + PKHBT ra45, xf4, xf5, LSL #16 + PKHTB rb45, xf5, xf4, ASR #16 + 
+ PKHBT ra67, xf6, xf7, LSL #16 + PKHTB rb67, xf7, xf6, ASR #16 + + STMIA pDest!, {ra01, ra23, ra45, ra67} + ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows + STMIA pDest!, {rb01, rb23, rb45, rb67} + + BCC v6_idct_col$_F + SUB pSrc, pDest, #(64*2) + M_LDR pDest, ppDest + IF "$stride"="s" + M_LDR pScale, pStride + ENDIF + + +v6_idct_row$_F + ;// IStage 4,3, rows4to7 x1/4 + LDR xit, =0x00010001 ;// rounding constant + LDR xi0, [pSrc, #1*16] ;// j1 + LDR xi1, [pSrc, #7*16] ;// 4*j7 + LDR xi2, [pSrc, #5*16] ;// j5 + LDR xi3, [pSrc, #3*16] ;// j3 + + SHADD16 xi1, xi1, xit ;// 2*j7 + SHADD16 xi1, xi1, xit ;// j7 + + SHADD16 xi5, xi0, xi1 ;// (j1+j7)/2 + SSUB16 xi6, xi0, xi1 ;// j1-j7 + SHADD16 xi7, xi2, xi3 ;// (j5+j3)/2 + SSUB16 xi4, xi2, xi3 ;// j5-j3 + + SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2 + + PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a + PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b + + SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s] + SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s] + SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c] + SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c] + + SMULBB xi1, xi3, LoopRR2 + SMULTB xi3, xi3, LoopRR2 + + PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4 + PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4 + SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4 + + MOV xi3, xi3, LSL #1 + PKHTB xh5, xi3, xi1, ASR#15 ;// h5/4 + + ;// xi0,xi1,xi2,xi3 now free + ;// IStage 4,3, rows 2to3 x1/2 + + LDR xi0, [pSrc, #2*16] ;// j2 + LDR xi1, [pSrc, #6*16] ;// 2*j6 + + ;// IStage 2, rows4to7 + SSUB16 xg6, xh6, xh7 + SSUB16 xg5, xh5, xg6 + SSUB16 xg4, xh4, xg5 + + SHADD16 xi1, xi1, xit ;// j6 + SSUB16 xi2, xi0, xi1 ;// (j2-j6) + SHADD16 xi3, xi0, xi1 ;// (j2+j6)/2 + + SMULBB xi0, xi2, LoopRR2 + SMULTB xi2, xi2, LoopRR2 + + MOV xi2, xi2, LSL #1 + + PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4 + + ;// xi0, xi1 now free + ;// IStage 4,3 rows 0to1 x 1/2 + LDR xi1, [pSrc, #4*16] ;// j4 + LDR xi0, [pSrc], #4 ;// j0 + + SSUB16 xh2, xh2, xi3 + ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows + + ADD xi0, xi0, xit, 
LSL #2 ;// ensure correct round + SHADD16 xh0, xi0, xi1 ;// of DC result + SHSUB16 xh1, xi0, xi1 + + ;// IStage 2 rows 0to3 x 1/2 + SHSUB16 xg2, xh1, xh2 + SHADD16 xg1, xh1, xh2 + SHSUB16 xg3, xh0, xh3 + SHADD16 xg0, xh0, xh3 + + ;// IStage 1 all rows + SHADD16 xf3, xg3, xg4 + SHSUB16 xf4, xg3, xg4 + SHADD16 xf2, xg2, xg5 + SHSUB16 xf5, xg2, xg5 + SHADD16 xf1, xg1, xg6 + SHSUB16 xf6, xg1, xg6 + SHADD16 xf0, xg0, xg7 + SHSUB16 xf7, xg0, xg7 + + ;// Saturate + IF ("$outsize"="u8") + USAT16 xf0, #8, xf0 + USAT16 xf1, #8, xf1 + USAT16 xf2, #8, xf2 + USAT16 xf3, #8, xf3 + USAT16 xf4, #8, xf4 + USAT16 xf5, #8, xf5 + USAT16 xf6, #8, xf6 + USAT16 xf7, #8, xf7 + ENDIF + IF ("$outsize"="s9") + SSAT16 xf0, #9, xf0 + SSAT16 xf1, #9, xf1 + SSAT16 xf2, #9, xf2 + SSAT16 xf3, #9, xf3 + SSAT16 xf4, #9, xf4 + SSAT16 xf5, #9, xf5 + SSAT16 xf6, #9, xf6 + SSAT16 xf7, #9, xf7 + ENDIF + + ;// Transpose to Row, Pack and store + IF ("$outsize"="u8") + ORR xf0, xf0, xf1, LSL #8 ;// [ b1 b0 a1 a0 ] + ORR xf2, xf2, xf3, LSL #8 ;// [ b3 b2 a3 a2 ] + ORR xf4, xf4, xf5, LSL #8 ;// [ b5 b4 a5 a4 ] + ORR xf6, xf6, xf7, LSL #8 ;// [ b7 b6 a7 a6 ] + PKHBT ra01, xf0, xf2, LSL #16 + PKHTB rb01, xf2, xf0, ASR #16 + PKHBT ra23, xf4, xf6, LSL #16 + PKHTB rb23, xf6, xf4, ASR #16 + STMIA pDest, {ra01, ra23} + IF "$stride"="s" + ADD pDest, pDest, pScale + STMIA pDest, {rb01, rb23} + ADD pDest, pDest, pScale + ELSE + ADD pDest, pDest, #($stride) + STMIA pDest, {rb01, rb23} + ADD pDest, pDest, #($stride) + ENDIF + ENDIF + IF ("$outsize"="s9"):LOR:("$outsize"="s16") + PKHBT ra01, xf0, xf1, LSL #16 + PKHTB rb01, xf1, xf0, ASR #16 + + PKHBT ra23, xf2, xf3, LSL #16 + PKHTB rb23, xf3, xf2, ASR #16 + + PKHBT ra45, xf4, xf5, LSL #16 + PKHTB rb45, xf5, xf4, ASR #16 + + PKHBT ra67, xf6, xf7, LSL #16 + PKHTB rb67, xf7, xf6, ASR #16 + + STMIA pDest, {ra01, ra23, ra45, ra67} + IF "$stride"="s" + ADD pDest, pDest, pScale + STMIA pDest, {rb01, rb23, rb45, rb67} + ADD pDest, pDest, pScale + ELSE + ADD pDest, pDest, 
#($stride) + STMIA pDest, {rb01, rb23, rb45, rb67} + ADD pDest, pDest, #($stride) + ENDIF + ENDIF + + BCC v6_idct_row$_F + ENDIF ;// ARM1136JS + + + IF CortexA8 + +Src0 EQU 7 +Src1 EQU 8 +Src2 EQU 9 +Src3 EQU 10 +Src4 EQU 11 +Src5 EQU 12 +Src6 EQU 13 +Src7 EQU 14 +Tmp EQU 15 + +qXj0 QN Src0.S16 +qXj1 QN Src1.S16 +qXj2 QN Src2.S16 +qXj3 QN Src3.S16 +qXj4 QN Src4.S16 +qXj5 QN Src5.S16 +qXj6 QN Src6.S16 +qXj7 QN Src7.S16 +qXjt QN Tmp.S16 + +dXj0lo DN (Src0*2).S16 +dXj0hi DN (Src0*2+1).S16 +dXj1lo DN (Src1*2).S16 +dXj1hi DN (Src1*2+1).S16 +dXj2lo DN (Src2*2).S16 +dXj2hi DN (Src2*2+1).S16 +dXj3lo DN (Src3*2).S16 +dXj3hi DN (Src3*2+1).S16 +dXj4lo DN (Src4*2).S16 +dXj4hi DN (Src4*2+1).S16 +dXj5lo DN (Src5*2).S16 +dXj5hi DN (Src5*2+1).S16 +dXj6lo DN (Src6*2).S16 +dXj6hi DN (Src6*2+1).S16 +dXj7lo DN (Src7*2).S16 +dXj7hi DN (Src7*2+1).S16 +dXjtlo DN (Tmp*2).S16 +dXjthi DN (Tmp*2+1).S16 + +qXi0 QN qXj0 +qXi1 QN qXj4 +qXi2 QN qXj2 +qXi3 QN qXj7 +qXi4 QN qXj5 +qXi5 QN qXjt +qXi6 QN qXj1 +qXi7 QN qXj6 +qXit QN qXj3 + +dXi0lo DN dXj0lo +dXi0hi DN dXj0hi +dXi1lo DN dXj4lo +dXi1hi DN dXj4hi +dXi2lo DN dXj2lo +dXi2hi DN dXj2hi +dXi3lo DN dXj7lo +dXi3hi DN dXj7hi +dXi4lo DN dXj5lo +dXi4hi DN dXj5hi +dXi5lo DN dXjtlo +dXi5hi DN dXjthi +dXi6lo DN dXj1lo +dXi6hi DN dXj1hi +dXi7lo DN dXj6lo +dXi7hi DN dXj6hi +dXitlo DN dXj3lo +dXithi DN dXj3hi + +qXh0 QN qXit +qXh1 QN qXi0 +qXh2 QN qXi2 +qXh3 QN qXi3 +qXh4 QN qXi7 +qXh5 QN qXi5 +qXh6 QN qXi4 +qXh7 QN qXi1 +qXht QN qXi6 + +dXh0lo DN dXitlo +dXh0hi DN dXithi +dXh1lo DN dXi0lo +dXh1hi DN dXi0hi +dXh2lo DN dXi2lo +dXh2hi DN dXi2hi +dXh3lo DN dXi3lo +dXh3hi DN dXi3hi +dXh4lo DN dXi7lo +dXh4hi DN dXi7hi +dXh5lo DN dXi5lo +dXh5hi DN dXi5hi +dXh6lo DN dXi4lo +dXh6hi DN dXi4hi +dXh7lo DN dXi1lo +dXh7hi DN dXi1hi +dXhtlo DN dXi6lo +dXhthi DN dXi6hi + +qXg0 QN qXh2 +qXg1 QN qXht +qXg2 QN qXh1 +qXg3 QN qXh0 +qXg4 QN qXh4 +qXg5 QN qXh5 +qXg6 QN qXh6 +qXg7 QN qXh7 +qXgt QN qXh3 + +qXf0 QN qXg6 +qXf1 QN qXg5 +qXf2 QN qXg4 +qXf3 QN qXgt +qXf4 QN qXg3 
+qXf5 QN qXg2 +qXf6 QN qXg1 +qXf7 QN qXg0 +qXft QN qXg7 + + +qXt0 QN 1.S32 +qXt1 QN 2.S32 +qT0lo QN 1.S32 +qT0hi QN 2.S32 +qT1lo QN 3.S32 +qT1hi QN 4.S32 +qScalelo QN 5.S32 ;// used to read post scale values +qScalehi QN 6.S32 +qTemp0 QN 5.S32 +qTemp1 QN 6.S32 + + +Scale1 EQU 6 +Scale2 EQU 15 +qScale1 QN Scale1.S16 +qScale2 QN Scale2.S16 +dScale1lo DN (Scale1*2).S16 +dScale1hi DN (Scale1*2+1).S16 +dScale2lo DN (Scale2*2).S16 +dScale2hi DN (Scale2*2+1).S16 + +dCoefs DN 0.S16 ;// Scale coefficients in format {[0] [C] [S] [InvSqrt2]} +InvSqrt2 DN dCoefs[0] ;// 1/sqrt(2) in Q15 +S DN dCoefs[1] ;// Sin(PI/8) in Q15 +C DN dCoefs[2] ;// Cos(PI/8) in Q15 + +pTemp RN 12 + + + IMPORT armCOMM_IDCTCoef + + VLD1 {qXj0,qXj1}, [pSrc @64]! + VLD1 {qXj2,qXj3}, [pSrc @64]! + VLD1 {qXj4,qXj5}, [pSrc @64]! + VLD1 {qXj6,qXj7}, [pSrc @64]! + + ;// Load PreScale and multiply with Src + ;// IStage 4 + + IF "$inscale"="s16" ;// 16X16 Mul + M_IDCT_PRESCALE16 + ENDIF + + IF "$inscale"="s32" ;// 32X32 Mul + M_IDCT_PRESCALE32 + ENDIF + + ;// IStage 3 + VQRDMULH qXi2, qXi2, InvSqrt2 ;// i2/sqrt(2) + VHADD qXh0, qXi0, qXi1 ;// (i0+i1)/2 + VHSUB qXh1, qXi0, qXi1 ;// (i0-i1)/2 + VHADD qXh7, qXi5, qXi7 ;// (i5+i7)/4 + VSUB qXh5, qXi5, qXi7 ;// (i5-i7)/2 + VQRDMULH qXh5, qXh5, InvSqrt2 ;// h5/sqrt(2) + VSUB qXh2, qXi2, qXi3 ;// h2, h3 + + VMULL qXt0, dXi4lo, C ;// c*i4 + VMLAL qXt0, dXi6lo, S ;// c*i4+s*i6 + VMULL qXt1, dXi4hi, C + VMLAL qXt1, dXi6hi, S + VSHRN dXh4lo, qXt0, #16 ;// h4 + VSHRN dXh4hi, qXt1, #16 + + VMULL qXt0, dXi6lo, C ;// c*i6 + VMLSL qXt0, dXi4lo, S ;// -s*i4 + c*i6 + VMULL qXt1, dXi6hi, C + VMLSL qXt1, dXi4hi, S + VSHRN dXh6lo, qXt0, #16 ;// h6 + VSHRN dXh6hi, qXt1, #16 + + ;// IStage 2 + VSUB qXg6, qXh6, qXh7 + VSUB qXg5, qXh5, qXg6 + VSUB qXg4, qXh4, qXg5 + VHADD qXg1, qXh1, qXh2 ;// (h1+h2)/2 + VHSUB qXg2, qXh1, qXh2 ;// (h1-h2)/2 + VHADD qXg0, qXh0, qXh3 ;// (h0+h3)/2 + VHSUB qXg3, qXh0, qXh3 ;// (h0-h3)/2 + + ;// IStage 1 all rows + VADD qXf3, qXg3, qXg4 + VSUB qXf4, qXg3,
qXg4 + VADD qXf2, qXg2, qXg5 + VSUB qXf5, qXg2, qXg5 + VADD qXf1, qXg1, qXg6 + VSUB qXf6, qXg1, qXg6 + VADD qXf0, qXg0, qXg7 + VSUB qXf7, qXg0, qXg7 + + ;// Transpose, store and loop +XTR0 EQU Src5 +XTR1 EQU Tmp +XTR2 EQU Src6 +XTR3 EQU Src7 +XTR4 EQU Src3 +XTR5 EQU Src0 +XTR6 EQU Src1 +XTR7 EQU Src2 +XTRt EQU Src4 + +qA0 QN XTR0.S32 ;// for XTRpose +qA1 QN XTR1.S32 +qA2 QN XTR2.S32 +qA3 QN XTR3.S32 +qA4 QN XTR4.S32 +qA5 QN XTR5.S32 +qA6 QN XTR6.S32 +qA7 QN XTR7.S32 + +dB0 DN XTR0*2+1 ;// for using VSWP +dB1 DN XTR1*2+1 +dB2 DN XTR2*2+1 +dB3 DN XTR3*2+1 +dB4 DN XTR4*2 +dB5 DN XTR5*2 +dB6 DN XTR6*2 +dB7 DN XTR7*2 + + + VTRN qXf0, qXf1 + VTRN qXf2, qXf3 + VTRN qXf4, qXf5 + VTRN qXf6, qXf7 + VTRN qA0, qA2 + VTRN qA1, qA3 + VTRN qA4, qA6 + VTRN qA5, qA7 + VSWP dB0, dB4 + VSWP dB1, dB5 + VSWP dB2, dB6 + VSWP dB3, dB7 + + +qYj0 QN qXf0 +qYj1 QN qXf1 +qYj2 QN qXf2 +qYj3 QN qXf3 +qYj4 QN qXf4 +qYj5 QN qXf5 +qYj6 QN qXf6 +qYj7 QN qXf7 +qYjt QN qXft + +dYj0lo DN (XTR0*2).S16 +dYj0hi DN (XTR0*2+1).S16 +dYj1lo DN (XTR1*2).S16 +dYj1hi DN (XTR1*2+1).S16 +dYj2lo DN (XTR2*2).S16 +dYj2hi DN (XTR2*2+1).S16 +dYj3lo DN (XTR3*2).S16 +dYj3hi DN (XTR3*2+1).S16 +dYj4lo DN (XTR4*2).S16 +dYj4hi DN (XTR4*2+1).S16 +dYj5lo DN (XTR5*2).S16 +dYj5hi DN (XTR5*2+1).S16 +dYj6lo DN (XTR6*2).S16 +dYj6hi DN (XTR6*2+1).S16 +dYj7lo DN (XTR7*2).S16 +dYj7hi DN (XTR7*2+1).S16 +dYjtlo DN (XTRt*2).S16 +dYjthi DN (XTRt*2+1).S16 + +qYi0 QN qYj0 +qYi1 QN qYj4 +qYi2 QN qYj2 +qYi3 QN qYj7 +qYi4 QN qYj5 +qYi5 QN qYjt +qYi6 QN qYj1 +qYi7 QN qYj6 +qYit QN qYj3 + +dYi0lo DN dYj0lo +dYi0hi DN dYj0hi +dYi1lo DN dYj4lo +dYi1hi DN dYj4hi +dYi2lo DN dYj2lo +dYi2hi DN dYj2hi +dYi3lo DN dYj7lo +dYi3hi DN dYj7hi +dYi4lo DN dYj5lo +dYi4hi DN dYj5hi +dYi5lo DN dYjtlo +dYi5hi DN dYjthi +dYi6lo DN dYj1lo +dYi6hi DN dYj1hi +dYi7lo DN dYj6lo +dYi7hi DN dYj6hi +dYitlo DN dYj3lo +dYithi DN dYj3hi + +qYh0 QN qYit +qYh1 QN qYi0 +qYh2 QN qYi2 +qYh3 QN qYi3 +qYh4 QN qYi7 +qYh5 QN qYi5 +qYh6 QN qYi4 +qYh7 QN qYi1 +qYht QN qYi6 + +dYh0lo 
DN dYitlo +dYh0hi DN dYithi +dYh1lo DN dYi0lo +dYh1hi DN dYi0hi +dYh2lo DN dYi2lo +dYh2hi DN dYi2hi +dYh3lo DN dYi3lo +dYh3hi DN dYi3hi +dYh4lo DN dYi7lo +dYh4hi DN dYi7hi +dYh5lo DN dYi5lo +dYh5hi DN dYi5hi +dYh6lo DN dYi4lo +dYh6hi DN dYi4hi +dYh7lo DN dYi1lo +dYh7hi DN dYi1hi +dYhtlo DN dYi6lo +dYhthi DN dYi6hi + +qYg0 QN qYh2 +qYg1 QN qYht +qYg2 QN qYh1 +qYg3 QN qYh0 +qYg4 QN qYh4 +qYg5 QN qYh5 +qYg6 QN qYh6 +qYg7 QN qYh7 +qYgt QN qYh3 + +qYf0 QN qYg6 +qYf1 QN qYg5 +qYf2 QN qYg4 +qYf3 QN qYgt +qYf4 QN qYg3 +qYf5 QN qYg2 +qYf6 QN qYg1 +qYf7 QN qYg0 +qYft QN qYg7 + + VRSHR qYj7, qYj7, #2 + VRSHR qYj6, qYj6, #1 + + VHADD qYi5, qYj1, qYj7 ;// i5 = (j1+j7)/2 + VSUB qYi6, qYj1, qYj7 ;// i6 = j1-j7 + VHADD qYi3, qYj2, qYj6 ;// i3 = (j2+j6)/2 + VSUB qYi2, qYj2, qYj6 ;// i2 = j2-j6 + VHADD qYi7, qYj5, qYj3 ;// i7 = (j5+j3)/2 + VSUB qYi4, qYj5, qYj3 ;// i4 = j5-j3 + + VQRDMULH qYi2, qYi2, InvSqrt2 ;// i2/sqrt(2) + ;// IStage 4,3 rows 0to1 x 1/2 + + MOV pTemp, #0x4 ;// ensure correct round + VDUP qScale1, pTemp ;// of DC result + VADD qYi0, qYi0, qScale1 + + VHADD qYh0, qYi0, qYi1 ;// (i0+i1)/2 + VHSUB qYh1, qYi0, qYi1 ;// (i0-i1)/2 + + VHADD qYh7, qYi5, qYi7 ;// (i5+i7)/4 + VSUB qYh5, qYi5, qYi7 ;// (i5-i7)/2 + VSUB qYh2, qYi2, qYi3 ;// h2, h3 + VQRDMULH qYh5, qYh5, InvSqrt2 ;// h5/sqrt(2) + + VMULL qXt0, dYi4lo, C ;// c*i4 + VMLAL qXt0, dYi6lo, S ;// c*i4+s*i6 + VMULL qXt1, dYi4hi, C + VMLAL qXt1, dYi6hi, S + VSHRN dYh4lo, qXt0, #16 ;// h4 + VSHRN dYh4hi, qXt1, #16 + + VMULL qXt0, dYi6lo, C ;// c*i6 + VMLSL qXt0, dYi4lo, S ;// -s*i4 + c*i6 + VMULL qXt1, dYi6hi, C + VMLSL qXt1, dYi4hi, S + VSHRN dYh6lo, qXt0, #16 ;// h6 + VSHRN dYh6hi, qXt1, #16 + + VSUB qYg6, qYh6, qYh7 + VSUB qYg5, qYh5, qYg6 + VSUB qYg4, qYh4, qYg5 + + ;// IStage 2 rows 0to3 x 1/2 + VHADD qYg1, qYh1, qYh2 ;// (h1+h2)/2 + VHSUB qYg2, qYh1, qYh2 ;// (h1-h2)/2 + VHADD qYg0, qYh0, qYh3 ;// (h0+h3)/2 + VHSUB qYg3, qYh0, qYh3 ;// (h0-h3)/2 + + + ;// IStage 1 all rows + VHADD qYf3, qYg3, qYg4 + VHSUB qYf4,
qYg3, qYg4 + VHADD qYf2, qYg2, qYg5 + VHSUB qYf5, qYg2, qYg5 + VHADD qYf1, qYg1, qYg6 + VHSUB qYf6, qYg1, qYg6 + VHADD qYf0, qYg0, qYg7 + VHSUB qYf7, qYg0, qYg7 + +YTR0 EQU Src0 +YTR1 EQU Src4 +YTR2 EQU Src1 +YTR3 EQU Src2 +YTR4 EQU Src7 +YTR5 EQU Src5 +YTR6 EQU Tmp +YTR7 EQU Src6 +YTRt EQU Src3 + +qC0 QN YTR0.S32 ;// for YTRpose +qC1 QN YTR1.S32 +qC2 QN YTR2.S32 +qC3 QN YTR3.S32 +qC4 QN YTR4.S32 +qC5 QN YTR5.S32 +qC6 QN YTR6.S32 +qC7 QN YTR7.S32 + +dD0 DN YTR0*2+1 ;// for using VSWP +dD1 DN YTR1*2+1 +dD2 DN YTR2*2+1 +dD3 DN YTR3*2+1 +dD4 DN YTR4*2 +dD5 DN YTR5*2 +dD6 DN YTR6*2 +dD7 DN YTR7*2 + + VTRN qYf0, qYf1 + VTRN qYf2, qYf3 + VTRN qYf4, qYf5 + VTRN qYf6, qYf7 + VTRN qC0, qC2 + VTRN qC1, qC3 + VTRN qC4, qC6 + VTRN qC5, qC7 + VSWP dD0, dD4 + VSWP dD1, dD5 + VSWP dD2, dD6 + VSWP dD3, dD7 + + +dYf0U8 DN YTR0*2.U8 +dYf1U8 DN YTR1*2.U8 +dYf2U8 DN YTR2*2.U8 +dYf3U8 DN YTR3*2.U8 +dYf4U8 DN YTR4*2.U8 +dYf5U8 DN YTR5*2.U8 +dYf6U8 DN YTR6*2.U8 +dYf7U8 DN YTR7*2.U8 + + ;// + ;// Do saturation if outsize is other than S16 + ;// + + IF ("$outsize"="u8") + ;// Output range [0-255] + VQMOVN dYf0U8, qYf0 + VQMOVN dYf1U8, qYf1 + VQMOVN dYf2U8, qYf2 + VQMOVN dYf3U8, qYf3 + VQMOVN dYf4U8, qYf4 + VQMOVN dYf5U8, qYf5 + VQMOVN dYf6U8, qYf6 + VQMOVN dYf7U8, qYf7 + ENDIF + + IF ("$outsize"="s9") + ;// Output range [-256 to +255] + VQSHL qYf0, qYf0, #16-9 + VQSHL qYf1, qYf1, #16-9 + VQSHL qYf2, qYf2, #16-9 + VQSHL qYf3, qYf3, #16-9 + VQSHL qYf4, qYf4, #16-9 + VQSHL qYf5, qYf5, #16-9 + VQSHL qYf6, qYf6, #16-9 + VQSHL qYf7, qYf7, #16-9 + + VSHR qYf0, qYf0, #16-9 + VSHR qYf1, qYf1, #16-9 + VSHR qYf2, qYf2, #16-9 + VSHR qYf3, qYf3, #16-9 + VSHR qYf4, qYf4, #16-9 + VSHR qYf5, qYf5, #16-9 + VSHR qYf6, qYf6, #16-9 + VSHR qYf7, qYf7, #16-9 + ENDIF + + ;// Store output depending on the Stride size + IF "$stride"="s" + VST1 qYf0, [pDest @64], Stride + VST1 qYf1, [pDest @64], Stride + VST1 qYf2, [pDest @64], Stride + VST1 qYf3, [pDest @64], Stride + VST1 qYf4, [pDest @64], Stride + VST1 qYf5, 
[pDest @64], Stride + VST1 qYf6, [pDest @64], Stride + VST1 qYf7, [pDest @64] + ELSE + IF ("$outsize"="u8") + VST1 dYf0U8, [pDest @64], #8 + VST1 dYf1U8, [pDest @64], #8 + VST1 dYf2U8, [pDest @64], #8 + VST1 dYf3U8, [pDest @64], #8 + VST1 dYf4U8, [pDest @64], #8 + VST1 dYf5U8, [pDest @64], #8 + VST1 dYf6U8, [pDest @64], #8 + VST1 dYf7U8, [pDest @64] + ELSE + ;// ("$outsize"="s9") or ("$outsize"="s16") + VST1 qYf0, [pDest @64], #16 + VST1 qYf1, [pDest @64], #16 + VST1 qYf2, [pDest @64], #16 + VST1 qYf3, [pDest @64], #16 + VST1 qYf4, [pDest @64], #16 + VST1 qYf5, [pDest @64], #16 + VST1 qYf6, [pDest @64], #16 + VST1 qYf7, [pDest @64] + ENDIF + + ENDIF + + + + ENDIF ;// CortexA8 + + + + MEND + + ;// Scale TWO input rows with TWO rows of 16 bit scale values + ;// + ;// This macro is used by M_IDCT_PRESCALE16 to pre-scale two rows + ;// of input (eight values each) with two rows of scale values. Also + ;// loads the next two rows of scale values from pScale when $LastRow is "0". + ;// + ;// Input Registers: + ;// + ;// $dAlo - Input D register with first four S16 values of row n + ;// $dAhi - Input D register with next four S16 values of row n + ;// $dBlo - Input D register with first four S16 values of row n+1 + ;// $dBhi - Input D register with next four S16 values of row n+1 + ;// pScale - Pointer to next row of scale values + ;// qT0lo - Temporary scratch register + ;// qT0hi - Temporary scratch register + ;// qT1lo - Temporary scratch register + ;// qT1hi - Temporary scratch register + ;// dScale1lo - Scale value of row n + ;// dScale1hi - Scale value of row n + ;// dScale2lo - Scale value of row n+1 + ;// dScale2hi - Scale value of row n+1 + ;// + ;// Input Flag + ;// + ;// $LastRow - "0" if more rows follow (scale registers are reloaded); any other value marks the last row pair + ;// + ;// Output Registers: + ;// + ;// $dAlo - Scaled output values (first four S16 of row n) + ;// $dAhi - Scaled output values (next four S16 of row n) + ;// $dBlo - Scaled output values (first four S16 of row n+1) + ;// $dBhi - Scaled output
values (next four S16 of row n+1) + ;// qScale1 - Scale values for next row + ;// qScale2 - Scale values for next row+1 + ;// pScale - Pointer to next row of scale values + ;// + MACRO + M_IDCT_SCALE16 $dAlo, $dAhi, $dBlo, $dBhi, $LastRow + VMULL qT0lo, $dAlo, dScale1lo + VMULL qT0hi, $dAhi, dScale1hi + VMULL qT1lo, $dBlo, dScale2lo + VMULL qT1hi, $dBhi, dScale2hi + IF "$LastRow"="0" + VLD1 qScale1, [pScale], #16 ;// Load scale for row n+2 + VLD1 qScale2, [pScale], #16 ;// Load scale for row n+3 + ENDIF + VQRSHRN $dAlo, qT0lo, #12 + VQRSHRN $dAhi, qT0hi, #12 + VQRSHRN $dBlo, qT1lo, #12 + VQRSHRN $dBhi, qT1hi, #12 + MEND + + ;// Scale 8x8 block input values with 16 bit scale values + ;// + ;// This macro is used to pre-scale block of 8x8 input. + ;// This also does the first stage (IStage 4) of the IDCT. + ;// + ;// Input Registers: + ;// + ;// dXjnlo - n th input D register with first four S16 values + ;// dXjnhi - n th input D register with next four S16 values + ;// qXjn - n th input Q register with eight S16 values + ;// pScale - Pointer to scale values + ;// + ;// Output Registers: + ;// + ;// qXin - n th output Q register with eight S16 output values of 1st stage + ;// + MACRO + M_IDCT_PRESCALE16 + VLD1 qScale1, [pScale], #16 ;// Load Pre scale for row 0 + VLD1 qScale2, [pScale], #16 ;// Load Pre scale for row 1 + M_IDCT_SCALE16 dXj0lo, dXj0hi, dXj1lo, dXj1hi, 0 ;// Pre scale row 0 & 1 + M_IDCT_SCALE16 dXj2lo, dXj2hi, dXj3lo, dXj3hi, 0 ;// Pre scale row 2 & 3 + M_IDCT_SCALE16 dXj4lo, dXj4hi, dXj5lo, dXj5hi, 0 ;// Pre scale row 4 & 5 + M_IDCT_SCALE16 dXj6lo, dXj6hi, dXj7lo, dXj7hi, 1 ;// Pre scale row 6 & 7 (last pair, no reload) + VHADD qXi5, qXj1, qXj7 ;// (j1+j7)/2 + VSUB qXi6, qXj1, qXj7 ;// j1-j7 + LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants + VHADD qXi3, qXj2, qXj6 ;// (j2+j6)/2 + VSUB qXi2, qXj2, qXj6 ;// j2-j6 + VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants + VHADD qXi7, qXj5, qXj3 ;// (j5+j3)/2 + VSUB qXi4, qXj5, qXj3 ;// j5-j3 + MEND + + + ;// Scale 8x8 block input values with 32 bit scale values + ;// + ;// This
macro is used to pre-scale block of 8x8 input. + ;// This also do the Ist stage transformations of IDCT. + ;// + ;// Input Registers: + ;// + ;// dXjnlo - n th input D register with first four S16 values + ;// dXjnhi - n th input D register with next four S16 values + ;// qXjn - n th input Q register with eight S16 values + ;// pScale - Pointer to 32bit scale values in Q23 format + ;// + ;// Output Registers: + ;// + ;// dXinlo - n th output D register with first four S16 output values of 1st stage + ;// dXinhi - n th output D register with next four S16 output values of 1st stage + ;// + MACRO + M_IDCT_PRESCALE32 +qScale0lo QN 0.S32 +qScale0hi QN 1.S32 +qScale1lo QN 2.S32 +qScale1hi QN 3.S32 +qScale2lo QN qScale1lo +qScale2hi QN qScale1hi +qScale3lo QN qScale1lo +qScale3hi QN qScale1hi +qScale4lo QN qScale1lo +qScale4hi QN qScale1hi +qScale5lo QN qScale0lo +qScale5hi QN qScale0hi +qScale6lo QN qScale0lo +qScale6hi QN qScale0hi +qScale7lo QN qScale0lo +qScale7hi QN qScale0hi + +qSrc0lo QN 4.S32 +qSrc0hi QN 5.S32 +qSrc1lo QN 6.S32 +qSrc1hi QN Src4.S32 +qSrc2lo QN qSrc0lo +qSrc2hi QN qSrc0hi +qSrc3lo QN qSrc0lo +qSrc3hi QN qSrc0hi +qSrc4lo QN qSrc0lo +qSrc4hi QN qSrc0hi +qSrc5lo QN qSrc1lo +qSrc5hi QN qSrc1hi +qSrc6lo QN qSrc1lo +qSrc6hi QN qSrc1hi +qSrc7lo QN qSrc0lo +qSrc7hi QN qSrc0hi + +qRes17lo QN qScale0lo +qRes17hi QN qScale0hi +qRes26lo QN qScale0lo +qRes26hi QN qScale0hi +qRes53lo QN qScale0lo +qRes53hi QN qScale0hi + + ADD pTemp, pScale, #4*8*7 ;// Address of pScale[7] + + ;// Row 0 + VLD1 {qScale0lo, qScale0hi}, [pScale]! + VSHLL qSrc0lo, dXj0lo, #(12-1) + VSHLL qSrc0hi, dXj0hi, #(12-1) + VLD1 {qScale1lo, qScale1hi}, [pScale]! + VQRDMULH qSrc0lo, qScale0lo, qSrc0lo + VQRDMULH qSrc0hi, qScale0hi, qSrc0hi + VLD1 {qScale7lo, qScale7hi}, [pTemp]! 
+ VSHLL qSrc1lo, dXj1lo, #(12-1) + VSHLL qSrc1hi, dXj1hi, #(12-1) + VMOVN dXi0lo, qSrc0lo ;// Output i0 + VMOVN dXi0hi, qSrc0hi + VSHLL qSrc7lo, dXj7lo, #(12-1) + VSHLL qSrc7hi, dXj7hi, #(12-1) + SUB pTemp, pTemp, #((16*2)+(4*8*1)) + VQRDMULH qSrc1lo, qScale1lo, qSrc1lo + VQRDMULH qSrc1hi, qScale1hi, qSrc1hi + VQRDMULH qSrc7lo, qScale7lo, qSrc7lo + VQRDMULH qSrc7hi, qScale7hi, qSrc7hi + VLD1 {qScale2lo, qScale2hi}, [pScale]! + + ;// Row 1 & 7 + VHADD qRes17lo, qSrc1lo, qSrc7lo ;// (j1+j7)/2 + VHADD qRes17hi, qSrc1hi, qSrc7hi ;// (j1+j7)/2 + VMOVN dXi5lo, qRes17lo ;// Output i5 + VMOVN dXi5hi, qRes17hi + VSUB qRes17lo, qSrc1lo, qSrc7lo ;// j1-j7 + VSUB qRes17hi, qSrc1hi, qSrc7hi ;// j1-j7 + VMOVN dXi6lo, qRes17lo ;// Output i6 + VMOVN dXi6hi, qRes17hi + VSHLL qSrc2lo, dXj2lo, #(12-1) + VSHLL qSrc2hi, dXj2hi, #(12-1) + VLD1 {qScale6lo, qScale6hi}, [pTemp]! + VSHLL qSrc6lo, dXj6lo, #(12-1) + VSHLL qSrc6hi, dXj6hi, #(12-1) + SUB pTemp, pTemp, #((16*2)+(4*8*1)) + VQRDMULH qSrc2lo, qScale2lo, qSrc2lo + VQRDMULH qSrc2hi, qScale2hi, qSrc2hi + VQRDMULH qSrc6lo, qScale6lo, qSrc6lo + VQRDMULH qSrc6hi, qScale6hi, qSrc6hi + VLD1 {qScale3lo, qScale3hi}, [pScale]! + + ;// Row 2 & 6 + VHADD qRes26lo, qSrc2lo, qSrc6lo ;// (j2+j6)/2 + VHADD qRes26hi, qSrc2hi, qSrc6hi ;// (j2+j6)/2 + VMOVN dXi3lo, qRes26lo ;// Output i3 + VMOVN dXi3hi, qRes26hi + VSUB qRes26lo, qSrc2lo, qSrc6lo ;// j2-j6 + VSUB qRes26hi, qSrc2hi, qSrc6hi ;// j2-j6 + VMOVN dXi2lo, qRes26lo ;// Output i2 + VMOVN dXi2hi, qRes26hi + VSHLL qSrc3lo, dXj3lo, #(12-1) + VSHLL qSrc3hi, dXj3hi, #(12-1) + VLD1 {qScale5lo, qScale5hi}, [pTemp]! 
+ VSHLL qSrc5lo, dXj5lo, #(12-1) + VSHLL qSrc5hi, dXj5hi, #(12-1) + VQRDMULH qSrc3lo, qScale3lo, qSrc3lo + VQRDMULH qSrc3hi, qScale3hi, qSrc3hi + VQRDMULH qSrc5lo, qScale5lo, qSrc5lo + VQRDMULH qSrc5hi, qScale5hi, qSrc5hi + + ;// Row 3 & 5 + VHADD qRes53lo, qSrc5lo, qSrc3lo ;// (j5+j3)/2 + VHADD qRes53hi, qSrc5hi, qSrc3hi ;// (j5+j3)/2 + SUB pSrc, pSrc, #16*2*2 + VMOVN dXi7lo, qRes53lo ;// Output i7 + VMOVN dXi7hi, qRes53hi + VSUB qRes53lo, qSrc5lo, qSrc3lo ;// j5-j3 + VSUB qRes53hi, qSrc5hi, qSrc3hi ;// j5-j3 + VLD1 qXj4, [pSrc @64] + VMOVN dXi4lo, qRes53lo ;// Output i4 + VMOVN dXi4hi, qRes53hi + VSHLL qSrc4lo, dXj4lo, #(12-1) + VSHLL qSrc4hi, dXj4hi, #(12-1) + VLD1 {qScale4lo, qScale4hi}, [pScale] + LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants + VQRDMULH qSrc4lo, qScale4lo, qSrc4lo + VQRDMULH qSrc4hi, qScale4hi, qSrc4hi + VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants + ;// Row 4 + VMOVN dXi1lo, qSrc4lo ;// Output i1 + VMOVN dXi1hi, qSrc4hi + + MEND + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h new file mode 100644 index 0000000000000000000000000000000000000000..b5da9dce4e2471f189a8267552b124f47ec6d4a3 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h @@ -0,0 +1,27 @@ +/** + * + * File Name: armCOMM_MaskTable.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Mask Table to mask the end of array + */ + + + +#ifndef _ARMCOMM_MASKTABLE_H_ +#define _ARMCOMM_MASKTABLE_H_ + +#define MaskTableSize 72 + +/* Mask table */ + +extern const OMX_U16 armCOMM_qMaskTable16[MaskTableSize]; +extern const OMX_U8 armCOMM_qMaskTable8[MaskTableSize]; + +#endif diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h new file mode 100644 index 0000000000000000000000000000000000000000..13e5b2b647ced1b40492fcf7b44a4d97d08014aa --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h @@ -0,0 +1,43 @@ +/* Guard the header against multiple inclusion. */ +#ifndef __ARM_COMM_VERSION_H__ +#define __ARM_COMM_VERSION_H__ + + +/* The following line should be in omxtypes.h but hasn't been approved by OpenMAX yet */ +#define OMX_VERSION 102 + +/* We need to define these macros in order to convert a #define number into a #define string. */ +#define ARM_QUOTE(a) #a +#define ARM_INDIRECT(A) ARM_QUOTE(A) + +/* Convert the OMX_VERSION number into a string that can be used, for example, to print it out. */ +#define ARM_VERSION_STRING ARM_INDIRECT(OMX_VERSION) + + +/* Define this in order to turn on ARM version/release/build strings in each domain */ +#define ARM_INCLUDE_VERSION_DESCRIPTIONS + +#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS + extern const char * const omxAC_VersionDescription; + extern const char * const omxIC_VersionDescription; + extern const char * const omxIP_VersionDescription; + extern const char * const omxSP_VersionDescription; + extern const char * const omxVC_VersionDescription; +#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */ + + +/* The following entries should be automatically updated by the release script */ +/* They are used in the ARM version strings defined for each domain. */ + +/* The release tag associated with this release of the library. 
- used for source and object releases */ +#define OMX_ARM_RELEASE_TAG "r0p0-00bet1" + +/* The ARM architecture used to build any objects or executables in this release. */ +#define OMX_ARM_BUILD_ARCHITECTURE "ARM Architecture V6" + +/* The ARM Toolchain used to build any objects or executables in this release. */ +#define OMX_ARM_BUILD_TOOLCHAIN "ARM RVCT 3.1" + + +#endif /* __ARM_COMM_VERSION_H__ */ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h new file mode 100644 index 0000000000000000000000000000000000000000..2df1fc88cb806eac5dde57ee5132c58dd1f59616 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h @@ -0,0 +1,1154 @@ +;// +;// +;// File Name: armCOMM_s.h +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// ARM optimized OpenMAX common header file +;// + +;// Protect against multiple inclusion + IF :LNOT::DEF:ARMCOMM_S_H + GBLL ARMCOMM_S_H + + REQUIRE8 ;// Requires 8-byte stack alignment + PRESERVE8 ;// Preserves 8-byte stack alignment + + GBLL ARM_ERRORCHECK +ARM_ERRORCHECK SETL {FALSE} + +;// Globals + + GBLS _RRegList ;// R saved register list + GBLS _DRegList ;// D saved register list + GBLS _Variant ;// Selected processor variant + GBLS _CPU ;// CPU name + GBLS _Struct ;// Structure name + + GBLL _InFunc ;// Inside function assembly flag + GBLL _SwLong ;// Long switch flag + + GBLA _RBytes ;// Number of register bytes on stack + GBLA _SBytes ;// Number of scratch bytes on stack + GBLA _ABytes ;// Stack offset of next argument + GBLA _Workspace ;// Stack offset of scratch workspace + GBLA _F ;// Function number + GBLA _StOff ;// Struct offset + GBLA _SwNum ;// Switch number + GBLS _32 ;// Suffix for 32 byte alignment + GBLS _16 ;// Suffix for 16 byte alignment + +_InFunc SETL {FALSE} +_SBytes SETA 0
+_F SETA 0 +_SwNum SETA 0 +_32 SETS "ALIGN32" +_16 SETS "ALIGN16" + +;///////////////////////////////////////////////////////// +;// Override the tools settings of the CPU if the #define +;// OVERRIDECPU is set, otherwise use the CPU defined by the +;// assembler settings. +;///////////////////////////////////////////////////////// + + IF :DEF: OVERRIDECPU +_CPU SETS OVERRIDECPU + ELSE +_CPU SETS {CPU} + ENDIF + + + +;///////////////////////////////////////////////////////// +;// Work out which code to build +;///////////////////////////////////////////////////////// + + IF :DEF:ARM1136JS:LOR::DEF:CortexA8:LOR::DEF:ARM_GENERIC + INFO 1,"Please switch to using M_VARIANTS" + ENDIF + + ;// Define and reset all officially recognised variants + MACRO + _M_DEF_VARIANTS + _M_DEF_VARIANT ARM926EJS + _M_DEF_VARIANT ARM1136JS + _M_DEF_VARIANT ARM1136JS_U + _M_DEF_VARIANT CortexA8 + _M_DEF_VARIANT ARM7TDMI + MEND + + MACRO + _M_DEF_VARIANT $var + GBLL $var + GBLL _ok$var +$var SETL {FALSE} + MEND + + + ;// Variant declaration + ;// + ;// Define a list of code variants supported by this + ;// source file. This macro then chooses the most + ;// appropriate variant to build for the currently configured + ;// core.
+ ;// + MACRO + M_VARIANTS $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7 + ;// Set to TRUE variants that are supported + _M_DEF_VARIANTS + _M_VARIANT $v0 + _M_VARIANT $v1 + _M_VARIANT $v2 + _M_VARIANT $v3 + _M_VARIANT $v4 + _M_VARIANT $v5 + _M_VARIANT $v6 + _M_VARIANT $v7 + + ;// Look for first available variant to match a CPU + ;// _M_TRY cpu, variant fall back list +_Variant SETS "" + _M_TRY ARM926EJ-S, ARM926EJS + _M_TRY ARM1176JZ-S, ARM1136JS + _M_TRY ARM1176JZF-S, ARM1136JS + _M_TRY ARM1156T2-S, ARM1136JS + _M_TRY ARM1156T2F-S, ARM1136JS + _M_TRY ARM1136J-S, ARM1136JS + _M_TRY ARM1136JF-S, ARM1136JS + _M_TRY MPCore, ARM1136JS + _M_TRY Cortex-A8, CortexA8, ARM1136JS + _M_TRY Cortex-R4, ARM1136JS + _M_TRY ARM7TDMI + + ;// Select the correct variant + _M_DEF_VARIANTS + IF _Variant="" + INFO 1, "No match found for CPU '$_CPU'" + ELSE +$_Variant SETL {TRUE} + ENDIF + MEND + + ;// Register a variant as available + MACRO + _M_VARIANT $var + IF "$var"="" + MEXIT + ENDIF + IF :LNOT::DEF:_ok$var + INFO 1, "Unrecognized variant '$var'" + ENDIF +$var SETL {TRUE} + MEND + + ;// For a given CPU, see if any of the variants supporting + ;// this CPU are available. 
The first available variant is + ;// chosen + MACRO + _M_TRY $cpu, $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7 + IF "$cpu"<>_CPU + MEXIT + ENDIF + _M_TRY1 $v0 + _M_TRY1 $v1 + _M_TRY1 $v2 + _M_TRY1 $v3 + _M_TRY1 $v4 + _M_TRY1 $v5 + _M_TRY1 $v6 + _M_TRY1 $v7 + ;// Check a match was found + IF _Variant="" + INFO 1, "No variant match found for CPU '$_CPU'" + ENDIF + MEND + + MACRO + _M_TRY1 $var + IF "$var"="" + MEXIT + ENDIF + IF (_Variant=""):LAND:$var +_Variant SETS "$var" + ENDIF + MEND + +;//////////////////////////////////////////////////////// +;// Structure definition +;//////////////////////////////////////////////////////// + + ;// Declare a structure of given name + MACRO + M_STRUCT $sname +_Struct SETS "$sname" +_StOff SETA 0 + MEND + + ;// Declare a structure field + ;// The field is called $sname_$fname + ;// $size = the size of each entry, must be power of 2 + ;// $number = (if provided) the number of entries for an array + MACRO + M_FIELD $fname, $size, $number + IF (_StOff:AND:($size-1))!=0 +_StOff SETA _StOff + ($size - (_StOff:AND:($size-1))) + ENDIF +$_Struct._$fname EQU _StOff + IF "$number"<>"" +_StOff SETA _StOff + $size*$number + ELSE +_StOff SETA _StOff + $size + ENDIF + MEND + + + MACRO + M_ENDSTRUCT +sizeof_$_Struct EQU _StOff +_Struct SETS "" + MEND + +;////////////////////////////////////////////////////////// +;// Switch and table macros +;////////////////////////////////////////////////////////// + + ;// Start a relative switch table with register to switch on + ;// + ;// $v = the register to switch on + ;// $s = if specified must be "L" to indicate long + ;// this allows a greater range to the case code + MACRO + M_SWITCH $v, $s + ASSERT "$s"="":LOR:"$s"="L" +_SwLong SETL {FALSE} + IF "$s"="L" +_SwLong SETL {TRUE} + ENDIF +_SwNum SETA _SwNum+1 + IF {CONFIG}=16 + ;// Thumb + IF _SwLong + TBH [pc, $v, LSL#1] + ELSE + TBB [pc, $v] + ENDIF +_Switch$_SwNum + ELSE + ;// ARM + ADD pc, pc, $v, LSL #2 + NOP + ENDIF + MEND + + ;// Add a case to the switch 
statement + MACRO + M_CASE $label + IF {CONFIG}=16 + ;// Thumb + IF _SwLong + DCW ($label - _Switch$_SwNum)/2 + ELSE + DCB ($label - _Switch$_SwNum)/2 + ENDIF + ELSE + ;// ARM + B $label + ENDIF + MEND + + ;// End of switch statement + MACRO + M_ENDSWITCH + ALIGN 2 + MEND + + +;//////////////////////////////////////////////////////// +;// Data area allocation +;//////////////////////////////////////////////////////// + + ;// Constant table allocator macro + ;// + ;// Creates a new section for each constant table + ;// $name is symbol through which the table can be accessed. + ;// $align is the optional alignment of the table, log2 of + ;// the byte alignment - $align=4 is 16 byte aligned + MACRO + M_TABLE $name, $align + ASSERT :LNOT:_InFunc + IF "$align"="" + AREA |.constdata|, READONLY, DATA + ELSE + ;// AREAs inherit the alignment of the first declaration. + ;// Therefore for each alignment size we must have an area + ;// of a different name. + AREA constdata_a$align, READONLY, DATA, ALIGN=$align + + ;// We also force alignment incase we are tagging onto + ;// an already started area. + ALIGN (1<<$align) + ENDIF +$name + MEND + +;///////////////////////////////////////////////////// +;// Macros to allocate space on the stack +;// +;// These all assume that the stack is 8-byte aligned +;// at entry to the function, which means that the +;// 32-byte alignment macro needs to work in a +;// bit more of a special way... +;///////////////////////////////////////////////////// + + + + + ;// Allocate 1-byte aligned area of name + ;// $name size $size bytes. + MACRO + M_ALLOC1 $name, $size + ASSERT :LNOT:_InFunc +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + ;// Allocate 2-byte aligned area of name + ;// $name size $size bytes. 
+ MACRO + M_ALLOC2 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:1)!=0 +_SBytes SETA _SBytes + (2 - (_SBytes:AND:1)) + ENDIF +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + ;// Allocate 4-byte aligned area of name + ;// $name size $size bytes. + MACRO + M_ALLOC4 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:3)!=0 +_SBytes SETA _SBytes + (4 - (_SBytes:AND:3)) + ENDIF +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + ;// Allocate 8-byte aligned area of name + ;// $name size $size bytes. + MACRO + M_ALLOC8 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:7)!=0 +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) + ENDIF +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + + ;// Allocate 8-byte aligned area of name + ;// $name size ($size+8) bytes. + ;// The extra 8 bytes are later used to align the pointer to 16 bytes + + MACRO + M_ALLOC16 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:7)!=0 +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) + ENDIF +$name$_F$_16 EQU (_SBytes + 8) +_SBytes SETA _SBytes + ($size) + 8 + MEND + + ;// Allocate 8-byte aligned area of name + ;// $name size ($size+24) bytes.
+ ;// The extra 24 bytes are later used to align the pointer to 32 bytes + + MACRO + M_ALLOC32 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:7)!=0 +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) + ENDIF +$name$_F$_32 EQU (_SBytes + 24) +_SBytes SETA _SBytes + ($size) + 24 + MEND + + + + + ;// Argument Declaration Macro + ;// + ;// Allocate an argument name $name + ;// size $size bytes + MACRO + M_ARG $name, $size + ASSERT _InFunc +$name$_F EQU _ABytes +_ABytes SETA _ABytes + ($size) + MEND + +;/////////////////////////////////////////////// +;// Macros to access stacked variables +;/////////////////////////////////////////////// + + ;// Macro to perform a data processing operation + ;// with a constant second operand + MACRO + _M_OPC $op,$rd,$rn,$const + LCLA _sh + LCLA _cst +_sh SETA 0 +_cst SETA $const + IF _cst=0 + $op $rd, $rn, #_cst + MEXIT + ENDIF + WHILE (_cst:AND:3)=0 +_cst SETA _cst>>2 +_sh SETA _sh+2 + WEND + $op $rd, $rn, #(_cst:AND:0x000000FF)<<_sh + IF _cst>=256 + $op $rd, $rd, #(_cst:AND:0xFFFFFF00)<<_sh + ENDIF + MEND + + ;// Macro to perform a data access operation + ;// Such as LDR or STR + ;// The addressing mode is modified such that + ;// 1. If no address is given then the name is taken + ;// as a stack offset + ;// 2. If the addressing mode is not available for the + ;// state being assembled for (eg Thumb) then a suitable + ;// addressing mode is substituted. + ;// + ;// On Entry: + ;// $i = Instruction to perform (eg "LDRB") + ;// $a = Required byte alignment + ;// $r = Register(s) to transfer (eg "r1") + ;// $a0,$a1,$a2. Addressing mode and condition.
One of: + ;// label {,cc} + ;// [base] {,,,cc} + ;// [base, offset]{!} {,,cc} + ;// [base, offset, shift]{!} {,cc} + ;// [base], offset {,,cc} + ;// [base], offset, shift {,cc} + MACRO + _M_DATA $i,$a,$r,$a0,$a1,$a2,$a3 + IF "$a0":LEFT:1="[" + IF "$a1"="" + $i$a3 $r, $a0 + ELSE + IF "$a0":RIGHT:1="]" + IF "$a2"="" + _M_POSTIND $i$a3, "$r", $a0, $a1 + ELSE + _M_POSTIND $i$a3, "$r", $a0, "$a1,$a2" + ENDIF + ELSE + IF "$a2"="" + _M_PREIND $i$a3, "$r", $a0, $a1 + ELSE + _M_PREIND $i$a3, "$r", $a0, "$a1,$a2" + ENDIF + ENDIF + ENDIF + ELSE + LCLA _Offset +_Offset SETA _Workspace + $a0$_F + ASSERT (_Offset:AND:($a-1))=0 + $i$a1 $r, [sp, #_Offset] + ENDIF + MEND + + ;// Handle post indexed load/stores + ;// op reg, [base], offset + MACRO + _M_POSTIND $i,$r,$a0,$a1 + LCLS _base + LCLS _offset + IF {CONFIG}=16 ;// Thumb +_base SETS ("$a0":LEFT:(:LEN:"$a0"-1)):RIGHT:(:LEN:"$a0"-2) ;// remove [] +_offset SETS "$a1" + IF _offset:LEFT:1="+" +_offset SETS _offset:RIGHT:(:LEN:_offset-1) + ENDIF + $i $r, $a0 + IF _offset:LEFT:1="-" +_offset SETS _offset:RIGHT:(:LEN:_offset-1) + SUB $_base, $_base, $_offset + ELSE + ADD $_base, $_base, $_offset + ENDIF + ELSE ;// ARM + $i $r, $a0, $a1 + ENDIF + MEND + + ;// Handle pre indexed load/store + ;// op reg, [base, offset]{!} + MACRO + _M_PREIND $i,$r,$a0,$a1 + LCLS _base + LCLS _offset + IF ({CONFIG}=16):LAND:(("$a1":RIGHT:2)="]!") +_base SETS "$a0":RIGHT:(:LEN:("$a0")-1) +_offset SETS "$a1":LEFT:(:LEN:("$a1")-2) + $i $r, [$_base, $_offset] + ADD $_base, $_base, $_offset + ELSE + $i $r, $a0, $a1 + ENDIF + MEND + + ;// Load unsigned byte from stack + MACRO + M_LDRB $r,$a0,$a1,$a2,$a3 + _M_DATA "LDRB",1,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load signed byte from stack + MACRO + M_LDRSB $r,$a0,$a1,$a2,$a3 + _M_DATA "LDRSB",1,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Store byte to stack + MACRO + M_STRB $r,$a0,$a1,$a2,$a3 + _M_DATA "STRB",1,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load unsigned half word from stack + MACRO + M_LDRH $r,$a0,$a1,$a2,$a3 + _M_DATA 
"LDRH",2,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load signed half word from stack + MACRO + M_LDRSH $r,$a0,$a1,$a2,$a3 + _M_DATA "LDRSH",2,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Store half word to stack + MACRO + M_STRH $r,$a0,$a1,$a2,$a3 + _M_DATA "STRH",2,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load word from stack + MACRO + M_LDR $r,$a0,$a1,$a2,$a3 + _M_DATA "LDR",4,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Store word to stack + MACRO + M_STR $r,$a0,$a1,$a2,$a3 + _M_DATA "STR",4,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load double word from stack + MACRO + M_LDRD $r0,$r1,$a0,$a1,$a2,$a3 + _M_DATA "LDRD",8,"$r0,$r1",$a0,$a1,$a2,$a3 + MEND + + ;// Store double word to stack + MACRO + M_STRD $r0,$r1,$a0,$a1,$a2,$a3 + _M_DATA "STRD",8,"$r0,$r1",$a0,$a1,$a2,$a3 + MEND + + ;// Get absolute address of stack allocated location + MACRO + M_ADR $a, $b, $cc + _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F) + MEND + + ;// Get absolute address of stack allocated location and align the address to 16 bytes + MACRO + M_ADR16 $a, $b, $cc + _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F$_16) + + ;// Now align $a to 16 bytes + BIC$cc $a,$a,#0x0F + MEND + + ;// Get absolute address of stack allocated location and align the address to 32 bytes + MACRO + M_ADR32 $a, $b, $cc + _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F$_32) + + ;// Now align $a to 32 bytes + BIC$cc $a,$a,#0x1F + MEND + +;////////////////////////////////////////////////////////// +;// Function header and footer macros +;////////////////////////////////////////////////////////// + + ;// Function Header Macro + ;// Generates the function prologue + ;// Note that functions should all be "stack-moves-once" + ;// The FNSTART and FNEND macros should be the only places + ;// where the stack moves. 
+ ;// + ;// $name = function name + ;// $rreg = "" don't stack any registers + ;// "lr" stack registers "r4,lr" + ;// "rN" stack registers "r4-rN,lr" (odd N is rounded up to N+1) + ;// $dreg = "" don't stack any D registers + ;// "dN" stack registers "d8-dN" + ;// + ;// Note: ARM Architecture procedure call standard AAPCS + ;// states that r4-r11, sp, d8-d15 must be preserved by + ;// a compliant function. + MACRO + M_START $name, $rreg, $dreg + ASSERT :LNOT:_InFunc + ASSERT "$name"!="" +_InFunc SETL {TRUE} +_RBytes SETA 0 +_Workspace SETA 0 + + ;// Create an area for the function + AREA |.text|, CODE + EXPORT $name +$name FUNCTION + + ;// Save R registers + _M_GETRREGLIST $rreg + IF _RRegList<>"" + STMFD sp!, {$_RRegList, lr} + ENDIF + + ;// Save D registers + _M_GETDREGLIST $dreg + IF _DRegList<>"" + VSTMFD sp!, {$_DRegList} + ENDIF + + + ;// Ensure size claimed on stack is 8-byte aligned + IF ((_SBytes:AND:7)!=0) +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) + ENDIF + + IF (_SBytes!=0) + _M_OPC SUB, sp, sp, _SBytes + ENDIF + + +_ABytes SETA _SBytes + _RBytes - _Workspace + + + ;// Print function name if debug enabled + M_PRINTF "$name\n", + MEND + + ;// Work out a list of R saved registers + MACRO + _M_GETRREGLIST $rreg + IF "$rreg"="" +_RRegList SETS "" + MEXIT + ENDIF + IF "$rreg"="lr":LOR:"$rreg"="r4" +_RRegList SETS "r4" +_RBytes SETA _RBytes+8 + MEXIT + ENDIF + IF "$rreg"="r5":LOR:"$rreg"="r6" +_RRegList SETS "r4-r6" +_RBytes SETA _RBytes+16 + MEXIT + ENDIF + IF "$rreg"="r7":LOR:"$rreg"="r8" +_RRegList SETS "r4-r8" +_RBytes SETA _RBytes+24 + MEXIT + ENDIF + IF "$rreg"="r9":LOR:"$rreg"="r10" +_RRegList SETS "r4-r10" +_RBytes SETA _RBytes+32 + MEXIT + ENDIF + IF "$rreg"="r11":LOR:"$rreg"="r12" +_RRegList SETS "r4-r12" +_RBytes SETA _RBytes+40 + MEXIT + ENDIF + INFO 1, "Unrecognized saved r register limit '$rreg'" + MEND + + ;// Work out a list of D saved registers + MACRO + _M_GETDREGLIST $dreg + IF "$dreg"="" +_DRegList SETS "" + MEXIT + ENDIF + IF "$dreg"="d8" +_DRegList SETS "d8" +_RBytes SETA
_RBytes+8 + MEXIT + ENDIF + IF "$dreg"="d9" +_DRegList SETS "d8-d9" +_RBytes SETA _RBytes+16 + MEXIT + ENDIF + IF "$dreg"="d10" +_DRegList SETS "d8-d10" +_RBytes SETA _RBytes+24 + MEXIT + ENDIF + IF "$dreg"="d11" +_DRegList SETS "d8-d11" +_RBytes SETA _RBytes+32 + MEXIT + ENDIF + IF "$dreg"="d12" +_DRegList SETS "d8-d12" +_RBytes SETA _RBytes+40 + MEXIT + ENDIF + IF "$dreg"="d13" +_DRegList SETS "d8-d13" +_RBytes SETA _RBytes+48 + MEXIT + ENDIF + IF "$dreg"="d14" +_DRegList SETS "d8-d14" +_RBytes SETA _RBytes+56 + MEXIT + ENDIF + IF "$dreg"="d15" +_DRegList SETS "d8-d15" +_RBytes SETA _RBytes+64 + MEXIT + ENDIF + INFO 1, "Unrecognized saved d register limit '$dreg'" + MEND + + ;// Produce function return instructions + MACRO + _M_RET $cc + IF _DRegList<>"" + VPOP$cc {$_DRegList} + ENDIF + IF _RRegList="" + BX$cc lr + ELSE + LDM$cc.FD sp!, {$_RRegList, pc} + ENDIF + MEND + + ;// Early Function Exit Macro + ;// $cc = condition to exit with + ;// (Example: M_EXIT EQ) + MACRO + M_EXIT $cc + ASSERT _InFunc + IF _SBytes!=0 + ;// Restore stack frame and exit + B$cc _End$_F + ELSE + ;// Can return directly + _M_RET $cc + ENDIF + MEND + + ;// Function Footer Macro + ;// Generates the function epilogue + MACRO + M_END + ASSERT _InFunc +_InFunc SETL {FALSE} +_End$_F + + ;// Restore the stack pointer to its original value on function entry + IF _SBytes!=0 + _M_OPC ADD, sp, sp, _SBytes + ENDIF + _M_RET + ENDFUNC + + ;// Reset the global stack tracking variables back to their + ;// initial values, and increment the function count +_SBytes SETA 0 +_F SETA _F+1 + MEND + + +;//========================================================================== +;// Debug Macros +;//========================================================================== + + GBLL DEBUG_ON +DEBUG_ON SETL {FALSE} + GBLL DEBUG_STALLS_ON +DEBUG_STALLS_ON SETL {FALSE} + + ;//========================================================================== + ;// Debug call to printf + ;// M_PRINTF $format, $val0, $val1, 
$val2 + ;// + ;// Examples: + ;// M_PRINTF "x=%08x\n", r0 + ;// + ;// This macro preserves the value of all registers including the + ;// flags. + ;//========================================================================== + + MACRO + M_PRINTF $format, $val0, $val1, $val2 + IF DEBUG_ON + + IMPORT printf + LCLA nArgs +nArgs SETA 0 + + ;// save registers so we don't corrupt them + STMFD sp!, {r0-r12, lr} + + ;// Drop stack to give us some workspace + SUB sp, sp, #16 + + ;// Save registers we need to print to the stack + IF "$val2" <> "" + ASSERT "$val1" <> "" + STR $val2, [sp, #8] +nArgs SETA nArgs+1 + ENDIF + IF "$val1" <> "" + ASSERT "$val0" <> "" + STR $val1, [sp, #4] +nArgs SETA nArgs+1 + ENDIF + IF "$val0"<>"" + STR $val0, [sp] +nArgs SETA nArgs+1 + ENDIF + + ;// Now we are safe to corrupt registers + ADR r0, %FT00 + IF nArgs=1 + LDR r1, [sp] + ENDIF + IF nArgs=2 + LDMIA sp, {r1,r2} + ENDIF + IF nArgs=3 + LDMIA sp, {r1,r2,r3} + ENDIF + + ;// print the values + MRS r4, cpsr ;// preserve flags + BL printf + MSR cpsr_f, r4 ;// restore flags + B %FT01 +00 ;// string to print + DCB "$format", 0 + ALIGN +01 ;// Finished + ADD sp, sp, #16 + ;// Restore registers + LDMFD sp!, {r0-r12,lr} + + ENDIF ;// DEBUG_ON + MEND + + + ;// Stall Simulation Macro + ;// Inserts a given number of NOPs for the currently + ;// defined platform + MACRO + M_STALL $plat1stall, $plat2stall, $plat3stall, $plat4stall, $plat5stall, $plat6stall + IF DEBUG_STALLS_ON + _M_STALL_SUB $plat1stall + _M_STALL_SUB $plat2stall + _M_STALL_SUB $plat3stall + _M_STALL_SUB $plat4stall + _M_STALL_SUB $plat5stall + _M_STALL_SUB $plat6stall + ENDIF + MEND + + MACRO + _M_STALL_SUB $platstall + IF "$platstall"!="" + LCLA _pllen + LCLS _pl + LCLL _pllog +_pllen SETA :LEN:"$platstall" +_pl SETS "$platstall":LEFT:(_pllen - 2) + IF :DEF:$_pl + IF $_pl + LCLS _st + LCLA _stnum +_st SETS "$platstall":RIGHT:1 +_stnum SETA $_st + WHILE _stnum>0 + MOV sp, sp +_stnum SETA _stnum - 1 + WEND + ENDIF + ENDIF + ENDIF + MEND + 
+ + +;//========================================================================== +;// Endian Invariance Macros +;// +;// The idea behind these macros is that if an array is +;// loaded as words then the SMUL00 macro will multiply +;// array elements 0 regardless of the endianness of the +;// system. For little endian SMUL00=SMULBB, for big +;// endian SMUL00=SMULTT and similarly for other packed operations. +;// +;//========================================================================== + + MACRO + LIBI4 $comli, $combi, $a, $b, $c, $d, $cc + IF {ENDIAN}="big" + $combi.$cc $a, $b, $c, $d + ELSE + $comli.$cc $a, $b, $c, $d + ENDIF + MEND + + MACRO + LIBI3 $comli, $combi, $a, $b, $c, $cc + IF {ENDIAN}="big" + $combi.$cc $a, $b, $c + ELSE + $comli.$cc $a, $b, $c + ENDIF + MEND + + ;// SMLAxy macros + + MACRO + SMLA00 $a, $b, $c, $d, $cc + LIBI4 SMLABB, SMLATT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA01 $a, $b, $c, $d, $cc + LIBI4 SMLABT, SMLATB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA0B $a, $b, $c, $d, $cc + LIBI4 SMLABB, SMLATB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA0T $a, $b, $c, $d, $cc + LIBI4 SMLABT, SMLATT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA10 $a, $b, $c, $d, $cc + LIBI4 SMLATB, SMLABT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA11 $a, $b, $c, $d, $cc + LIBI4 SMLATT, SMLABB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA1B $a, $b, $c, $d, $cc + LIBI4 SMLATB, SMLABB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA1T $a, $b, $c, $d, $cc + LIBI4 SMLATT, SMLABT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAB0 $a, $b, $c, $d, $cc + LIBI4 SMLABB, SMLABT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAB1 $a, $b, $c, $d, $cc + LIBI4 SMLABT, SMLABB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAT0 $a, $b, $c, $d, $cc + LIBI4 SMLATB, SMLATT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAT1 $a, $b, $c, $d, $cc + LIBI4 SMLATT, SMLATB, $a, $b, $c, $d, $cc + MEND + + ;// SMULxy macros + + MACRO + SMUL00 $a, $b, $c, $cc + LIBI3 SMULBB, SMULTT, $a, $b, $c, $cc + MEND + +
MACRO + SMUL01 $a, $b, $c, $cc + LIBI3 SMULBT, SMULTB, $a, $b, $c, $cc + MEND + + MACRO + SMUL0B $a, $b, $c, $cc + LIBI3 SMULBB, SMULTB, $a, $b, $c, $cc + MEND + + MACRO + SMUL0T $a, $b, $c, $cc + LIBI3 SMULBT, SMULTT, $a, $b, $c, $cc + MEND + + MACRO + SMUL10 $a, $b, $c, $cc + LIBI3 SMULTB, SMULBT, $a, $b, $c, $cc + MEND + + MACRO + SMUL11 $a, $b, $c, $cc + LIBI3 SMULTT, SMULBB, $a, $b, $c, $cc + MEND + + MACRO + SMUL1B $a, $b, $c, $cc + LIBI3 SMULTB, SMULBB, $a, $b, $c, $cc + MEND + + MACRO + SMUL1T $a, $b, $c, $cc + LIBI3 SMULTT, SMULBT, $a, $b, $c, $cc + MEND + + MACRO + SMULB0 $a, $b, $c, $cc + LIBI3 SMULBB, SMULBT, $a, $b, $c, $cc + MEND + + MACRO + SMULB1 $a, $b, $c, $cc + LIBI3 SMULBT, SMULBB, $a, $b, $c, $cc + MEND + + MACRO + SMULT0 $a, $b, $c, $cc + LIBI3 SMULTB, SMULTT, $a, $b, $c, $cc + MEND + + MACRO + SMULT1 $a, $b, $c, $cc + LIBI3 SMULTT, SMULTB, $a, $b, $c, $cc + MEND + + ;// SMLAWx, SMULWx macros + + MACRO + SMLAW0 $a, $b, $c, $d, $cc + LIBI4 SMLAWB, SMLAWT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAW1 $a, $b, $c, $d, $cc + LIBI4 SMLAWT, SMLAWB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMULW0 $a, $b, $c, $cc + LIBI3 SMULWB, SMULWT, $a, $b, $c, $cc + MEND + + MACRO + SMULW1 $a, $b, $c, $cc + LIBI3 SMULWT, SMULWB, $a, $b, $c, $cc + MEND + + ;// SMLALxy macros + + + MACRO + SMLAL00 $a, $b, $c, $d, $cc + LIBI4 SMLALBB, SMLALTT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL01 $a, $b, $c, $d, $cc + LIBI4 SMLALBT, SMLALTB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL0B $a, $b, $c, $d, $cc + LIBI4 SMLALBB, SMLALTB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL0T $a, $b, $c, $d, $cc + LIBI4 SMLALBT, SMLALTT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL10 $a, $b, $c, $d, $cc + LIBI4 SMLALTB, SMLALBT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL11 $a, $b, $c, $d, $cc + LIBI4 SMLALTT, SMLALBB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL1B $a, $b, $c, $d, $cc + LIBI4 SMLALTB, SMLALBB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL1T $a, $b, $c, $d, $cc + LIBI4 
SMLALTT, SMLALBT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALB0 $a, $b, $c, $d, $cc + LIBI4 SMLALBB, SMLALBT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALB1 $a, $b, $c, $d, $cc + LIBI4 SMLALBT, SMLALBB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALT0 $a, $b, $c, $d, $cc + LIBI4 SMLALTB, SMLALTT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALT1 $a, $b, $c, $d, $cc + LIBI4 SMLALTT, SMLALTB, $a, $b, $c, $d, $cc + MEND + + ENDIF ;// ARMCOMM_S_H + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h new file mode 100644 index 0000000000000000000000000000000000000000..f629f72b25db0da7e01d1f917bfed611ae517d7a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h @@ -0,0 +1,274 @@ +/* + * + * File Name: armOMX_ReleaseVersion.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * This file allows a version of the OMX DL libraries to be built where some or + * all of the function names can be given a user specified suffix. + * + * You might want to use it where: + * + * - you want to rename a function "out of the way" so that you could replace + * a function with a different version (the original version would still be + * in the library just with a different name - so you could debug the new + * version by comparing it to the output of the old) + * + * - you want to rename all the functions to versions with a suffix so that + * you can include two versions of the library and choose between functions + * at runtime. + * + * e.g. 
omxIPBM_Copy_U8_C1R could be renamed omxIPBM_Copy_U8_C1R_CortexA8 + * + */ + + +#ifndef _armOMX_H_ +#define _armOMX_H_ + + +/* We need to define these two macros in order to expand and concatenate the names */ +#define OMXCAT2BAR(A, B) omx ## A ## B +#define OMXCATBAR(A, B) OMXCAT2BAR(A, B) + +/* Define the suffix to add to all functions - the default is no suffix */ +#define BARE_SUFFIX + + + +/* Define what happens to the bare suffix-less functions, down to the sub-domain accuracy */ +#define OMXACAAC_SUFFIX BARE_SUFFIX +#define OMXACMP3_SUFFIX BARE_SUFFIX +#define OMXICJP_SUFFIX BARE_SUFFIX +#define OMXIPBM_SUFFIX BARE_SUFFIX +#define OMXIPCS_SUFFIX BARE_SUFFIX +#define OMXIPPP_SUFFIX BARE_SUFFIX +#define OMXSP_SUFFIX BARE_SUFFIX +#define OMXVCCOMM_SUFFIX BARE_SUFFIX +#define OMXVCM4P10_SUFFIX BARE_SUFFIX +#define OMXVCM4P2_SUFFIX BARE_SUFFIX + + + + +/* Define what the each bare, un-suffixed OpenMAX API function names is to be renamed */ +#define omxACAAC_DecodeChanPairElt OMXCATBAR(ACAAC_DecodeChanPairElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeDatStrElt OMXCATBAR(ACAAC_DecodeDatStrElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeFillElt OMXCATBAR(ACAAC_DecodeFillElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeIsStereo_S32 OMXCATBAR(ACAAC_DecodeIsStereo_S32, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeMsPNS_S32_I OMXCATBAR(ACAAC_DecodeMsPNS_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeMsStereo_S32_I OMXCATBAR(ACAAC_DecodeMsStereo_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_DecodePrgCfgElt OMXCATBAR(ACAAC_DecodePrgCfgElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeTNS_S32_I OMXCATBAR(ACAAC_DecodeTNS_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_DeinterleaveSpectrum_S32 OMXCATBAR(ACAAC_DeinterleaveSpectrum_S32, OMXACAAC_SUFFIX) +#define omxACAAC_EncodeTNS_S32_I OMXCATBAR(ACAAC_EncodeTNS_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_LongTermPredict_S32 OMXCATBAR(ACAAC_LongTermPredict_S32, OMXACAAC_SUFFIX) +#define omxACAAC_LongTermReconstruct_S32_I 
OMXCATBAR(ACAAC_LongTermReconstruct_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_MDCTFwd_S32 OMXCATBAR(ACAAC_MDCTFwd_S32, OMXACAAC_SUFFIX) +#define omxACAAC_MDCTInv_S32_S16 OMXCATBAR(ACAAC_MDCTInv_S32_S16, OMXACAAC_SUFFIX) +#define omxACAAC_NoiselessDecode OMXCATBAR(ACAAC_NoiselessDecode, OMXACAAC_SUFFIX) +#define omxACAAC_QuantInv_S32_I OMXCATBAR(ACAAC_QuantInv_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_UnpackADIFHeader OMXCATBAR(ACAAC_UnpackADIFHeader, OMXACAAC_SUFFIX) +#define omxACAAC_UnpackADTSFrameHeader OMXCATBAR(ACAAC_UnpackADTSFrameHeader, OMXACAAC_SUFFIX) + + +#define omxACMP3_HuffmanDecode_S32 OMXCATBAR(ACMP3_HuffmanDecode_S32, OMXACMP3_SUFFIX) +#define omxACMP3_HuffmanDecodeSfb_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfb_S32, OMXACMP3_SUFFIX) +#define omxACMP3_HuffmanDecodeSfbMbp_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfbMbp_S32, OMXACMP3_SUFFIX) +#define omxACMP3_MDCTInv_S32 OMXCATBAR(ACMP3_MDCTInv_S32, OMXACMP3_SUFFIX) +#define omxACMP3_ReQuantize_S32_I OMXCATBAR(ACMP3_ReQuantize_S32_I, OMXACMP3_SUFFIX) +#define omxACMP3_ReQuantizeSfb_S32_I OMXCATBAR(ACMP3_ReQuantizeSfb_S32_I, OMXACMP3_SUFFIX) +#define omxACMP3_SynthPQMF_S32_S16 OMXCATBAR(ACMP3_SynthPQMF_S32_S16, OMXACMP3_SUFFIX) +#define omxACMP3_UnpackFrameHeader OMXCATBAR(ACMP3_UnpackFrameHeader, OMXACMP3_SUFFIX) +#define omxACMP3_UnpackScaleFactors_S8 OMXCATBAR(ACMP3_UnpackScaleFactors_S8, OMXACMP3_SUFFIX) +#define omxACMP3_UnpackSideInfo OMXCATBAR(ACMP3_UnpackSideInfo, OMXACMP3_SUFFIX) + +#define omxICJP_CopyExpand_U8_C3 OMXCATBAR(ICJP_CopyExpand_U8_C3, OMXICJP_SUFFIX) +#define omxICJP_DCTFwd_S16 OMXCATBAR(ICJP_DCTFwd_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTFwd_S16_I OMXCATBAR(ICJP_DCTFwd_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTInv_S16 OMXCATBAR(ICJP_DCTInv_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTInv_S16_I OMXCATBAR(ICJP_DCTInv_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwd_Multiple_S16 OMXCATBAR(ICJP_DCTQuantFwd_Multiple_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwd_S16 
OMXCATBAR(ICJP_DCTQuantFwd_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwd_S16_I OMXCATBAR(ICJP_DCTQuantFwd_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwdTableInit OMXCATBAR(ICJP_DCTQuantFwdTableInit, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInv_Multiple_S16 OMXCATBAR(ICJP_DCTQuantInv_Multiple_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInv_S16 OMXCATBAR(ICJP_DCTQuantInv_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInv_S16_I OMXCATBAR(ICJP_DCTQuantInv_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInvTableInit OMXCATBAR(ICJP_DCTQuantInvTableInit, OMXICJP_SUFFIX) +#define omxICJP_DecodeHuffman8x8_Direct_S16_C1 OMXCATBAR(ICJP_DecodeHuffman8x8_Direct_S16_C1, OMXICJP_SUFFIX) +#define omxICJP_DecodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX) +#define omxICJP_DecodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecInit_U8, OMXICJP_SUFFIX) +#define omxICJP_EncodeHuffman8x8_Direct_S16_U1_C1 OMXCATBAR(ICJP_EncodeHuffman8x8_Direct_S16_U1_C1, OMXICJP_SUFFIX) +#define omxICJP_EncodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX) +#define omxICJP_EncodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecInit_U8, OMXICJP_SUFFIX) + +#define omxIPBM_AddC_U8_C1R_Sfs OMXCATBAR(IPBM_AddC_U8_C1R_Sfs, OMXIPBM_SUFFIX) +#define omxIPBM_Copy_U8_C1R OMXCATBAR(IPBM_Copy_U8_C1R, OMXIPBM_SUFFIX) +#define omxIPBM_Copy_U8_C3R OMXCATBAR(IPBM_Copy_U8_C3R, OMXIPBM_SUFFIX) +#define omxIPBM_Mirror_U8_C1R OMXCATBAR(IPBM_Mirror_U8_C1R, OMXIPBM_SUFFIX) +#define omxIPBM_MulC_U8_C1R_Sfs OMXCATBAR(IPBM_MulC_U8_C1R_Sfs, OMXIPBM_SUFFIX) + +#define omxIPCS_ColorTwistQ14_U8_C3R OMXCATBAR(IPCS_ColorTwistQ14_U8_C3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX) +#define 
omxIPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr420RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr420RszRot_U8_P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr422RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R OMXCATBAR(IPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr422RszRot_U8_P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbYCr422ToBGR565_U8_U16_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR565_U8_U16_C2C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbYCr422ToBGR888_U8_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR888_U8_C2C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R 
OMXCATBAR(IPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToYCbCr420Rotate_U8_P3R OMXCATBAR(IPCS_YCbCr422ToYCbCr420Rotate_U8_P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR565_U8_U16_C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR888_U8_C3R OMXCATBAR(IPCS_YCbCr444ToBGR888_U8_C3R, OMXIPCS_SUFFIX) + +#define omxIPPP_Deblock_HorEdge_U8_I OMXCATBAR(IPPP_Deblock_HorEdge_U8_I, OMXIPPP_SUFFIX) +#define omxIPPP_Deblock_VerEdge_U8_I OMXCATBAR(IPPP_Deblock_VerEdge_U8_I, OMXIPPP_SUFFIX) +#define omxIPPP_FilterFIR_U8_C1R OMXCATBAR(IPPP_FilterFIR_U8_C1R, OMXIPPP_SUFFIX) +#define omxIPPP_FilterMedian_U8_C1R OMXCATBAR(IPPP_FilterMedian_U8_C1R, OMXIPPP_SUFFIX) +#define omxIPPP_GetCentralMoment_S64 OMXCATBAR(IPPP_GetCentralMoment_S64, OMXIPPP_SUFFIX) +#define omxIPPP_GetSpatialMoment_S64 OMXCATBAR(IPPP_GetSpatialMoment_S64, OMXIPPP_SUFFIX) +#define omxIPPP_MomentGetStateSize OMXCATBAR(IPPP_MomentGetStateSize, OMXIPPP_SUFFIX) +#define omxIPPP_MomentInit OMXCATBAR(IPPP_MomentInit, OMXIPPP_SUFFIX) +#define omxIPPP_Moments_U8_C1R OMXCATBAR(IPPP_Moments_U8_C1R, OMXIPPP_SUFFIX) +#define omxIPPP_Moments_U8_C3R OMXCATBAR(IPPP_Moments_U8_C3R, OMXIPPP_SUFFIX) + +#define omxSP_BlockExp_S16 OMXCATBAR(SP_BlockExp_S16, OMXSP_SUFFIX) +#define omxSP_BlockExp_S32 OMXCATBAR(SP_BlockExp_S32, OMXSP_SUFFIX) +#define omxSP_Copy_S16 OMXCATBAR(SP_Copy_S16, OMXSP_SUFFIX) +#define omxSP_DotProd_S16 OMXCATBAR(SP_DotProd_S16, OMXSP_SUFFIX) +#define omxSP_DotProd_S16_Sfs OMXCATBAR(SP_DotProd_S16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_CToC_SC16_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_CToC_SC32_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC32_Sfs, OMXSP_SUFFIX) +#define 
omxSP_FFTFwd_RToCCS_S16S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S16S32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_RToCCS_S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_C_SC16 OMXCATBAR(SP_FFTGetBufSize_C_SC16, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_C_SC32 OMXCATBAR(SP_FFTGetBufSize_C_SC32, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_R_S16S32 OMXCATBAR(SP_FFTGetBufSize_R_S16S32, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_R_S32 OMXCATBAR(SP_FFTGetBufSize_R_S32, OMXSP_SUFFIX) +#define omxSP_FFTInit_C_SC16 OMXCATBAR(SP_FFTInit_C_SC16, OMXSP_SUFFIX) +#define omxSP_FFTInit_C_SC32 OMXCATBAR(SP_FFTInit_C_SC32, OMXSP_SUFFIX) +#define omxSP_FFTInit_R_S16S32 OMXCATBAR(SP_FFTInit_R_S16S32, OMXSP_SUFFIX) +#define omxSP_FFTInit_R_S32 OMXCATBAR(SP_FFTInit_R_S32, OMXSP_SUFFIX) +#define omxSP_FFTInv_CCSToR_S32_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTInv_CCSToR_S32S16_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32S16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTInv_CToC_SC16_Sfs OMXCATBAR(SP_FFTInv_CToC_SC16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTInv_CToC_SC32_Sfs OMXCATBAR(SP_FFTInv_CToC_SC32_Sfs, OMXSP_SUFFIX) +#define omxSP_FilterMedian_S32 OMXCATBAR(SP_FilterMedian_S32, OMXSP_SUFFIX) +#define omxSP_FilterMedian_S32_I OMXCATBAR(SP_FilterMedian_S32_I, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16 OMXCATBAR(SP_FIR_Direct_S16, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16_I OMXCATBAR(SP_FIR_Direct_S16_I, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16_ISfs OMXCATBAR(SP_FIR_Direct_S16_ISfs, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16_Sfs OMXCATBAR(SP_FIR_Direct_S16_Sfs, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16 OMXCATBAR(SP_FIROne_Direct_S16, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16_I OMXCATBAR(SP_FIROne_Direct_S16_I, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16_ISfs OMXCATBAR(SP_FIROne_Direct_S16_ISfs, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16_Sfs OMXCATBAR(SP_FIROne_Direct_S16_Sfs, OMXSP_SUFFIX) +#define 
omxSP_IIR_BiQuadDirect_S16 OMXCATBAR(SP_IIR_BiQuadDirect_S16, OMXSP_SUFFIX) +#define omxSP_IIR_BiQuadDirect_S16_I OMXCATBAR(SP_IIR_BiQuadDirect_S16_I, OMXSP_SUFFIX) +#define omxSP_IIR_Direct_S16 OMXCATBAR(SP_IIR_Direct_S16, OMXSP_SUFFIX) +#define omxSP_IIR_Direct_S16_I OMXCATBAR(SP_IIR_Direct_S16_I, OMXSP_SUFFIX) +#define omxSP_IIROne_BiQuadDirect_S16 OMXCATBAR(SP_IIROne_BiQuadDirect_S16, OMXSP_SUFFIX) +#define omxSP_IIROne_BiQuadDirect_S16_I OMXCATBAR(SP_IIROne_BiQuadDirect_S16_I, OMXSP_SUFFIX) +#define omxSP_IIROne_Direct_S16 OMXCATBAR(SP_IIROne_Direct_S16, OMXSP_SUFFIX) +#define omxSP_IIROne_Direct_S16_I OMXCATBAR(SP_IIROne_Direct_S16_I, OMXSP_SUFFIX) + +#define omxVCCOMM_Average_16x OMXCATBAR(VCCOMM_Average_16x, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_Average_8x OMXCATBAR(VCCOMM_Average_8x, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_ComputeTextureErrorBlock OMXCATBAR(VCCOMM_ComputeTextureErrorBlock, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_ComputeTextureErrorBlock_SAD OMXCATBAR(VCCOMM_ComputeTextureErrorBlock_SAD, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_Copy16x16 OMXCATBAR(VCCOMM_Copy16x16, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_Copy8x8 OMXCATBAR(VCCOMM_Copy8x8, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_ExpandFrame_I OMXCATBAR(VCCOMM_ExpandFrame_I, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_LimitMVToRect OMXCATBAR(VCCOMM_LimitMVToRect, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_SAD_16x OMXCATBAR(VCCOMM_SAD_16x, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_SAD_8x OMXCATBAR(VCCOMM_SAD_8x, OMXVCCOMM_SUFFIX) + +#define omxVCM4P10_Average_4x OMXCATBAR(VCM4P10_Average_4x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_BlockMatch_Half OMXCATBAR(VCM4P10_BlockMatch_Half, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_BlockMatch_Integer OMXCATBAR(VCM4P10_BlockMatch_Integer, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_BlockMatch_Quarter OMXCATBAR(VCM4P10_BlockMatch_Quarter, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DeblockChroma_I OMXCATBAR(VCM4P10_DeblockChroma_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DeblockLuma_I 
OMXCATBAR(VCM4P10_DeblockLuma_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeChromaDcCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DecodeCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DequantTransformResidualFromPairAndAdd OMXCATBAR(VCM4P10_DequantTransformResidualFromPairAndAdd, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingChroma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_HorEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingChroma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_VerEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingLuma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_HorEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingLuma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_VerEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_GetVLCInfo OMXCATBAR(VCM4P10_GetVLCInfo, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateChroma OMXCATBAR(VCM4P10_InterpolateChroma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateHalfHor_Luma OMXCATBAR(VCM4P10_InterpolateHalfHor_Luma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateHalfVer_Luma OMXCATBAR(VCM4P10_InterpolateHalfVer_Luma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateLuma OMXCATBAR(VCM4P10_InterpolateLuma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InvTransformDequant_ChromaDC OMXCATBAR(VCM4P10_InvTransformDequant_ChromaDC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InvTransformDequant_LumaDC OMXCATBAR(VCM4P10_InvTransformDequant_LumaDC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InvTransformResidualAndAdd OMXCATBAR(VCM4P10_InvTransformResidualAndAdd, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_MEGetBufSize OMXCATBAR(VCM4P10_MEGetBufSize, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_MEInit OMXCATBAR(VCM4P10_MEInit, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_MotionEstimationMB OMXCATBAR(VCM4P10_MotionEstimationMB, OMXVCM4P10_SUFFIX) +#define 
omxVCM4P10_PredictIntra_16x16 OMXCATBAR(VCM4P10_PredictIntra_16x16, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_PredictIntra_4x4 OMXCATBAR(VCM4P10_PredictIntra_4x4, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_PredictIntraChroma_8x8 OMXCATBAR(VCM4P10_PredictIntraChroma_8x8, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SAD_4x OMXCATBAR(VCM4P10_SAD_4x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SADQuar_16x OMXCATBAR(VCM4P10_SADQuar_16x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SADQuar_4x OMXCATBAR(VCM4P10_SADQuar_4x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SADQuar_8x OMXCATBAR(VCM4P10_SADQuar_8x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SATD_4x4 OMXCATBAR(VCM4P10_SATD_4x4, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SubAndTransformQDQResidual OMXCATBAR(VCM4P10_SubAndTransformQDQResidual, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformDequantChromaDCFromPair OMXCATBAR(VCM4P10_TransformDequantChromaDCFromPair, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformDequantLumaDCFromPair OMXCATBAR(VCM4P10_TransformDequantLumaDCFromPair, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformQuant_ChromaDC OMXCATBAR(VCM4P10_TransformQuant_ChromaDC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformQuant_LumaDC OMXCATBAR(VCM4P10_TransformQuant_LumaDC, OMXVCM4P10_SUFFIX) + +#define omxVCM4P2_BlockMatch_Half_16x16 OMXCATBAR(VCM4P2_BlockMatch_Half_16x16, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_BlockMatch_Half_8x8 OMXCATBAR(VCM4P2_BlockMatch_Half_8x8, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_BlockMatch_Integer_16x16 OMXCATBAR(VCM4P2_BlockMatch_Integer_16x16, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_BlockMatch_Integer_8x8 OMXCATBAR(VCM4P2_BlockMatch_Integer_8x8, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DCT8x8blk OMXCATBAR(VCM4P2_DCT8x8blk, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeBlockCoef_Inter OMXCATBAR(VCM4P2_DecodeBlockCoef_Inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeBlockCoef_Intra OMXCATBAR(VCM4P2_DecodeBlockCoef_Intra, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodePadMV_PVOP OMXCATBAR(VCM4P2_DecodePadMV_PVOP, 
OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeVLCZigzag_Inter OMXCATBAR(VCM4P2_DecodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeMV OMXCATBAR(VCM4P2_EncodeMV, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeVLCZigzag_Inter OMXCATBAR(VCM4P2_EncodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_FindMVpred OMXCATBAR(VCM4P2_FindMVpred, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_IDCT8x8blk OMXCATBAR(VCM4P2_IDCT8x8blk, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MCReconBlock OMXCATBAR(VCM4P2_MCReconBlock, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MEGetBufSize OMXCATBAR(VCM4P2_MEGetBufSize, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MEInit OMXCATBAR(VCM4P2_MEInit, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MotionEstimationMB OMXCATBAR(VCM4P2_MotionEstimationMB, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_PredictReconCoefIntra OMXCATBAR(VCM4P2_PredictReconCoefIntra, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantInter_I OMXCATBAR(VCM4P2_QuantInter_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantIntra_I OMXCATBAR(VCM4P2_QuantIntra_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantInvInter_I OMXCATBAR(VCM4P2_QuantInvInter_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantInvIntra_I OMXCATBAR(VCM4P2_QuantInvIntra_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_TransRecBlockCoef_inter OMXCATBAR(VCM4P2_TransRecBlockCoef_inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_TransRecBlockCoef_intra OMXCATBAR(VCM4P2_TransRecBlockCoef_intra, OMXVCM4P2_SUFFIX) + + +#endif /* _armOMX_h_ */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h 
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h new file mode 100644 index 0000000000000000000000000000000000000000..8b295a6feee35b4c7cca52b5ef61b36bb41e0c63 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h @@ -0,0 +1,252 @@ +/** + * File: omxtypes.h + * Brief: Defines basic Data types used in OpenMAX v1.0.2 header files. + * + * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved. + * + * These materials are protected by copyright laws and contain material + * proprietary to the Khronos Group, Inc. You may use these materials + * for implementing Khronos specifications, without altering or removing + * any trademark, copyright or other notice from the specification. + * + * Khronos Group makes no, and expressly disclaims any, representations + * or warranties, express or implied, regarding these materials, including, + * without limitation, any implied warranties of merchantability or fitness + * for a particular purpose or non-infringement of any intellectual property. + * Khronos Group makes no, and expressly disclaims any, warranties, express + * or implied, regarding the correctness, accuracy, completeness, timeliness, + * and reliability of these materials. + * + * Under no circumstances will the Khronos Group, or any of its Promoters, + * Contributors or Members or their respective partners, officers, directors, + * employees, agents or representatives be liable for any damages, whether + * direct, indirect, special or consequential damages for lost revenues, + * lost profits, or otherwise, arising from or in connection with these + * materials. + * + * Khronos and OpenMAX are trademarks of the Khronos Group Inc. 
+ * + */ + +#ifndef _OMXTYPES_H_ +#define _OMXTYPES_H_ + +#include <limits.h> + +#define OMX_IN +#define OMX_OUT +#define OMX_INOUT + + +typedef enum { + + /* Mandatory return codes - use cases are explicitly described for each function */ + OMX_Sts_NoErr = 0, /* No error, the function completed successfully */ + OMX_Sts_Err = -2, /* Unknown/unspecified error */ + OMX_Sts_InvalidBitstreamValErr = -182, /* Invalid value detected during bitstream processing */ + OMX_Sts_MemAllocErr = -9, /* Not enough memory allocated for the operation */ + OMX_StsACAAC_GainCtrErr = -159, /* AAC: Unsupported gain control data detected */ + OMX_StsACAAC_PrgNumErr = -167, /* AAC: Invalid number of elements for one program */ + OMX_StsACAAC_CoefValErr = -163, /* AAC: Invalid quantized coefficient value */ + OMX_StsACAAC_MaxSfbErr = -162, /* AAC: Invalid maxSfb value in relation to numSwb */ + OMX_StsACAAC_PlsDataErr = -160, /* AAC: pulse escape sequence data error */ + + /* Optional return codes - use cases are explicitly described for each function*/ + OMX_Sts_BadArgErr = -5, /* Bad Arguments */ + + OMX_StsACAAC_TnsNumFiltErr = -157, /* AAC: Invalid number of TNS filters */ + OMX_StsACAAC_TnsLenErr = -156, /* AAC: Invalid TNS region length */ + OMX_StsACAAC_TnsOrderErr = -155, /* AAC: Invalid order of TNS filter */ + OMX_StsACAAC_TnsCoefResErr = -154, /* AAC: Invalid bit-resolution for TNS filter coefficients */ + OMX_StsACAAC_TnsCoefErr = -153, /* AAC: Invalid TNS filter coefficients */ + OMX_StsACAAC_TnsDirectErr = -152, /* AAC: Invalid TNS filter direction */ + + OMX_StsICJP_JPEGMarkerErr = -183, /* JPEG marker encountered within an entropy-coded block; */ + /* Huffman decoding operation terminated early. */ + OMX_StsICJP_JPEGMarker = -181, /* JPEG marker encountered; Huffman decoding */ + /* operation terminated early. 
*/ + OMX_StsIPPP_ContextMatchErr = -17, /* Context parameter doesn't match to the operation */ + + OMX_StsSP_EvenMedianMaskSizeErr = -180, /* Even size of the Median Filter mask was replaced by the odd one */ + + OMX_Sts_MaximumEnumeration = INT_MAX /*Placeholder, forces enum of size OMX_INT*/ + + } OMXResult; /** Return value or error value returned from a function. Identical to OMX_INT */ + + +/* OMX_U8 */ +#if UCHAR_MAX == 0xff +typedef unsigned char OMX_U8; +#elif USHRT_MAX == 0xff +typedef unsigned short int OMX_U8; +#else +#error OMX_U8 undefined +#endif + + +/* OMX_S8 */ +#if SCHAR_MAX == 0x7f +typedef signed char OMX_S8; +#elif SHRT_MAX == 0x7f +typedef signed short int OMX_S8; +#else +#error OMX_S8 undefined +#endif + + +/* OMX_U16 */ +#if USHRT_MAX == 0xffff +typedef unsigned short int OMX_U16; +#elif UINT_MAX == 0xffff +typedef unsigned int OMX_U16; +#else +#error OMX_U16 undefined +#endif + + +/* OMX_S16 */ +#if SHRT_MAX == 0x7fff +typedef signed short int OMX_S16; +#elif INT_MAX == 0x7fff +typedef signed int OMX_S16; +#else +#error OMX_S16 undefined +#endif + + +/* OMX_U32 */ +#if UINT_MAX == 0xffffffff +typedef unsigned int OMX_U32; +#elif LONG_MAX == 0xffffffff +typedef unsigned long int OMX_U32; +#else +#error OMX_U32 undefined +#endif + + +/* OMX_S32 */ +#if INT_MAX == 0x7fffffff +typedef signed int OMX_S32; +#elif LONG_MAX == 0x7fffffff +typedef long signed int OMX_S32; +#else +#error OMX_S32 undefined +#endif + + +/* OMX_U64 & OMX_S64 */ +#if defined( _WIN32 ) || defined ( _WIN64 ) + typedef __int64 OMX_S64; /** Signed 64-bit integer */ + typedef unsigned __int64 OMX_U64; /** Unsigned 64-bit integer */ + #define OMX_MIN_S64 (0x8000000000000000i64) + #define OMX_MIN_U64 (0x0000000000000000i64) + #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFi64) + #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFi64) +#else + typedef long long OMX_S64; /** Signed 64-bit integer */ + typedef unsigned long long OMX_U64; /** Unsigned 64-bit integer */ + #define OMX_MIN_S64 
(0x8000000000000000LL) + #define OMX_MIN_U64 (0x0000000000000000LL) + #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFLL) + #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFLL) +#endif + + +/* OMX_SC8 */ +typedef struct +{ + OMX_S8 Re; /** Real part */ + OMX_S8 Im; /** Imaginary part */ + +} OMX_SC8; /** Signed 8-bit complex number */ + + +/* OMX_SC16 */ +typedef struct +{ + OMX_S16 Re; /** Real part */ + OMX_S16 Im; /** Imaginary part */ + +} OMX_SC16; /** Signed 16-bit complex number */ + + +/* OMX_SC32 */ +typedef struct +{ + OMX_S32 Re; /** Real part */ + OMX_S32 Im; /** Imaginary part */ + +} OMX_SC32; /** Signed 32-bit complex number */ + + +/* OMX_SC64 */ +typedef struct +{ + OMX_S64 Re; /** Real part */ + OMX_S64 Im; /** Imaginary part */ + +} OMX_SC64; /** Signed 64-bit complex number */ + + +/* OMX_F32 */ +typedef float OMX_F32; /** Single precision floating point,IEEE 754 */ + + +/* OMX_F64 */ +typedef double OMX_F64; /** Double precision floating point,IEEE 754 */ + + +/* OMX_INT */ +typedef int OMX_INT; /** signed integer corresponding to machine word length, has maximum signed value INT_MAX*/ + + +#define OMX_MIN_S8 (-128) +#define OMX_MIN_U8 0 +#define OMX_MIN_S16 (-32768) +#define OMX_MIN_U16 0 +#define OMX_MIN_S32 (-2147483647-1) +#define OMX_MIN_U32 0 + +#define OMX_MAX_S8 (127) +#define OMX_MAX_U8 (255) +#define OMX_MAX_S16 (32767) +#define OMX_MAX_U16 (0xFFFF) +#define OMX_MAX_S32 (2147483647) +#define OMX_MAX_U32 (0xFFFFFFFF) + +typedef void OMXVoid; + +#ifndef NULL +#define NULL ((void*)0) +#endif + +/** Defines the geometric position and size of a rectangle, + * where x,y defines the coordinates of the top left corner + * of the rectangle, with dimensions width in the x-direction + * and height in the y-direction */ +typedef struct { + OMX_INT x; /** x-coordinate of top left corner of rectangle */ + OMX_INT y; /** y-coordinate of top left corner of rectangle */ + OMX_INT width; /** Width in the x-direction. */ + OMX_INT height; /** Height in the y-direction. 
*/ +}OMXRect; + + +/** Defines the geometric position of a point, */ +typedef struct +{ + OMX_INT x; /** x-coordinate */ + OMX_INT y; /** y-coordinate */ + +} OMXPoint; + + +/** Defines the dimensions of a rectangle, or region of interest in an image */ +typedef struct +{ + OMX_INT width; /** Width of the rectangle, in the x-direction */ + OMX_INT height; /** Height of the rectangle, in the y-direction */ + +} OMXSize; + +#endif /* _OMXTYPES_H_ */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h new file mode 100644 index 0000000000000000000000000000000000000000..8d24b650a83d186c977d28d6a603b076be556d2c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h @@ -0,0 +1,77 @@ +;// +;// +;// File Name: omxtypes_s.h +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+;// +;// +;// + +;// Mandatory return codes - use cases are explicitly described for each function +OMX_Sts_NoErr EQU 0 ;// No error the function completed successfully +OMX_Sts_Err EQU -2 ;// Unknown/unspecified error +OMX_Sts_InvalidBitstreamValErr EQU -182 ;// Invalid value detected during bitstream processing +OMX_Sts_MemAllocErr EQU -9 ;// Not enough memory allocated for the operation +OMX_StsACAAC_GainCtrErr EQU -159 ;// AAC: Unsupported gain control data detected +OMX_StsACAAC_PrgNumErr EQU -167 ;// AAC: Invalid number of elements for one program +OMX_StsACAAC_CoefValErr EQU -163 ;// AAC: Invalid quantized coefficient value +OMX_StsACAAC_MaxSfbErr EQU -162 ;// AAC: Invalid maxSfb value in relation to numSwb +OMX_StsACAAC_PlsDataErr EQU -160 ;// AAC: pulse escape sequence data error + +;// Optional return codes - use cases are explicitly described for each function +OMX_Sts_BadArgErr EQU -5 ;// Bad Arguments + +OMX_StsACAAC_TnsNumFiltErr EQU -157 ;// AAC: Invalid number of TNS filters +OMX_StsACAAC_TnsLenErr EQU -156 ;// AAC: Invalid TNS region length +OMX_StsACAAC_TnsOrderErr EQU -155 ;// AAC: Invalid order of TNS filter +OMX_StsACAAC_TnsCoefResErr EQU -154 ;// AAC: Invalid bit-resolution for TNS filter coefficients +OMX_StsACAAC_TnsCoefErr EQU -153 ;// AAC: Invalid TNS filter coefficients +OMX_StsACAAC_TnsDirectErr EQU -152 ;// AAC: Invalid TNS filter direction + +OMX_StsICJP_JPEGMarkerErr EQU -183 ;// JPEG marker encountered within an entropy-coded block; + ;// Huffman decoding operation terminated early. +OMX_StsICJP_JPEGMarker EQU -181 ;// JPEG marker encountered; Huffman decoding + ;// operation terminated early. 
+OMX_StsIPPP_ContextMatchErr EQU -17 ;// Context parameter doesn't match to the operation + +OMX_StsSP_EvenMedianMaskSizeErr EQU -180 ;// Even size of the Median Filter mask was replaced by the odd one + +OMX_Sts_MaximumEnumeration EQU 0x7FFFFFFF + + + +OMX_MIN_S8 EQU (-128) +OMX_MIN_U8 EQU 0 +OMX_MIN_S16 EQU (-32768) +OMX_MIN_U16 EQU 0 + + +OMX_MIN_S32 EQU (-2147483647-1) +OMX_MIN_U32 EQU 0 + +OMX_MAX_S8 EQU (127) +OMX_MAX_U8 EQU (255) +OMX_MAX_S16 EQU (32767) +OMX_MAX_U16 EQU (0xFFFF) +OMX_MAX_S32 EQU (2147483647) +OMX_MAX_U32 EQU (0xFFFFFFFF) + +OMX_VC_UPPER EQU 0x1 ;// Used by the PredictIntra functions +OMX_VC_LEFT EQU 0x2 ;// Used by the PredictIntra functions +OMX_VC_UPPER_RIGHT EQU 0x40 ;// Used by the PredictIntra functions + +NULL EQU 0 + +;// Structures + + INCLUDE armCOMM_s.h + + M_STRUCT OMXPoint + M_FIELD x, 4 + M_FIELD y, 4 + M_ENDSTRUCT + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl new file mode 100755 index 0000000000000000000000000000000000000000..1ae7005fbe7ca0893bbd8cf4de4b1322b7548a51 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl @@ -0,0 +1,111 @@ +#!/usr/bin/perl +# +# +# File Name: build_vc.pl +# OpenMAX DL: v1.0.2 +# Revision: 9641 +# Date: Thursday, February 7, 2008 +# +# (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +# +# +# +# This file builds the OpenMAX DL vc domain library omxVC.o. 
+# + +use File::Spec; +use strict; + +my ($CC, $CC_OPTS, $AS, $AS_OPTS, $LIB, $LIB_OPTS, $LIB_TYPE); + +$CC = 'armcc'; +$CC_OPTS = '--no_unaligned_access --cpu ARM1136J-S -c'; +$AS = 'armasm'; +$AS_OPTS = '--no_unaligned_access --cpu ARM1136J-S'; +# $LIB = 'armlink'; +# $LIB_OPTS = '--partial -o'; +# $LIB_TYPE = '.o'; +$LIB = 'armar'; +$LIB_OPTS = '--create -r'; +$LIB_TYPE = '.a'; + +#------------------------ + +my (@headerlist, @filelist, $hd, $file, $ofile, $command, $objlist, $libfile, $h); + +# Define the list of directories containing included header files. +@headerlist = qw(api vc/api vc/m4p2/api vc/m4p10/api); + +# Define the list of source files to compile. +open(FILES, '<filelist_vc.txt') or die("Can't open source file list\n"); +@filelist = <FILES>; +close(FILES); + +# Fix the file separators in the header paths +foreach $h (@headerlist) +{ + $h = File::Spec->canonpath($h); +} + +# Create the include path to be passed to the compiler +$hd = '-I' . join(' -I', @headerlist); + +# Create the build directories "/lib/" and "/obj/" (if they are not there already) +mkdir "obj", 0777 if (! -d "obj"); +mkdir "lib", 0777 if (! -d "lib"); + +$objlist = ''; + +# Compile each file +foreach $file (@filelist) +{ + my $f; + my $base; + my $ext; + my $objfile; + + chomp($file); + $file = File::Spec->canonpath($file); + + (undef, undef, $f) = File::Spec->splitpath($file); + if(($base, $ext) = $f =~ /(.+)\.(\w)$/) + { + $objfile = File::Spec->catfile('obj', $base.'.o'); + + if($ext eq 'c') + { + $objlist .= "$objfile "; + $command = $CC.' '.$CC_OPTS.' '.$hd.' -o '.$objfile.' '.$file; + print "$command\n"; + system($command); + } + elsif($ext eq 's') + { + $objlist .= "$objfile "; + $command = $AS.' '.$AS_OPTS.' '.$hd.' -o '.$objfile.' '.$file; + print "$command\n"; + system($command); + } + else + { + print "Ignoring file: $f\n"; + } + } + else + { + die "No file extension found: $f\n"; + } +} + +# Do the final link stage to create the libraries. +$libfile = File::Spec->catfile('lib', 'omxVC'.$LIB_TYPE); +$command = $LIB.' '.$LIB_OPTS.' '.$libfile.' 
'.$objlist; +print "$command\n"; +(system($command) == 0) and print "Build successful\n"; + + + + + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt new file mode 100644 index 0000000000000000000000000000000000000000..0f1623f250d7f7d2e40caffbe24ce1611a9d6119 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt @@ -0,0 +1,74 @@ +./api/armCOMM.h +./api/armCOMM_BitDec_s.h +./api/armCOMM_Bitstream.h +./api/armCOMM_IDCT_s.h +./api/armCOMM_IDCTTable.h +./api/armCOMM_MaskTable.h +./api/armCOMM_s.h +./api/armCOMM_Version.h +./api/armOMX_ReleaseVersion.h +./api/omxtypes.h +./api/omxtypes_s.h +./src/armCOMM_IDCTTable.c +./src/armCOMM_MaskTable.c +./vc/api/armVC.h +./vc/api/armVCCOMM_s.h +./vc/api/omxVC.h +./vc/api/omxVC_s.h +./vc/comm/src/omxVCCOMM_Copy16x16_s.s +./vc/comm/src/omxVCCOMM_Copy8x8_s.s +./vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s +./vc/m4p10/api/armVCM4P10_CAVLCTables.h +./vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s +./vc/m4p10/src/armVCM4P10_CAVLCTables.c +./vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s +./vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s +./vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s +./vc/m4p10/src/armVCM4P10_DequantTables_s.s +./vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s +./vc/m4p10/src/armVCM4P10_QuantTables_s.s +./vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s +./vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s 
+./vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c +./vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c +./vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c +./vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c +./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s +./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s +./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s +./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s +./vc/m4p10/src/omxVCM4P10_InterpolateChroma.c +./vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s +./vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s +./vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s +./vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s +./vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s +./vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s +./vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h +./vc/m4p2/api/armVCM4P2_ZigZag_Tables.h +./vc/m4p2/src/armVCM4P2_Clip8_s.s +./vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s +./vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c +./vc/m4p2/src/armVCM4P2_Lookup_Tables.c +./vc/m4p2/src/armVCM4P2_SetPredDir_s.s +./vc/m4p2/src/armVCM4P2_Zigzag_Tables.c +./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c +./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c +./vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s +./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s +./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s +./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s +./vc/m4p2/src/omxVCM4P2_FindMVpred_s.s +./vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s +./vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s +./vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s +./vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s +./vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s +./vc/src/armVC_Version.c \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c new file mode 100644 index 
0000000000000000000000000000000000000000..e572a896754dd46c166e31ae827eab526d62a645 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c @@ -0,0 +1,936 @@ +/** + * + * File Name: armCOMM.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Defines Common APIs used across OpenMAX API's + */ + +#include "omxtypes.h" +#include "armCOMM.h" + +/***********************************************************************/ + /* Miscellaneous Arithmetic operations */ + +/** + * Function: armRoundFloatToS16 + * + * Description: + * Converts a double precision value into a short int after rounding + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16 format + * + */ + +OMX_S16 armRoundFloatToS16 (OMX_F64 Value) +{ + if (Value > 0) + { + return (OMX_S16)(Value + .5); + } + else + { + return (OMX_S16)(Value - .5); + } +} + +/** + * Function: armRoundFloatToS32 + * + * Description: + * Converts a double precision value into a int after rounding + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S32 format + * + */ + +OMX_S32 armRoundFloatToS32 (OMX_F64 Value) +{ + if (Value > 0) + { + return (OMX_S32)(Value + .5); + } + else + { + return (OMX_S32)(Value - .5); + } +} +/** + * Function: armSatRoundFloatToS16 + * + * Description: + * Converts a double precision value into a short int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16 format + * + */ + +OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value) +{ + if (Value > 0) + { + Value += 0.5; + + if(Value > (OMX_S16)OMX_MAX_S16 ) + { + return (OMX_S16)OMX_MAX_S16; + } + else + { + return (OMX_S16)Value; + } + } + else + { + Value -= 0.5; + + if(Value < 
(OMX_S16)OMX_MIN_S16 )
+        {
+            return (OMX_S16)OMX_MIN_S16;
+        }
+        else
+        {
+            return (OMX_S16)Value;
+        }
+    }
+}
+
+/**
+ * Function: armSatRoundFloatToS32
+ *
+ * Description:
+ * Converts a double precision value into a int after rounding and saturation
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S32 format
+ *
+ */
+
+OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value)
+{
+    if (Value > 0)
+    {
+        Value += 0.5;
+
+        if(Value > (OMX_S32)OMX_MAX_S32 )
+        {
+            return (OMX_S32)OMX_MAX_S32;
+        }
+        else
+        {
+            return (OMX_S32)Value;
+        }
+    }
+    else
+    {
+        Value -= 0.5;
+
+        if(Value < (OMX_S32)OMX_MIN_S32 )
+        {
+            return (OMX_S32)OMX_MIN_S32;
+        }
+        else
+        {
+            return (OMX_S32)Value;
+        }
+    }
+}
+
+/**
+ * Function: armSatRoundFloatToU16
+ *
+ * Description:
+ * Converts a double precision value into a unsigned short int after rounding
+ * (add 0.5, then truncate) and saturation to [0, OMX_MAX_U16].
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U16 format:
+ *       0 when Value + 0.5 is below 1 (this covers negative and NaN inputs),
+ *       OMX_MAX_U16 when Value + 0.5 exceeds OMX_MAX_U16
+ *
+ */
+
+OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value)
+{
+    Value += 0.5;
+
+    /* Saturate at the lower bound before converting: casting a floating
+     * value whose integral part is negative (or a NaN) to an unsigned type
+     * is undefined in C. The negated comparison is also true for NaN. */
+    if(!(Value >= 1.0))
+    {
+        return 0;
+    }
+
+    if(Value > (OMX_U16)OMX_MAX_U16 )
+    {
+        return (OMX_U16)OMX_MAX_U16;
+    }
+    else
+    {
+        return (OMX_U16)Value;
+    }
+}
+
+/**
+ * Function: armSatRoundFloatToU32
+ *
+ * Description:
+ * Converts a double precision value into a unsigned int after rounding
+ * (add 0.5, then truncate) and saturation to [0, OMX_MAX_U32].
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U32 format:
+ *       0 when Value + 0.5 is below 1 (this covers negative and NaN inputs),
+ *       OMX_MAX_U32 when Value + 0.5 exceeds OMX_MAX_U32
+ *
+ */
+
+OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value)
+{
+    Value += 0.5;
+
+    /* Lower-bound saturation; see armSatRoundFloatToU16 for the rationale. */
+    if(!(Value >= 1.0))
+    {
+        return 0;
+    }
+
+    if(Value > (OMX_U32)OMX_MAX_U32 )
+    {
+        return (OMX_U32)OMX_MAX_U32;
+    }
+    else
+    {
+        return (OMX_U32)Value;
+    }
+}
+
+/**
+ * Function: armRoundFloatToS64
+ *
+ * Description:
+ * Converts a double precision value into a 64 bit int after rounding
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S64 format
+ *
+ */
+
+OMX_S64
armRoundFloatToS64 (OMX_F64 Value) +{ + if (Value > 0) + { + return (OMX_S64)(Value + .5); + } + else + { + return (OMX_S64)(Value - .5); + } +} + +/** + * Function: armSignCheck + * + * Description: + * Checks the sign of a variable: + * returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + * + * Remarks: + * + * Parameters: + * [in] var Variable to be checked + * + * Return Value: + * OMX_INT -- returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + */ + +OMX_INT armSignCheck ( + OMX_S16 var +) + +{ + OMX_INT Sign; + + if (var < 0) + { + Sign = -1; + } + else if ( var > 0) + { + Sign = 1; + } + else + { + Sign = 0; + } + + return Sign; +} + +/** + * Function: armClip + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: + * + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to the clipped + * + * Return Value: + * OMX_S32 -- returns clipped value + */ + +OMX_S32 armClip ( + OMX_INT min, + OMX_INT max, + OMX_S32 src +) + +{ + if (src > max) + { + src = max; + } + else if (src < min) + { + src = min; + } + + return src; +} + +/** + * Function: armClip_F32 + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: + * + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to the clipped + * + * Return Value: + * OMX_F32 -- returns clipped value + */ + +OMX_F32 armClip_F32 ( + OMX_F32 min, + OMX_F32 max, + OMX_F32 src +) + +{ + if (src > max) + { + src = max; + } + else if (src < min) + { + src = min; + } + + return src; +} + +/** + * Function: armShiftSat_F32 + * + * Description: Divides a float value by 2^shift and + * saturates it for unsigned value range for satBits. + * Second parameter is like "shifting" the corresponding + * integer value. Takes care of rounding while clipping the final + * value. 
+ *
+ * Parameters:
+ * [in]  v          Number to be operated upon
+ * [in]  shift      Divides the input "v" by "2^shift". The divisor is built
+ *                  as (1 << shift) on an int, so shift is expected to be a
+ *                  small non-negative count.
+ * [in]  satBits    Final range is [0, 2^satBits). Values <= 0 yield 0,
+ *                  values >= 32 saturate at the largest OMX_U32.
+ *
+ * Return Value:
+ * OMX_U32 -- returns "shifted" saturated value; 0 for v <= 0 or NaN
+ */
+
+OMX_U32 armShiftSat_F32(OMX_F32 v, OMX_INT shift, OMX_INT satBits)
+{
+    OMX_U32 allOnes = (OMX_U32)(-1);
+    OMX_U32 maxV;
+    OMX_F32 vShifted, vRounded, shiftDiv = (OMX_F32)(1 << shift);
+
+    /* Written as a negated test so that NaN also takes the zero path. */
+    if(!(v > 0))
+    {
+        return 0;
+    }
+
+    /* An empty range [0, 2^0) can only hold 0. Handling it here also keeps
+     * the shift below away from a count of 32, which is undefined in C. */
+    if(satBits <= 0)
+    {
+        return 0;
+    }
+    maxV = (satBits >= 32) ? allOnes : (allOnes >> (32-satBits));
+
+    vShifted = v / shiftDiv;
+    vRounded = (OMX_F32)(vShifted + 0.5);
+
+    /* Saturate while still in floating point: converting a value beyond the
+     * OMX_U32 range to an integer is undefined, so the clamp cannot be left
+     * until after the cast. */
+    if(vRounded >= (OMX_F32)maxV)
+    {
+        return maxV;
+    }
+    return (OMX_U32)vRounded;
+}
+
+/**
+ * Functions: armSwapElem
+ *
+ * Description:
+ * This function swaps two elements at the specified pointer locations.
+ * The size of each element, in OMX_U8 units, is given by elemSize.
+ *
+ * Return Value:
+ * OMXResult -- Error status from the function
+ */
+OMXResult armSwapElem(
+        OMX_U8 *pBuf1,
+        OMX_U8 *pBuf2,
+        OMX_INT elemSize
+       )
+{
+    OMX_INT i;
+    OMX_U8 temp;
+    armRetArgErrIf(!pBuf1 || !pBuf2, OMX_Sts_BadArgErr);
+
+    for(i = 0; i < elemSize; i++)
+    {
+        temp = *(pBuf1 + i);
+        *(pBuf1 + i) = *(pBuf2 + i);
+        *(pBuf2 + i) = temp;
+    }
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: armMedianOf3
+ *
+ * Description: Finds the median of three numbers
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] fEntry     First entry
+ * [in] sEntry     second entry
+ * [in] tEntry     Third entry
+ *
+ * Return Value:
+ * OMX_S32 -- returns the median value
+ */
+
+OMX_S32 armMedianOf3 (
+    OMX_S32 fEntry,
+    OMX_S32 sEntry,
+    OMX_S32 tEntry
+)
+{
+    OMX_S32 a, b, c;
+
+    a = armMin (fEntry, sEntry);
+    b = armMax (fEntry, sEntry);
+    c = armMin (b, tEntry);
+    return (armMax (a, c));
+}
+
+/**
+ * Function: armLogSize
+ *
+ * Description: Finds the size of a positive value and returns the same
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] value    Positive value
+ *
+ * Return Value:
+ * OMX_U8 -- Returns the minimum
number of bits required to represent the positive value.
+ *           This is the smallest k>=0 such that the value is less than (1<<k).
+ *
+ * NOTE(review): the end of this comment, the signature and the loop header
+ * were damaged by text extraction and have been reconstructed; confirm the
+ * parameter type (OMX_U16) against the prototype in armCOMM.h.
+ */
+
+OMX_U8 armLogSize (
+    OMX_U16 value
+)
+{
+    OMX_U8 i;
+    for ( i = 0; value > 0; value = value >> 1)
+    {
+        i++;
+    }
+    return i;
+}
+
+/***********************************************************************/
+    /* Saturating Arithmetic operations */
+
+/**
+ * Function :armSatAdd_S32()
+ *
+ * Description :
+ *   Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Value1 + Value2, clamped to [OMX_MIN_S32, OMX_MAX_S32]
+ *
+ **/
+
+OMX_S32 armSatAdd_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    /* Typed copies of the limits: the literal types behind the OMX_MAX/MIN
+     * macros are not visible in this file, so convert them explicitly. */
+    const OMX_S64 MaxS32 = (OMX_S32)OMX_MAX_S32;
+    const OMX_S64 MinS32 = (OMX_S32)OMX_MIN_S32;
+
+    /* Add in 64 bits. Forming the 32-bit sum first and testing its sign
+     * afterwards relies on signed overflow, which is undefined in C. */
+    OMX_S64 Sum = (OMX_S64)Value1 + (OMX_S64)Value2;
+
+    if(Sum > MaxS32)
+    {
+        /*Result has saturated in positive side*/
+        return (OMX_S32)MaxS32;
+    }
+    if(Sum < MinS32)
+    {
+        /*Result has saturated in negative side*/
+        return (OMX_S32)MinS32;
+    }
+
+    /*Result has not saturated*/
+    return (OMX_S32)Sum;
+}
+
+/**
+ * Function :armSatAdd_S64()
+ *
+ * Description :
+ *   Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Value1 + Value2, clamped to [OMX_MIN_S64, OMX_MAX_S64]
+ *
+ **/
+
+OMX_S64 armSatAdd_S64(OMX_S64 Value1,OMX_S64 Value2)
+{
+    /* Typed copies of the limits; see armSatAdd_S32. */
+    const OMX_S64 MaxS64 = (OMX_S64)OMX_MAX_S64;
+    const OMX_S64 MinS64 = (OMX_S64)OMX_MIN_S64;
+
+    /* No wider type is available here, so test the remaining headroom
+     * before adding instead of inspecting an already overflowed sum. */
+    if( (Value2 > 0) && (Value1 > MaxS64 - Value2) )
+    {
+        /*Result has saturated in positive side*/
+        return MaxS64;
+    }
+    if( (Value2 < 0) && (Value1 < MinS64 - Value2) )
+    {
+        /*Result has saturated in negative side*/
+        return MinS64;
+    }
+
+    /*Result has not saturated*/
+    return Value1 + Value2;
+}
+
+/** Function :armSatSub_S32()
+ *
+ * Description :
+ *   Returns the result of saturated subtraction of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1       First
Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ *
+ **/
+
+OMX_S32 armSatSub_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    /* Typed copies of the limits: the literal types behind the OMX_MAX/MIN
+     * macros are not visible in this file, so convert them explicitly. */
+    const OMX_S64 MaxS32 = (OMX_S32)OMX_MAX_S32;
+    const OMX_S64 MinS32 = (OMX_S32)OMX_MIN_S32;
+
+    /* Subtract in 64 bits. Forming the 32-bit difference first and testing
+     * its sign afterwards relies on signed overflow, which is undefined. */
+    OMX_S64 Diff = (OMX_S64)Value1 - (OMX_S64)Value2;
+
+    if(Diff > MaxS32)
+    {
+        /*Result has saturated in positive side*/
+        return (OMX_S32)MaxS32;
+    }
+    if(Diff < MinS32)
+    {
+        /*Result has saturated in negative side*/
+        return (OMX_S32)MinS32;
+    }
+
+    /*Result has not saturated*/
+    return (OMX_S32)Diff;
+}
+
+/**
+ * Function :armSatMac_S32()
+ *
+ * Description :
+ *     Returns the result of Multiplication of Value1 and Value2 and subsequent saturated
+ *     accumulation with Mac
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ * [in] Mac          Accumulator
+ *
+ * Return:
+ * [out]             Result of operation
+ **/
+
+OMX_S32 armSatMac_S32(OMX_S32 Mac,OMX_S16 Value1,OMX_S16 Value2)
+{
+    OMX_S32 Result;
+
+    Result = (OMX_S32)(Value1*Value2);
+    Result = armSatAdd_S32( Mac , Result );
+
+    return Result;
+}
+
+/**
+ * Function :armSatMac_S16S32_S32
+ *
+ * Description :
+ *   Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac
+ *
+ *   mac = mac + Saturate_in_32Bits(delayElem * filTap)
+ *
+ * Parameters:
+ * [in] delayElem    First 32 bit Operand
+ * [in] filTap       Second 16 bit Operand
+ * [in] mac          Result of MAC operation
+ *
+ * Return:
+ * [out]  mac        Result of operation
+ *
+ **/
+
+OMX_S32 armSatMac_S16S32_S32(OMX_S32 mac, OMX_S32 delayElem, OMX_S16 filTap )
+{
+
+    OMX_S32 result;
+
+    /* NOTE(review): the value returned by armSatMulS16S32_S32 is only used
+     * as an overflow probe - when it leaves the S16 range the product is
+     * replaced by the matching S32 limit, otherwise the plain 32-bit
+     * product is taken. Confirm this against the reference behaviour. */
+    result = armSatMulS16S32_S32(filTap,delayElem);
+
+    if ( result > OMX_MAX_S16 )
+    {
+        result = OMX_MAX_S32;
+    }
+    else if( result < OMX_MIN_S16 )
+    {
+        result = OMX_MIN_S32;
+    }
+    else
+    {
+        result = delayElem * filTap;
+    }
+
+    mac = armSatAdd_S32(mac,result);
+
+    return mac;
+}
+
+
+/**
+ * Function :armSatRoundRightShift_S32_S16
+ *
+ * Description :
+ *   Returns the result of
rounded right shift operation of input by the scalefactor + * + * output = Saturate_in_16Bits( ( Right/LeftShift( (Round(input) , shift ) ) + * + * Parametrs: + * [in] input The input to be operated on + * [in] shift The shift number + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S16 armSatRoundRightShift_S32_S16(OMX_S32 input, OMX_INT shift) +{ + input = armSatRoundLeftShift_S32(input,-shift); + + if ( input > OMX_MAX_S16 ) + { + return (OMX_S16)OMX_MAX_S16; + } + else if (input < OMX_MIN_S16) + { + return (OMX_S16)OMX_MIN_S16; + } + else + { + return (OMX_S16)input; + } + +} + +/** + * Function :armSatRoundLeftShift_S32() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. + * + * Parametrs: + * [in] Value Operand + * [in] Shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatRoundLeftShift_S32(OMX_S32 Value, OMX_INT Shift) +{ + OMX_INT i; + + if (Shift < 0) + { + Shift = -Shift; + Value = armSatAdd_S32(Value, (1 << (Shift - 1))); + Value = Value >> Shift; + } + else + { + for (i = 0; i < Shift; i++) + { + Value = armSatAdd_S32(Value, Value); + } + } + return Value; +} + +/** + * Function :armSatRoundLeftShift_S64() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. 
+ * + * Parametrs: + * [in] Value Operand + * [in] shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S64 armSatRoundLeftShift_S64(OMX_S64 Value, OMX_INT Shift) +{ + OMX_INT i; + + if (Shift < 0) + { + Shift = -Shift; + Value = armSatAdd_S64(Value, ((OMX_S64)1 << (Shift - 1))); + Value = Value >> Shift; + } + else + { + for (i = 0; i < Shift; i++) + { + Value = armSatAdd_S64(Value, Value); + } + } + return Value; +} + +/** + * Function :armSatMulS16S32_S32() + * + * Description : + * Returns the result of a S16 data type multiplied with an S32 data type + * in a S32 container + * + * Parametrs: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S32 armSatMulS16S32_S32(OMX_S16 input1,OMX_S32 input2) +{ + OMX_S16 hi2,lo1; + OMX_U16 lo2; + + OMX_S32 temp1,temp2; + OMX_S32 result; + + lo1 = input1; + + hi2 = ( input2 >> 16 ); + lo2 = ( (OMX_U32)( input2 << 16 ) >> 16 ); + + temp1 = hi2 * lo1; + temp2 = ( lo2* lo1 ) >> 16; + + result = armSatAdd_S32(temp1,temp2); + + return result; +} + +/** + * Function :armSatMulS32S32_S32() + * + * Description : + * Returns the result of a S32 data type multiplied with an S32 data type + * in a S32 container + * + * Parametrs: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatMulS32S32_S32(OMX_S32 input1,OMX_S32 input2) +{ + OMX_S16 hi1,hi2; + OMX_U16 lo1,lo2; + + OMX_S32 temp1,temp2,temp3; + OMX_S32 result; + + hi1 = ( input1 >> 16 ); + lo1 = ( (OMX_U32)( input1 << 16 ) >> 16 ); + + hi2 = ( input2 >> 16 ); + lo2 = ( (OMX_U32)( input2 << 16 ) >> 16 ); + + temp1 = hi1 * hi2; + temp2 = ( hi1* lo2 ) >> 16; + temp3 = ( hi2* lo1 ) >> 16; + + result = armSatAdd_S32(temp1,temp2); + result = armSatAdd_S32(result,temp3); + + return result; +} + +/** + * Function :armIntDivAwayFromZero() + * + * Description : Integer division with rounding to the nearest 
integer. + * Half-integer values are rounded away from zero + * unless otherwise specified. For example 3//2 is rounded + * to 2, and -3//2 is rounded to -2. + * + * Parametrs: + * [in] Num Operand 1 + * [in] Deno Operand 2 + * + * Return: + * [out] Result of operation input1//input2 + * + **/ + +OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno) +{ + OMX_F64 result; + + result = ((OMX_F64)Num)/((OMX_F64)Deno); + + if (result >= 0) + { + result += 0.5; + } + else + { + result -= 0.5; + } + + return (OMX_S32)(result); +} + + +/*End of File*/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c new file mode 100644 index 0000000000000000000000000000000000000000..9ef9319d3056336525f7c5d4adcd77a4bfb9e1f3 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c @@ -0,0 +1,329 @@ +/** + * + * File Name: armCOMM_Bitstream.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ *
+ *
+ *
+ * Defines bitstream encode and decode functions common to all codecs
+ */
+
+#include "omxtypes.h"
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+
+/***************************************
+ * Fixed bit length Decode
+ ***************************************/
+
+/**
+ * Function: armLookAheadBits()
+ *
+ * Description:
+ * Get the next N bits from the bitstream without advancing the bitstream pointer
+ *
+ * Remarks:
+ * Four bytes are always read from *ppBitStream, and a fifth one when
+ * *pOffset is non-zero, regardless of N; the caller must make sure they
+ * are readable.
+ *
+ * Parameters:
+ * [in]     **ppBitStream
+ * [in]     *pOffset       bit position inside the current byte, 0..7
+ * [in]     N=1...32
+ *
+ * Returns  Value          the N bits, right aligned
+ */
+
+OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+
+    /* Read next 32 bits from stream. Each byte is widened to OMX_U32 before
+     * shifting so that a set top bit is never shifted into the sign bit of
+     * a promoted int. */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16) |
+            ((OMX_U32)pBitStream[2] << 8 ) |  (OMX_U32)pBitStream[3];
+
+    /* The fifth byte only contributes when the window is not byte aligned;
+     * with Offset == 0 it would add nothing, so it is not read at all. */
+    if (Offset != 0)
+    {
+        Value = (Value << Offset) | ((OMX_U32)pBitStream[4] >> (8-Offset));
+    }
+
+    /* Return N bits */
+    return Value >> (32-N);
+}
+
+
+/**
+ * Function: armGetBits()
+ *
+ * Description:
+ * Read N bits from the bitstream
+ *
+ * Remarks:
+ * N == 0 returns 0 and leaves the stream position untouched. Otherwise
+ * four bytes are read from *ppBitStream, and a fifth one when *pOffset is
+ * non-zero, regardless of N.
+ *
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset       bit position inside the current byte, 0..7
+ * [in]     N=1..32
+ *
+ * [out]    *ppBitStream   advanced by the whole bytes consumed
+ * [out]    *pOffset       new bit position, 0..7
+ * Returns  Value          the N bits, right aligned
+ */
+
+
+OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+
+    if(N == 0)
+    {
+        return 0;
+    }
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+
+    /* Read next 32 bits from stream; bytes are widened to OMX_U32 before
+     * shifting to keep the shifts out of the sign bit of a promoted int. */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16) |
+            ((OMX_U32)pBitStream[2] << 8 ) |  (OMX_U32)pBitStream[3];
+
+    /* Only a non-aligned window needs bits from the fifth byte. */
+    if (Offset != 0)
+    {
+        Value = (Value << Offset) | ((OMX_U32)pBitStream[4] >> (8-Offset));
+    }
+
+    /* Advance bitstream pointer by N bits */
+    Offset += N;
+    *ppBitStream = pBitStream + (Offset>>3);
+    *pOffset = Offset & 7;
+
+    /* Return N bits */
+    return Value >> (32-N);
+}
+
+/**
+ * Function: armByteAlign()
+ *
+ * Description:
+ * Align
the pointer *ppBitStream to the next byte boundary + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + +OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset) +{ + if(*pOffset > 0) + { + *ppBitStream += 1; + *pOffset = 0; + } +} + +/** + * Function: armSkipBits() + * + * Description: + * Skip N bits from the value at *ppBitStream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * [in] N + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + + +OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N) +{ + OMX_INT Offset = *pOffset; + const OMX_U8 *pBitStream = *ppBitStream; + + /* Advance bitstream pointer by N bits */ + Offset += N; + *ppBitStream = pBitStream + (Offset>>3); + *pOffset = Offset & 7; +} + +/*************************************** + * Variable bit length Decode + ***************************************/ + +/** + * Function: armUnPackVLC32() + * + * Description: + * Variable length decode of variable length symbol (max size 32 bits) read from + * the bit stream pointed by *ppBitStream at *pOffset by using the table + * pointed by pCodeBook + * + * Parameters: + * [in] *pBitStream + * [in] *pOffset + * [in] pCodeBook + * + * [out] *pBitStream + * [out] *pOffset + * + * Returns : Code Book Index if successfull. + * : ARM_NO_CODEBOOK_INDEX = -1 if search fails. 
+ **/
+#ifndef C_OPTIMIZED_IMPLEMENTATION
+
+OMX_U16 armUnPackVLC32(
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pOffset,
+    const ARM_VLC32 *pCodeBook
+)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+    OMX_INT Index;
+
+    armAssert(Offset>=0 && Offset<=7);
+
+    /* Read next 32 bits from stream. Each byte is widened to OMX_U32 before
+     * shifting so that a set top bit is never shifted into the sign bit of
+     * a promoted int. */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16) |
+            ((OMX_U32)pBitStream[2] << 8 ) |  (OMX_U32)pBitStream[3];
+
+    /* The fifth byte only contributes when the window is not byte aligned;
+     * with Offset == 0 it would add nothing, so it is not read at all. */
+    if (Offset != 0)
+    {
+        Value = (Value << Offset) | ((OMX_U32)pBitStream[4] >> (8-Offset));
+    }
+
+    /* Search through the codebook; an entry with codeLen == 0 ends the table */
+    for (Index=0; pCodeBook->codeLen != 0; Index++)
+    {
+        /* Compare the entry against the top codeLen bits of the window */
+        if (pCodeBook->codeWord == (Value >> (32 - pCodeBook->codeLen)))
+        {
+            /* Match: consume codeLen bits from the stream */
+            Offset       = Offset + pCodeBook->codeLen;
+            *ppBitStream = pBitStream + (Offset >> 3) ;
+            *pOffset     = Offset & 7;
+
+            return Index;
+        }
+        pCodeBook++;
+    }
+
+    /* No code match found; the stream position is left unchanged */
+    return ARM_NO_CODEBOOK_INDEX;
+}
+
+#endif
+
+/***************************************
+ * Fixed bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackBits
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream     pointer to the pointer to the current byte
+ *                      in the bit stream.
+ * [in] pOffset         pointer to the bit position in the byte
+ *                      pointed by *ppBitStream. Valid within 0
+ *                      to 7.
+ * [in] codeWord        Code word that need to be inserted in to the
+ *                      bitstream
+ * [in] codeLength      Length of the code word valid range 1...32
+ *
+ * [out] ppBitStream    *ppBitStream is updated after the block is encoded,
+ *                      so that it points to the current byte in the bit
+ *                      stream buffer.
+ * [out] pBitOffset     *pBitOffset is updated so that it points to the
+ *                      current bit position in the byte pointed by
+ *                      *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ * + */ + +OMXResult armPackBits ( + OMX_U8 **ppBitStream, + OMX_INT *pOffset, + OMX_U32 codeWord, + OMX_INT codeLength +) +{ + OMX_U8 *pBitStream = *ppBitStream; + OMX_INT Offset = *pOffset; + OMX_U32 Value; + + /* checking argument validity */ + armRetArgErrIf(Offset < 0, OMX_Sts_BadArgErr); + armRetArgErrIf(Offset > 7, OMX_Sts_BadArgErr); + armRetArgErrIf(codeLength < 1, OMX_Sts_BadArgErr); + armRetArgErrIf(codeLength > 32, OMX_Sts_BadArgErr); + + /* Prepare the first byte */ + codeWord = codeWord << (32-codeLength); + Value = (pBitStream[0] >> (8-Offset)) << (8-Offset); + Value = Value | (codeWord >> (24+Offset)); + + /* Write out whole bytes */ + while (8-Offset <= codeLength) + { + *pBitStream++ = (OMX_U8)Value; + codeWord = codeWord << (8-Offset); + codeLength = codeLength - (8-Offset); + Offset = 0; + Value = codeWord >> 24; + } + + /* Write out final partial byte */ + *pBitStream = (OMX_U8)Value; + *ppBitStream = pBitStream; + *pOffset = Offset + codeLength; + + return OMX_Sts_NoErr; +} + +/*************************************** + * Variable bit length Encode + ***************************************/ + +/** + * Function: armPackVLC32 + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pBitOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. + * [in] code VLC code word that need to be inserted in to the + * bitstream + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armPackVLC32 ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + ARM_VLC32 code +) +{ + return (armPackBits(ppBitStream, pBitOffset, code.codeWord, code.codeLen)); +} + +/*End of File*/ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c new file mode 100644 index 0000000000000000000000000000000000000000..9e4679cdb317eb3ead9d7651975c739e42980d08 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c @@ -0,0 +1,60 @@ +/** + * + * File Name: armCOMM_IDCTTable.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armCOMM_IDCTTable.c + * Brief: Defines Tables used in IDCT computation + * + */ + +#include "armCOMM_IDCTTable.h" + + /* Table of s(u)*A(u)*A(v)/16 at Q15 + * s(u)=1.0 0 <= u <= 5 + * s(6)=2.0 + * s(7)=4.0 + * A(0) = 2*sqrt(2) + * A(u) = 4*cos(u*pi/16) for (u!=0) + */ + +__align(4) const OMX_U16 armCOMM_IDCTPreScale [64] = +{ + 0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1, + 0x58c5, 0x7b21, 0x73fc, 0x6862, 0x58c5, 0x45bf, 0x6016, 0x61f8, + 0x539f, 0x73fc, 0x6d41, 0x6254, 0x539f, 0x41b3, 0x5a82, 0x5c48, + 0x4b42, 0x6862, 0x6254, 0x587e, 0x4b42, 0x3b21, 0x5175, 0x530d, + 0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1, + 0x3249, 0x45bf, 0x41b3, 0x3b21, 0x3249, 0x2782, 0x366d, 0x377e, + 0x22a3, 0x300b, 0x2d41, 0x28ba, 0x22a3, 0x1b37, 0x257e, 0x263a, + 0x11a8, 0x187e, 0x1712, 0x14c3, 0x11a8, 0x0de0, 0x131d, 0x137d +}; + /* Above array armCOMM_IDCTPreScale, in Q23 format */ +const OMX_U32 armCOMM_IDCTPreScaleU32 [64] = +{ + 0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157, + 0x58c543, 0x7b20d8, 0x73fbfc, 0x686214, 0x58c543, 0x45bf1f, 0x6015a5, 0x61f78b, + 0x539eba, 0x73fbfc, 0x6d413d, 0x6253a6, 0x539eba, 0x41b328, 
0x5a827a, 0x5c4869, + 0x4b418c, 0x686214, 0x6253a6, 0x587de3, 0x4b418c, 0x3b20d8, 0x5174e0, 0x530d69, + 0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157, + 0x3248d4, 0x45bf1f, 0x41b328, 0x3b20d8, 0x3248d4, 0x27821d, 0x366d72, 0x377e6b, + 0x22a2f5, 0x300ad3, 0x2d413d, 0x28ba70, 0x22a2f5, 0x1b36b9, 0x257d86, 0x26398d, + 0x11a856, 0x187de3, 0x17121a, 0x14c35a, 0x11a856, 0x0ddf9b, 0x131cc7, 0x137ca2 +}; + +const OMX_U16 armCOMM_IDCTCoef [4] = +{ + 0x5a82, /* InvSqrt2 */ + 0x30fc, /* SinPIBy8 */ + 0x7642, /* CosPIBy8 */ + 0x0000 +}; + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c new file mode 100644 index 0000000000000000000000000000000000000000..3241db26ce03444b79fc6351a9af03c1331cd1f3 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c @@ -0,0 +1,45 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armCOMM_MaskTable.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Mask Table to mask the end of array. 
+ * + */ + +#include "omxtypes.h" + +#define MaskTableSize 72 + +const OMX_U16 armCOMM_qMaskTable16[MaskTableSize] = +{ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF +}; + +const OMX_U8 armCOMM_qMaskTable8[MaskTableSize] = +{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h new file mode 100644 index 0000000000000000000000000000000000000000..7fa7716740b9122ae10b1c8f5f1e5ed2c8aac4c9 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h @@ -0,0 +1,1153 @@ +/** + * + * File Name: armVC.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * File: armVideo.h + * Brief: Declares API's/Basic Data types used across the OpenMAX Video domain + * + */ + + +#ifndef _armVideo_H_ +#define _armVideo_H_ + +#include "omxVC.h" +#include "armCOMM_Bitstream.h" + +/** + * ARM specific state structure to hold Motion Estimation information. + */ + +struct m4p2_MESpec +{ + OMXVCM4P2MEParams MEParams; + OMXVCM4P2MEMode MEMode; +}; + +struct m4p10_MESpec +{ + OMXVCM4P10MEParams MEParams; + OMXVCM4P10MEMode MEMode; +}; + +typedef struct m4p2_MESpec ARMVCM4P2_MESpec; +typedef struct m4p10_MESpec ARMVCM4P10_MESpec; + +/** + * Function: armVCM4P2_CompareMV + * + * Description: + * Performs comparision of motion vectors and SAD's to decide the + * best MV and SAD + * + * Remarks: + * + * Parameters: + * [in] mvX x coordinate of the candidate motion vector + * [in] mvY y coordinate of the candidate motion vector + * [in] candSAD Candidate SAD + * [in] bestMVX x coordinate of the best motion vector + * [in] bestMVY y coordinate of the best motion vector + * [in] bestSAD best SAD + * + * Return Value: + * OMX_INT -- 1 to indicate that the current sad is the best + * 0 to indicate that it is NOT the best SAD + */ + +OMX_INT armVCM4P2_CompareMV ( + OMX_S16 mvX, + OMX_S16 mvY, + OMX_INT candSAD, + OMX_S16 bestMVX, + OMX_S16 bestMVY, + OMX_INT bestSAD); + +/** + * Function: armVCM4P2_ACDCPredict + * + * Description: + * Performs adaptive DC/AC coefficient prediction for an intra block. Prior + * to the function call, prediction direction (predDir) should be selected + * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2. + * + * Remarks: + * + * Parameters: + * [in] pSrcDst pointer to the coefficient buffer which contains + * the quantized coefficient residuals (PQF) of the + * current block + * [in] pPredBufRow pointer to the coefficient row buffer + * [in] pPredBufCol pointer to the coefficient column buffer + * [in] curQP quantization parameter of the current block. 
curQP + * may equal to predQP especially when the current + * block and the predictor block are in the same + * macroblock. + * [in] predQP quantization parameter of the predictor block + * [in] predDir indicates the prediction direction which takes one + * of the following values: + * OMX_VIDEO_HORIZONTAL predict horizontally + * OMX_VIDEO_VERTICAL predict vertically + * [in] ACPredFlag a flag indicating if AC prediction should be + * performed. It is equal to ac_pred_flag in the bit + * stream syntax of MPEG-4 + * [in] videoComp video component type (luminance, chrominance or + * alpha) of the current block + * [in] flag This flag defines the if one wants to use this functions to + * calculate PQF (set 1, prediction) or QF (set 0, reconstruction) + * [out] pPreACPredict pointer to the predicted coefficients buffer. + * Filled ONLY if it is not NULL + * [out] pSrcDst pointer to the coefficient buffer which contains + * the quantized coefficients (QF) of the current + * block + * [out] pPredBufRow pointer to the updated coefficient row buffer + * [out] pPredBufCol pointer to the updated coefficient column buffer + * [out] pSumErr pointer to the updated sum of the difference + * between predicted and unpredicted coefficients + * If this is NULL, do not update + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_ACDCPredict( + OMX_S16 * pSrcDst, + OMX_S16 * pPreACPredict, + OMX_S16 * pPredBufRow, + OMX_S16 * pPredBufCol, + OMX_INT curQP, + OMX_INT predQP, + OMX_INT predDir, + OMX_INT ACPredFlag, + OMXVCM4P2VideoComponent videoComp, + OMX_U8 flag, + OMX_INT *pSumErr +); + +/** + * Function: armVCM4P2_SetPredDir + * + * Description: + * Performs detecting the prediction direction + * + * Remarks: + * + * Parameters: + * [in] blockIndex block index indicating the component type and + * position as defined in subclause 6.1.3.8, of ISO/IEC + * 14496-2. 
Furthermore, indexes 6 to 9 indicate the + * alpha blocks spatially corresponding to luminance + * blocks 0 to 3 in the same macroblock. + * [in] pCoefBufRow pointer to the coefficient row buffer + * [in] pQpBuf pointer to the quantization parameter buffer + * [out] predQP quantization parameter of the predictor block + * [out] predDir indicates the prediction direction which takes one + * of the following values: + * OMX_VIDEO_HORIZONTAL predict horizontally + * OMX_VIDEO_VERTICAL predict vertically + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_SetPredDir( + OMX_INT blockIndex, + OMX_S16 *pCoefBufRow, + OMX_S16 *pCoefBufCol, + OMX_INT *predDir, + OMX_INT *predQP, + const OMX_U8 *pQpBuf +); + +/** + * Function: armVCM4P2_EncodeVLCZigzag_Intra + * + * Description: + * Performs zigzag scanning and VLC encoding for one intra block. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7. + * [in] pQDctBlkCoef pointer to the quantized DCT coefficient + * [in] predDir AC prediction direction, which is used to decide + * the zigzag scan pattern. This takes one of the + * following values: + * OMX_VIDEO_NONE AC prediction not used. + * Performs classical zigzag + * scan. + * OMX_VIDEO_HORIZONTAL Horizontal prediction. + * Performs alternate-vertical + * zigzag scan. + * OMX_VIDEO_VERTICAL Vertical prediction. + * Performs alternate-horizontal + * zigzag scan. + * [in] pattern block pattern which is used to decide whether + * this block is encoded + * [in] start start indicates whether the encoding begins with 0th element + * or 1st. + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. 
+ * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_EncodeVLCZigzag_Intra( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 predDir, + OMX_U8 pattern, + OMX_INT shortVideoHeader, + OMX_U8 start +); + +/** + * Function: armVCM4P2_DecodeVLCZigzag_Intra + * + * Description: + * Performs VLC decoding and inverse zigzag scan for one intra coded block. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bitstream buffer + * [in] pBitOffset pointer to the bit position in the byte pointed + * to by *ppBitStream. *pBitOffset is valid within + * [0-7]. + * [in] predDir AC prediction direction which is used to decide + * the zigzag scan pattern. It takes one of the + * following values: + * OMX_VIDEO_NONE AC prediction not used; + * perform classical zigzag scan; + * OMX_VIDEO_HORIZONTAL Horizontal prediction; + * perform alternate-vertical + * zigzag scan; + * OMX_VIDEO_VERTICAL Vertical prediction; + * thus perform + * alternate-horizontal + * zigzag scan. + * [in] videoComp video component type (luminance, chrominance or + * alpha) of the current block + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] start start indicates whether the encoding begins with 0th element + * or 1st. + * [out] ppBitStream *ppBitStream is updated after the block is + * decoded, so that it points to the current byte + * in the bit stream buffer + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream + * [out] pDst pointer to the coefficient buffer of current + * block. 
Should be 32-bit aligned + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_DecodeVLCZigzag_Intra( + const OMX_U8 ** ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_U8 predDir, + OMX_INT shortVideoHeader, + OMX_U8 start +); + +/** + * Function: armVCM4P2_FillVLDBuffer + * + * Description: + * Performs filling of the coefficient buffer according to the run, level + * and sign, also updates the index + * + * Parameters: + * [in] storeRun Stored Run value (count of zeros) + * [in] storeLevel Stored Level value (non-zero value) + * [in] sign Flag indicating the sign of level + * [in] last status of the last flag + * [in] pIndex pointer to coefficient index in 8x8 matrix + * [out] pIndex pointer to updated coefficient index in 8x8 + * matrix + * [in] pZigzagTable pointer to the zigzag tables + * [out] pDst pointer to the coefficient buffer of current + * block. Should be 32-bit aligned + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_FillVLDBuffer( + OMX_U32 storeRun, + OMX_S16 * pDst, + OMX_S16 storeLevel, + OMX_U8 sign, + OMX_U8 last, + OMX_U8 * index, + const OMX_U8 * pZigzagTable +); + +/** + * Function: armVCM4P2_GetVLCBits + * + * Description: + * Performs escape mode decision based on the run, run+, level, level+ and + * last combinations. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7 + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] start start indicates whether the encoding begins with + * 0th element or 1st. 
+ * [in/out] pLast pointer to last status flag + * [in] runBeginSingleLevelEntriesL0 The run value from which level + * will be equal to 1: last == 0 + * [in] IndexBeginSingleLevelEntriesL0 Array index in the VLC table + * pointing to the + * runBeginSingleLevelEntriesL0 + * [in] runBeginSingleLevelEntriesL1 The run value from which level + * will be equal to 1: last == 1 + * [in] IndexBeginSingleLevelEntriesL1 Array index in the VLC table + * pointing to the + * runBeginSingleLevelEntriesL0 + * [in] pRunIndexTableL0 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pVlcTableL0 VLC table for last == 0 + * [in] pRunIndexTableL1 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pVlcTableL1 VLC table for last == 1 + * [in] pLMAXTableL0 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pLMAXTableL1 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pRMAXTableL0 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pRMAXTableL1 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [out]pDst pointer to the coefficient buffer of current + * block. Should be 32-bit aligned + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armVCM4P2_GetVLCBits ( + const OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_INT shortVideoHeader, + OMX_U8 start, + OMX_U8 * pLast, + OMX_U8 runBeginSingleLevelEntriesL0, + OMX_U8 maxIndexForMultipleEntriesL0, + OMX_U8 maxRunForMultipleEntriesL1, + OMX_U8 maxIndexForMultipleEntriesL1, + const OMX_U8 * pRunIndexTableL0, + const ARM_VLC32 *pVlcTableL0, + const OMX_U8 * pRunIndexTableL1, + const ARM_VLC32 *pVlcTableL1, + const OMX_U8 * pLMAXTableL0, + const OMX_U8 * pLMAXTableL1, + const OMX_U8 * pRMAXTableL0, + const OMX_U8 * pRMAXTableL1, + const OMX_U8 * pZigzagTable +); + +/** + * Function: armVCM4P2_PutVLCBits + * + * Description: + * Checks the type of Escape Mode and put encoded bits for + * quantized DCT coefficients. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7 + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] start start indicates whether the encoding begins with + * 0th element or 1st. 
+ * [in] maxStoreRunL0 Max store possible (considering last and inter/intra) + * for last = 0 + * [in] maxStoreRunL1 Max store possible (considering last and inter/intra) + * for last = 1 + * [in] maxRunForMultipleEntriesL0 + * The run value after which level + * will be equal to 1: + * (considering last and inter/intra status) for last = 0 + * [in] maxRunForMultipleEntriesL1 + * The run value after which level + * will be equal to 1: + * (considering last and inter/intra status) for last = 1 + * [in] pRunIndexTableL0 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pVlcTableL0 VLC table for last == 0 + * [in] pRunIndexTableL1 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pVlcTableL1 VLC table for last == 1 + * [in] pLMAXTableL0 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pLMAXTableL1 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pRMAXTableL0 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pRMAXTableL1 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [out] pQDctBlkCoef pointer to the quantized DCT coefficient + * [out] ppBitStream *ppBitStream is updated after the block is encoded + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + */ + + +OMXResult armVCM4P2_PutVLCBits ( + OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_INT shortVideoHeader, + OMX_U8 start, + OMX_U8 maxStoreRunL0, + OMX_U8 maxStoreRunL1, + OMX_U8 maxRunForMultipleEntriesL0, + OMX_U8 maxRunForMultipleEntriesL1, + const OMX_U8 * pRunIndexTableL0, + const ARM_VLC32 *pVlcTableL0, + const OMX_U8 * pRunIndexTableL1, + const ARM_VLC32 *pVlcTableL1, + const OMX_U8 * pLMAXTableL0, + const OMX_U8 * pLMAXTableL1, + const OMX_U8 * pRMAXTableL0, + const OMX_U8 * pRMAXTableL1, + const OMX_U8 * pZigzagTable +); +/** + * Function: armVCM4P2_FillVLCBuffer + * + * Description: + * Performs calculating the VLC bits depending on the escape type and insert + * the same in the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7 + * [in] run Run value (count of zeros) to be encoded + * [in] level Level value (non-zero value) to be encoded + * [in] runPlus Calculated as runPlus = run - (RMAX + 1) + * [in] levelPlus Calculated as + * levelPlus = sign(level)*[abs(level) - LMAX] + * [in] fMode Flag indicating the escape modes + * [in] last status of the last flag + * [in] maxRunForMultipleEntries + * The run value after which level will be equal to 1: + * (considering last and inter/intra status) + * [in] pRunIndexTable Run Index table defined in + * armVCM4P2_Huff_tables_VLC.h + * [in] pVlcTable VLC table defined in armVCM4P2_Huff_tables_VLC.h + * [out] ppBitStream *ppBitStream is updated after the block is encoded + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armVCM4P2_FillVLCBuffer ( + OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + OMX_U32 run, + OMX_S16 level, + OMX_U32 runPlus, + OMX_S16 levelPlus, + OMX_U8 fMode, + OMX_U8 last, + OMX_U8 maxRunForMultipleEntries, + const OMX_U8 *pRunIndexTable, + const ARM_VLC32 *pVlcTable +); + +/** + * Function: armVCM4P2_CheckVLCEscapeMode + * + * Description: + * Performs escape mode decision based on the run, run+, level, level+ and + * last combinations. + * + * Remarks: + * + * Parameters: + * [in] run Run value (count of zeros) to be encoded + * [in] level Level value (non-zero value) to be encoded + * [in] runPlus Calculated as runPlus = run - (RMAX + 1) + * [in] levelPlus Calculated as + * levelPlus = sign(level)*[abs(level) - LMAX] + * [in] maxStoreRun Max store possible (considering last and inter/intra) + * [in] maxRunForMultipleEntries + * The run value after which level + * will be equal to 1: + * (considering last and inter/intra status) + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] pRunIndexTable Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c + * (considering last and inter/intra status) + * + * + * Return Value: + * Returns an Escape mode which can take values from 0 to 3 + * 0 --> no escape mode, 1 --> escape type 1, + * 2 --> escape type 2, 3 --> escape type 3, check section 7.4.1.3 + * in the MPEG ISO standard. + * + */ + +OMX_U8 armVCM4P2_CheckVLCEscapeMode( + OMX_U32 run, + OMX_U32 runPlus, + OMX_S16 level, + OMX_S16 levelPlus, + OMX_U8 maxStoreRun, + OMX_U8 maxRunForMultipleEntries, + OMX_INT shortVideoHeader, + const OMX_U8 *pRunIndexTable +); + + +/** + * Function: armVCM4P2_BlockMatch_Integer + * + * Description: + * Performs a 16x16 block search; estimates motion vector and associated minimum SAD.
+ * Both the input and output motion vectors are represented using half-pixel units, and + * therefore a shift left or right by 1 bit may be required, respectively, to match the + * input or output MVs with other functions that either generate output MVs or expect + * input MVs represented using integer pixel units. + * + * Remarks: + * + * Parameters: + * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB that + * corresponds to the location of the current macroblock in the current + * plane. + * [in] refWidth width of the reference plane + * [in] pRefRect pointer to the valid rectangle in reference plane. Relative to image origin. + * It's not limited to the image boundary, but depends on the padding. For example, + * if you pad 4 pixels outside the image border, then the value for left border + * can be -4 + * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane (linear array, + * 256 entries); must be aligned on an 8-byte boundary. + * [in] pCurrPointPos position of the current macroblock in the current plane + * [in] pSrcPreMV pointer to predicted motion vector; NULL indicates no predicted MV + * [in] pSrcPreSAD pointer to SAD associated with the predicted MV (referenced by pSrcPreMV) + * [in] searchRange search range for 16X16 integer block, the units of it is full pixel, the search range + * is the same in all directions. It is inclusive of the boundary and specified in + * terms of integer pixel units. + * [in] pMESpec vendor-specific motion estimation specification structure; must have been allocated + * and then initialized using omxVCM4P2_MEInit prior to calling the block matching + * function. + * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8. + * [out] pDstMV pointer to estimated MV + * [out] pDstSAD pointer to minimum SAD + * + * Return Value: + * OMX_Sts_NoErr - no error.
+ * OMX_Sts_BadArgErr - bad arguments + * + */ + +OMXResult armVCM4P2_BlockMatch_Integer( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCMotionVector *pSrcPreMV, + const OMX_INT *pSrcPreSAD, + void *pMESpec, + OMXVCMotionVector *pDstMV, + OMX_INT *pDstSAD, + OMX_U8 BlockSize +); + +/** + * Function: armVCM4P2_BlockMatch_Half + * + * Description: + * Performs a 16x16 block match with half-pixel resolution. Returns the estimated + * motion vector and associated minimum SAD. This function estimates the half-pixel + * motion vector by interpolating the integer resolution motion vector referenced + * by the input parameter pSrcDstMV, i.e., the initial integer MV is generated + * externally. The input parameters pSrcRefBuf and pSearchPointRefPos should be + * shifted by the winning MV of 16x16 integer search prior to calling BlockMatch_Half_16x16. + * The function BlockMatch_Integer_16x16 may be used for integer motion estimation. + * + * Remarks: + * + * Parameters: + * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB + * that corresponds to the location of the current macroblock in + * the current plane. + * [in] refWidth width of the reference plane + * [in] pRefRect reference plane valid region rectangle + * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane + * (linear array, 256 entries); must be aligned on an 8-byte boundary. + * [in] pSearchPointRefPos position of the starting point for half pixel search (specified + * in terms of integer pixel units) in the reference plane. + * [in] rndVal rounding control bit for half pixel motion estimation; + * 0=rounding control disabled; 1=rounding control enabled + * [in] pSrcDstMV pointer to the initial MV estimate; typically generated during a prior + * 16X16 integer search and its unit is half pixel.
+ * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8. + * [out]pSrcDstMV pointer to estimated MV + * [out]pDstSAD pointer to minimum SAD + * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * + */ + +OMXResult armVCM4P2_BlockMatch_Half( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pSearchPointRefPos, + OMX_INT rndVal, + OMXVCMotionVector *pSrcDstMV, + OMX_INT *pDstSAD, + OMX_U8 BlockSize +); +/** + * Function: armVCM4P2_PadMV + * + * Description: + * Performs motion vector padding for a macroblock. + * + * Remarks: + * + * Parameters: + * [in] pSrcDstMV pointer to motion vector buffer of the current + * macroblock + * [in] pTransp pointer to transparent status buffer of the + * current macroblock + * [out] pSrcDstMV pointer to motion vector buffer in which the + * motion vectors have been padded + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_PadMV( + OMXVCMotionVector * pSrcDstMV, + OMX_U8 * pTransp +); + +/* + * H.264 Specific Declarations + */ +/* Defines */ +#define ARM_M4P10_Q_OFFSET (15) + + +/* Dequant tables */ + +extern const OMX_U8 armVCM4P10_PosToVCol4x4[16]; +extern const OMX_U8 armVCM4P10_PosToVCol2x2[4]; +extern const OMX_U8 armVCM4P10_VMatrix[6][3]; +extern const OMX_U32 armVCM4P10_MFMatrix[6][3]; + + +/* + * Description: + * This function performs the work required by the OpenMAX + * DecodeCoeffsToPair function and DecodeChromaDCCoeffsToPair. + * Since most of the code is common we share it here.
+ * + * Parameters: + * [in] ppBitStream Double pointer to current byte in bit stream buffer + * [in] pOffset Pointer to current bit position in the byte pointed + * to by *ppBitStream + * [in] sMaxNumCoeff Maximum number of non-zero coefficients in current + * block (4,15 or 16) + * [in] nTable Table number (0 to 4) according to the five columns + * of Table 9-5 in the H.264 spec + * [out] ppBitStream *ppBitStream is updated after each block is decoded + * [out] pOffset *pOffset is updated after each block is decoded + * [out] pNumCoeff Pointer to the number of nonzero coefficients in + * this block + * [out] ppPosCoefbuf Double pointer to destination residual + * coefficient-position pair buffer + * Return Value: + * Standard omxError result. See enumeration for possible result codes. + + */ + +OMXResult armVCM4P10_DecodeCoeffsToPair( + const OMX_U8** ppBitStream, + OMX_S32* pOffset, + OMX_U8* pNumCoeff, + OMX_U8**ppPosCoefbuf, + OMX_INT nTable, + OMX_INT sMaxNumCoeff + ); + +/* + * Description: + * Perform DC style intra prediction, averaging upper and left block + * + * Parameters: + * [in] pSrcLeft Pointer to the buffer of 4 left coefficients: + * p[x, y] (x = -1, y = 0..3) + * [in] pSrcAbove Pointer to the buffer of 4 above coefficients: + * p[x,y] (x = 0..3, y = -1) + * [in] leftStep Step of left coefficient buffer + * [in] dstStep Step of the destination buffer + * [in] availability Neighboring 16x16 MB availability flag + * [out] pDst Pointer to the destination buffer + * + * Return Value: + * None + */ + +void armVCM4P10_PredictIntraDC4x4( + const OMX_U8* pSrcLeft, + const OMX_U8 *pSrcAbove, + OMX_U8* pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMX_S32 availability +); + +/* + * Description + * Unpack a 4x4 block of coefficient-residual pair values + * + * Parameters: + * [in] ppSrc Double pointer to residual coefficient-position pair + * buffer output by CAVLC decoding + * [out] ppSrc *ppSrc is updated to the start of next non empty block + * [out]
pDst Pointer to unpacked 4x4 block + */ + +void armVCM4P10_UnpackBlock4x4( + const OMX_U8 **ppSrc, + OMX_S16* pDst +); + +/* + * Description + * Unpack a 2x2 block of coefficient-residual pair values + * + * Parameters: + * [in] ppSrc Double pointer to residual coefficient-position pair + * buffer output by CAVLC decoding + * [out] ppSrc *ppSrc is updated to the start of next non empty block + * [out] pDst Pointer to unpacked 2x2 block + */ + +void armVCM4P10_UnpackBlock2x2( + const OMX_U8 **ppSrc, + OMX_S16* pDst +); + +/* + * Description + * Deblock one boundary pixel + * + * Parameters: + * [in] pQ0 Pointer to pixel q0 + * [in] Step Step between pixels q0 and q1 + * [in] tC0 Edge threshold value + * [in] alpha alpha threshold value + * [in] beta beta threshold value + * [in] bS deblocking strength + * [in] ChromaFlag True for chroma blocks + * [out] pQ0 Deblocked pixels + * + */ + +void armVCM4P10_DeBlockPixel( + OMX_U8 *pQ0, /* pointer to the pixel q0 */ + int Step, /* step between pixels q0 and q1 */ + int tC0, /* edge threshold value */ + int alpha, /* alpha */ + int beta, /* beta */ + int bS, /* deblocking strength */ + int ChromaFlag +); + +/** + * Function: armVCM4P10_InterpolateHalfHor_Luma + * + * Description: + * This function performs interpolation for horizontal 1/2-pel positions + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to interpolate + in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer of the 1/2-pel + * + * Return Value: + * Standard OMXResult value.
+ * + */ + +OMXResult armVCM4P10_InterpolateHalfHor_Luma( + const OMX_U8* pSrc, + OMX_U32 iSrcStep, + OMX_U8* pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/** + * Function: armVCM4P10_InterpolateHalfVer_Luma + * + * Description: + * This function performs interpolation for vertical 1/2-pel positions + * around a full-pel position. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to interpolate + * in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer of the 1/2-pel + * + * Return Value: + * Standard OMXResult value. + * + */ + +OMXResult armVCM4P10_InterpolateHalfVer_Luma( + const OMX_U8* pSrc, + OMX_U32 iSrcStep, + OMX_U8* pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/** + * Function: armVCM4P10_InterpolateHalfDiag_Luma + * + * Description: + * This function performs interpolation for (1/2, 1/2) positions + * around a full-pel position. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to interpolate + * in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer of the (1/2,1/2)-pel + * + * Return Value: + * Standard OMXResult value. 
+ * + */ + +OMXResult armVCM4P10_InterpolateHalfDiag_Luma( + const OMX_U8* pSrc, + OMX_U32 iSrcStep, + OMX_U8* pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/* + * Description: + * Transform Residual 4x4 Coefficients + * + * Parameters: + * [in] pSrc Source 4x4 block + * [out] pDst Destination 4x4 block + * + */ + +void armVCM4P10_TransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc); + +/* + * Description: + * Forward Transform Residual 4x4 Coefficients + * + * Parameters: + * [in] pSrc Source 4x4 block + * [out] pDst Destination 4x4 block + * + */ + +void armVCM4P10_FwdTransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc); + +OMX_INT armVCM4P10_CompareMotionCostToMV ( + OMX_S16 mvX, + OMX_S16 mvY, + OMXVCMotionVector diffMV, + OMX_INT candSAD, + OMXVCMotionVector *bestMV, + OMX_U32 nLamda, + OMX_S32 *pBestCost); + +/** + * Function: armVCCOMM_SAD + * + * Description: + * This function calculate the SAD for NxM blocks. + * + * Remarks: + * + * [in] pSrcOrg Pointer to the original block + * [in] iStepOrg Step of the original block buffer + * [in] pSrcRef Pointer to the reference block + * [in] iStepRef Step of the reference block buffer + * [in] iHeight Height of the block + * [in] iWidth Width of the block + * [out] pDstSAD Pointer of result SAD + * + * Return Value: + * Standard OMXResult value. + * + */ +OMXResult armVCCOMM_SAD( + const OMX_U8* pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8* pSrcRef, + OMX_U32 iStepRef, + OMX_S32* pDstSAD, + OMX_U32 iHeight, + OMX_U32 iWidth); + +/** + * Function: armVCCOMM_Average + * + * Description: + * This function calculates the average of two blocks and stores the result. 
+ * + * Remarks: + * + * [in] pPred0 Pointer to the top-left corner of reference block 0 + * [in] pPred1 Pointer to the top-left corner of reference block 1 + * [in] iPredStep0 Step of reference block 0 + * [in] iPredStep1 Step of reference block 1 + * [in] iDstStep Step of the destination buffer + * [in] iWidth Width of the blocks + * [in] iHeight Height of the blocks + * [out] pDstPred Pointer to the destination buffer + * + * Return Value: + * Standard OMXResult value. + * + */ + OMXResult armVCCOMM_Average ( + const OMX_U8* pPred0, + const OMX_U8* pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8* pDstPred, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/** + * Function: armVCM4P10_SADQuar + * + * Description: + * This function calculates the SAD between one block (pSrc) and the + * average of the other two (pSrcRef0 and pSrcRef1) + * + * Remarks: + * + * [in] pSrc Pointer to the original block + * [in] pSrcRef0 Pointer to reference block 0 + * [in] pSrcRef1 Pointer to reference block 1 + * [in] iSrcStep Step of the original block buffer + * [in] iRefStep0 Step of reference block 0 + * [in] iRefStep1 Step of reference block 1 + * [in] iHeight Height of the block + * [in] iWidth Width of the block + * [out] pDstSAD Pointer of result SAD + * + * Return Value: + * Standard OMXResult value. + * + */ +OMXResult armVCM4P10_SADQuar( + const OMX_U8* pSrc, + const OMX_U8* pSrcRef0, + const OMX_U8* pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32* pDstSAD, + OMX_U32 iHeight, + OMX_U32 iWidth +); + +/** + * Function: armVCM4P10_Interpolate_Chroma + * + * Description: + * This function performs interpolation for chroma components. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to + * interpolate in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. 
+ * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [in] dx Fractional part of horizontal motion vector + * component in 1/8 pixel unit (0~7) + * [in] dy Fractional part of vertical motion vector + * component in 1/8 pixel unit (0~7) + * [out] pDst Pointer to the interpolation buffer + * + * Return Value: + * Standard OMXResult value. + * + */ + OMXResult armVCM4P10_Interpolate_Chroma( + OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight, + OMX_U32 dx, + OMX_U32 dy +); + +/** + * Function: armVCM4P10_Interpolate_Luma + * + * Description: + * This function performs interpolation for luma components. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to + * interpolate in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [in] dx Fractional part of horizontal motion vector + * component in 1/4 pixel unit (0~3) + * [in] dy Fractional part of vertical motion vector + * component in 1/4 pixel unit (0~3) + * [out] pDst Pointer to the interpolation buffer + * + * Return Value: + * Standard OMXResult value. + * + */ + + OMXResult armVCM4P10_Interpolate_Luma( + const OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight, + OMX_U32 dx, + OMX_U32 dy +); + +/** + * Function: omxVCH264_DequantTransformACFromPair_U8_S16_C1_DLx + * + * Description: + * Reconstruct the 4x4 residual block from coefficient-position pair buffer, + * perform dequantisation and integer inverse transformation for 4x4 block of + * residuals and update the pair buffer pointer to next non-empty block. 
+ * + * Remarks: + * + * Parameters: + * [in] ppSrc Double pointer to residual coefficient-position + * pair buffer output by CAVLC decoding + * [in] pDC Pointer to the DC coefficient of this block, NULL + * if it doesn't exist + * [in] QP Quantization parameter + * [in] AC Flag indicating if at least one non-zero coefficient exists + * [out] pDst pointer to the reconstructed 4x4 block data + * + * Return Value: + * Standard omxError result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx( + OMX_U8 **ppSrc, + OMX_S16 *pDst, + OMX_INT QP, + OMX_S16* pDC, + int AC +); + +#endif /*_armVideo_H_*/ + +/*End of File*/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h new file mode 100644 index 0000000000000000000000000000000000000000..7f0a9b809746267c1fa34acfde0a317f6558ad99 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h @@ -0,0 +1,72 @@ +;// +;// +;// File Name: armVCCOMM_s.h +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// ARM optimized OpenMAX VC header file +;// +;// Formula used: +;// MACRO for calculating median for three values. + + + + IF :LNOT::DEF:ARMVCCOMM_S_H + INCLUDE armCOMM_s.h + M_VARIANTS CortexA8, ARM1136JS + + IF ARM1136JS :LOR: CortexA8 + + ;///* + ;// * Macro: M_MEDIAN3 + ;// * + ;// * Description: Finds the median of three numbers + ;// * + ;// * Remarks: + ;// * + ;// * Parameters: + ;// * [in] x First entry for the list of three numbers. + ;// * [in] y Second entry for the list of three numbers. + ;// * Input value may be corrupted at the end of + ;// * the execution of this macro. + ;// * [in] z Third entry of the list of three numbers.
+ ;// * Input value corrupted at the end of the + ;// * execution of this macro. + ;// * [in] t Temporary scratch register. + ;// * [out]z Median of the three numbers. + ;// */ + + MACRO + + M_MEDIAN3 $x, $y, $z, $t + + SUBS $t, $y, $z; // if (y < z) + ADDLT $z, $z, $t; // swap y and z + SUBLT $y, $y, $t; + + ;// Now z' <= y', so there are three cases for the + ;// median value, depending on x. + + ;// 1) x <= z' <= y' : median value is z' + ;// 2) z' <= x <= y' : median value is x + ;// 3) z' <= y' <= x : median value is y' + + CMP $z, $x; // if ( x > min(y,z) ) + MOVLT $z, $x; // ans = x + + CMP $x, $y; // if ( x > max(y,z) ) + MOVGT $z, $y; // ans = max(y,z) + + MEND + ENDIF + + + + ENDIF ;// ARMVCCOMM_S_H + + END \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h new file mode 100644 index 0000000000000000000000000000000000000000..7b3cc7289554a10744eacffc0d0af5ef39d61e8c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h @@ -0,0 +1,4381 @@ +/** + * File: omxVC.h + * Brief: OpenMAX DL v1.0.2 - Video Coding library + * + * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved. + * + * These materials are protected by copyright laws and contain material + * proprietary to the Khronos Group, Inc. You may use these materials + * for implementing Khronos specifications, without altering or removing + * any trademark, copyright or other notice from the specification. + * + * Khronos Group makes no, and expressly disclaims any, representations + * or warranties, express or implied, regarding these materials, including, + * without limitation, any implied warranties of merchantability or fitness + * for a particular purpose or non-infringement of any intellectual property.
+ * Khronos Group makes no, and expressly disclaims any, warranties, express + * or implied, regarding the correctness, accuracy, completeness, timeliness, + * and reliability of these materials. + * + * Under no circumstances will the Khronos Group, or any of its Promoters, + * Contributors or Members or their respective partners, officers, directors, + * employees, agents or representatives be liable for any damages, whether + * direct, indirect, special or consequential damages for lost revenues, + * lost profits, or otherwise, arising from or in connection with these + * materials. + * + * Khronos and OpenMAX are trademarks of the Khronos Group Inc. + * + */ + +/* *****************************************************************************************/ + +#ifndef _OMXVC_H_ +#define _OMXVC_H_ + +#include "omxtypes.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/* 6.1.1.1 Motion Vectors */ +/* In omxVC, motion vectors are represented as follows: */ + +typedef struct { + OMX_S16 dx; + OMX_S16 dy; +} OMXVCMotionVector; + + + +/** + * Function: omxVCCOMM_Average_8x (6.1.3.1.1) + * + * Description: + * This function calculates the average of two 8x4, 8x8, or 8x16 blocks. The + * result is rounded according to (a+b+1)/2. The block average function can + * be used in conjunction with half-pixel interpolation to obtain quarter + * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. + * + * Input Arguments: + * + * pPred0 - Pointer to the top-left corner of reference block 0 + * pPred1 - Pointer to the top-left corner of reference block 1 + * iPredStep0 - Step of reference block 0 + * iPredStep1 - Step of reference block 1 + * iDstStep - Step of the destination buffer. + * iHeight - Height of the blocks + * + * Output Arguments: + * + * pDstPred - Pointer to the destination buffer. 8-byte aligned. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pPred0, pPred1, or + * pDstPred. + * - pDstPred is not aligned on an 8-byte boundary. + * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 8. + * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 8. + * - iDstStep <= 0 or iDstStep is not a multiple of 8. + * - iHeight is not 4, 8, or 16. + * + */ +OMXResult omxVCCOMM_Average_8x ( + const OMX_U8 *pPred0, + const OMX_U8 *pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8 *pDstPred, + OMX_U32 iDstStep, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCCOMM_Average_16x (6.1.3.1.2) + * + * Description: + * This function calculates the average of two 16x16 or 16x8 blocks. The + * result is rounded according to (a+b+1)/2. The block average function can + * be used in conjunction with half-pixel interpolation to obtain quarter + * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. + * + * Input Arguments: + * + * pPred0 - Pointer to the top-left corner of reference block 0 + * pPred1 - Pointer to the top-left corner of reference block 1 + * iPredStep0 - Step of reference block 0 + * iPredStep1 - Step of reference block 1 + * iDstStep - Step of the destination buffer + * iHeight - Height of the blocks + * + * Output Arguments: + * + * pDstPred - Pointer to the destination buffer. 16-byte aligned. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pPred0, pPred1, or + * pDstPred. + * - pDstPred is not aligned on a 16-byte boundary. + * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 16. + * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 16. + * - iDstStep <= 0 or iDstStep is not a multiple of 16. + * - iHeight is not 8 or 16. 
+ * + */ +OMXResult omxVCCOMM_Average_16x ( + const OMX_U8 *pPred0, + const OMX_U8 *pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8 *pDstPred, + OMX_U32 iDstStep, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCCOMM_ExpandFrame_I (6.1.3.2.1) + * + * Description: + * This function expands a reconstructed frame in-place. The unexpanded + * source frame should be stored in a plane buffer with sufficient space + * pre-allocated for edge expansion, and the input frame should be located in + * the plane buffer center. This function executes the pixel expansion by + * replicating source frame edge pixel intensities in the empty pixel + * locations (expansion region) between the source frame edge and the plane + * buffer edge. The width/height of the expansion regions on the + * horizontal/vertical edges is controlled by the parameter iExpandPels. + * + * Input Arguments: + * + * pSrcDstPlane - pointer to the top-left corner of the frame to be + * expanded; must be aligned on an 8-byte boundary. + * iFrameWidth - frame width; must be a multiple of 8. + * iFrameHeight -frame height; must be a multiple of 8. + * iExpandPels - number of pixels to be expanded in the horizontal and + * vertical directions; must be a multiple of 8. + * iPlaneStep - distance, in bytes, between the start of consecutive lines + * in the plane buffer; must be larger than or equal to + * (iFrameWidth + 2 * iExpandPels). + * + * Output Arguments: + * + * pSrcDstPlane -Pointer to the top-left corner of the frame (NOT the + * top-left corner of the plane); must be aligned on an 8-byte + * boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - pSrcDstPlane is NULL. + * - pSrcDstPlane is not aligned on an 8-byte boundary. + * - one of the following parameters is either equal to zero or is a + * non-multiple of 8: iFrameHeight, iFrameWidth, iPlaneStep, or + * iExpandPels. 
+ * - iPlaneStep < (iFrameWidth + 2 * iExpandPels). + * + */ +OMXResult omxVCCOMM_ExpandFrame_I ( + OMX_U8 *pSrcDstPlane, + OMX_U32 iFrameWidth, + OMX_U32 iFrameHeight, + OMX_U32 iExpandPels, + OMX_U32 iPlaneStep +); + + + +/** + * Function: omxVCCOMM_Copy8x8 (6.1.3.3.1) + * + * Description: + * Copies the reference 8x8 block to the current block. + * + * Input Arguments: + * + * pSrc - pointer to the reference block in the source frame; must be + * aligned on an 8-byte boundary. + * step - distance between the starts of consecutive lines in the reference + * frame, in bytes; must be a multiple of 8 and must be larger than + * or equal to 8. + * + * Output Arguments: + * + * pDst - pointer to the destination block; must be aligned on an 8-byte + * boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pSrc, pDst + * - one or more of the following pointers is not aligned on an 8-byte + * boundary: pSrc, pDst + * - step <8 or step is not a multiple of 8. + * + */ +OMXResult omxVCCOMM_Copy8x8 ( + const OMX_U8 *pSrc, + OMX_U8 *pDst, + OMX_INT step +); + + + +/** + * Function: omxVCCOMM_Copy16x16 (6.1.3.3.2) + * + * Description: + * Copies the reference 16x16 macroblock to the current macroblock. + * + * Input Arguments: + * + * pSrc - pointer to the reference macroblock in the source frame; must be + * aligned on a 16-byte boundary. + * step - distance between the starts of consecutive lines in the reference + * frame, in bytes; must be a multiple of 16 and must be larger + * than or equal to 16. + * + * Output Arguments: + * + * pDst - pointer to the destination macroblock; must be aligned on a + * 16-byte boundary. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pSrc, pDst + * - one or more of the following pointers is not aligned on a 16-byte + * boundary: pSrc, pDst + * - step <16 or step is not a multiple of 16. + * + */ +OMXResult omxVCCOMM_Copy16x16 ( + const OMX_U8 *pSrc, + OMX_U8 *pDst, + OMX_INT step +); + + + +/** + * Function: omxVCCOMM_ComputeTextureErrorBlock_SAD (6.1.4.1.1) + * + * Description: + * Computes texture error of the block; also returns SAD. + * + * Input Arguments: + * + * pSrc - pointer to the source plane; must be aligned on an 8-byte + * boundary. + * srcStep - step of the source plane + * pSrcRef - pointer to the reference buffer, an 8x8 block; must be aligned + * on an 8-byte boundary. + * + * Output Arguments: + * + * pDst - pointer to the destination buffer, an 8x8 block; must be aligned + * on an 8-byte boundary. + * pDstSAD - pointer to the Sum of Absolute Differences (SAD) value + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following + * pointers is NULL: pSrc, pSrcRef, pDst and pDstSAD. + * - pSrc is not 8-byte aligned. + * - SrcStep <= 0 or srcStep is not a multiple of 8. + * - pSrcRef is not 8-byte aligned. + * - pDst is not 8-byte aligned. + * + */ +OMXResult omxVCCOMM_ComputeTextureErrorBlock_SAD ( + const OMX_U8 *pSrc, + OMX_INT srcStep, + const OMX_U8 *pSrcRef, + OMX_S16 *pDst, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCCOMM_ComputeTextureErrorBlock (6.1.4.1.2) + * + * Description: + * Computes the texture error of the block. + * + * Input Arguments: + * + * pSrc - pointer to the source plane. This should be aligned on an 8-byte + * boundary. + * srcStep - step of the source plane + * pSrcRef - pointer to the reference buffer, an 8x8 block. This should be + * aligned on an 8-byte boundary. 
+ * + * Output Arguments: + * + * pDst - pointer to the destination buffer, an 8x8 block. This should be + * aligned on an 8-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is NULL: + * pSrc, pSrcRef, pDst. + * - pSrc is not 8-byte aligned. + * - SrcStep <= 0 or srcStep is not a multiple of 8. + * - pSrcRef is not 8-byte aligned. + * - pDst is not 8-byte aligned + * + */ +OMXResult omxVCCOMM_ComputeTextureErrorBlock ( + const OMX_U8 *pSrc, + OMX_INT srcStep, + const OMX_U8 *pSrcRef, + OMX_S16 *pDst +); + + + +/** + * Function: omxVCCOMM_LimitMVToRect (6.1.4.1.3) + * + * Description: + * Limits the motion vector associated with the current block/macroblock to + * prevent the motion compensated block/macroblock from moving outside a + * bounding rectangle as shown in Figure 6-1. + * + * Input Arguments: + * + * pSrcMV - pointer to the motion vector associated with the current block + * or macroblock + * pRectVOPRef - pointer to the bounding rectangle + * Xcoord, Ycoord - coordinates of the current block or macroblock + * size - size of the current block or macroblock; must be equal to 8 or + * 16. + * + * Output Arguments: + * + * pDstMV - pointer to the limited motion vector + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: + * pSrcMV, pDstMV, or pRectVOPRef. + * - size is not equal to either 8 or 16. + * - the width or height of the bounding rectangle is less than + * twice the block size. 
+ */ +OMXResult omxVCCOMM_LimitMVToRect ( + const OMXVCMotionVector *pSrcMV, + OMXVCMotionVector *pDstMV, + const OMXRect *pRectVOPRef, + OMX_INT Xcoord, + OMX_INT Ycoord, + OMX_INT size +); + + + +/** + * Function: omxVCCOMM_SAD_16x (6.1.4.1.4) + * + * Description: + * This function calculates the SAD for 16x16 and 16x8 blocks. + * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 16-byte + * boundary. + * iStepOrg - Step of the original block buffer + * pSrcRef - Pointer to the reference block + * iStepRef - Step of the reference block buffer + * iHeight - Height of the block + * + * Output Arguments: + * + * pDstSAD - Pointer of result SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pDstSAD, or pSrcRef + * - pSrcOrg is not 16-byte aligned. + * - iStepOrg <= 0 or iStepOrg is not a multiple of 16 + * - iStepRef <= 0 or iStepRef is not a multiple of 16 + * - iHeight is not 8 or 16 + * + */ +OMXResult omxVCCOMM_SAD_16x ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_S32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCCOMM_SAD_8x (6.1.4.1.5) + * + * Description: + * This function calculates the SAD for 8x16, 8x8, 8x4 blocks. + * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 8-byte + * boundary. + * iStepOrg - Step of the original block buffer + * pSrcRef - Pointer to the reference block + * iStepRef - Step of the reference block buffer + * iHeight - Height of the block + * + * Output Arguments: + * + * pDstSAD -Pointer of result SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. 
Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pDstSAD, or pSrcRef + * - pSrcOrg is not 8-byte aligned. + * - iStepOrg <= 0 or iStepOrg is not a multiple of 8 + * - iStepRef <= 0 or iStepRef is not a multiple of 8 + * - iHeight is not 4, 8 or 16 + * + */ +OMXResult omxVCCOMM_SAD_8x ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_S32*pDstSAD, + OMX_U32 iHeight +); + + + +/* 6.2.1.1 Direction */ +/* The direction enumerator is used with functions that perform AC/DC prediction and zig-zag scan. */ + +enum { + OMX_VC_NONE = 0, + OMX_VC_HORIZONTAL = 1, + OMX_VC_VERTICAL = 2 +}; + + + +/* 6.2.1.2 Bilinear Interpolation */ +/* The bilinear interpolation enumerator is used with motion estimation, motion compensation, and reconstruction functions. */ + +enum { + OMX_VC_INTEGER_PIXEL = 0, /* case a */ + OMX_VC_HALF_PIXEL_X = 1, /* case b */ + OMX_VC_HALF_PIXEL_Y = 2, /* case c */ + OMX_VC_HALF_PIXEL_XY = 3 /* case d */ +}; + + + +/* 6.2.1.3 Neighboring Macroblock Availability */ +/* Neighboring macroblock availability is indicated using the following flags: */ + +enum { + OMX_VC_UPPER = 1, /** above macroblock is available */ + OMX_VC_LEFT = 2, /** left macroblock is available */ + OMX_VC_CENTER = 4, + OMX_VC_RIGHT = 8, + OMX_VC_LOWER = 16, + OMX_VC_UPPER_LEFT = 32, /** above-left macroblock is available */ + OMX_VC_UPPER_RIGHT = 64, /** above-right macroblock is available */ + OMX_VC_LOWER_LEFT = 128, + OMX_VC_LOWER_RIGHT = 256 +}; + + + +/* 6.2.1.4 Video Components */ +/* A data type that enumerates video components is defined as follows: */ + +typedef enum { + OMX_VC_LUMINANCE, /** Luminance component */ + OMX_VC_CHROMINANCE /** chrominance component */ +} OMXVCM4P2VideoComponent; + + + +/* 6.2.1.5 MacroblockTypes */ +/* A data type that enumerates macroblock types is defined as follows: */ + +typedef enum { + OMX_VC_INTER = 0, /** P 
picture or P-VOP */ + OMX_VC_INTER_Q = 1, /** P picture or P-VOP */ + OMX_VC_INTER4V = 2, /** P picture or P-VOP */ + OMX_VC_INTRA = 3, /** I and P picture, I- and P-VOP */ + OMX_VC_INTRA_Q = 4, /** I and P picture, I- and P-VOP */ + OMX_VC_INTER4V_Q = 5 /** P picture or P-VOP (H.263)*/ +} OMXVCM4P2MacroblockType; + + + +/* 6.2.1.6 Coordinates */ +/* Coordinates are represented as follows: */ + +typedef struct { + OMX_INT x; + OMX_INT y; +} OMXVCM4P2Coordinate; + + + +/* 6.2.1.7 Motion Estimation Algorithms */ +/* A data type that enumerates motion estimation search methods is defined as follows: */ + +typedef enum { + OMX_VC_M4P2_FAST_SEARCH = 0, /** Fast motion search */ + OMX_VC_M4P2_FULL_SEARCH = 1 /** Full motion search */ +} OMXVCM4P2MEMode; + + + +/* 6.2.1.8 Motion Estimation Parameters */ +/* A data structure containing control parameters for + * motion estimation functions is defined as follows: + */ + +typedef struct { + OMX_INT searchEnable8x8; /** enables 8x8 search */ + OMX_INT halfPelSearchEnable; /** enables half-pel resolution */ + OMX_INT searchRange; /** search range */ + OMX_INT rndVal; /** rounding control; 0-disabled, 1-enabled*/ +} OMXVCM4P2MEParams; + + + +/* 6.2.1.9 Macroblock Information */ +/* A data structure containing macroblock parameters for + * motion estimation functions is defined as follows: + */ + +typedef struct { + OMX_S32 sliceId; /* slice number */ + OMXVCM4P2MacroblockType mbType; /* MB type: OMX_VC_INTRA, OMX_VC_INTER, or OMX_VC_INTER4V */ + OMX_S32 qp; /* quantization parameter*/ + OMX_U32 cbpy; /* CBP Luma */ + OMX_U32 cbpc; /* CBP Chroma */ + OMXVCMotionVector pMV0[2][2]; /* motion vector, represented using 1/2-pel units, + * pMV0[blocky][blockx] (blocky = 0~1, blockx = 0~1) + */ + OMXVCMotionVector pMVPred[2][2]; /* motion vector prediction, represented using 1/2-pel units, + * pMVPred[blocky][blockx] (blocky = 0~1, blockx = 0~1) + */ + OMX_U8 pPredDir[2][2]; /* AC prediction direction: + * OMX_VC_NONE, OMX_VC_VERTICAL, 
OMX_VC_HORIZONTAL + */ +} OMXVCM4P2MBInfo, *OMXVCM4P2MBInfoPtr; + + + +/** + * Function: omxVCM4P2_FindMVpred (6.2.3.1.1) + * + * Description: + * Predicts a motion vector for the current block using the procedure + * specified in [ISO14496-2], subclause 7.6.5. The resulting predicted MV is + * returned in pDstMVPred. If the parameter pDstMVPredME is not NULL then + * the set of three MV candidates used for prediction is also returned, + * otherwise pDstMVPredME is NULL upon return. + * + * Input Arguments: + * + * pSrcMVCurMB - pointer to the MV buffer associated with the current Y + * macroblock; a value of NULL indicates unavailability. + * pSrcCandMV1 - pointer to the MV buffer containing the 4 MVs associated + * with the MB located to the left of the current MB; set to NULL + * if there is no MB to the left. + * pSrcCandMV2 - pointer to the MV buffer containing the 4 MVs associated + * with the MB located above the current MB; set to NULL if there + * is no MB located above the current MB. + * pSrcCandMV3 - pointer to the MV buffer containing the 4 MVs associated + * with the MB located to the right and above the current MB; set + * to NULL if there is no MB located to the above-right. + * iBlk - the index of block in the current macroblock + * pDstMVPredME - MV candidate return buffer; if set to NULL then + * prediction candidate MVs are not returned and pDstMVPredME will + * be NULL upon function return; if pDstMVPredME is non-NULL then it + * must point to a buffer containing sufficient space for three + * return MVs. + * + * Output Arguments: + * + * pDstMVPred - pointer to the predicted motion vector + * pDstMVPredME - if non-NULL upon input then pDstMVPredME points upon + * return to a buffer containing the three motion vector candidates + * used for prediction as specified in [ISO14496-2], subclause + * 7.6.5, otherwise if NULL upon input then pDstMVPredME is NULL + * upon output. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - the pointer pDstMVPred is NULL + * - the parameter iBlk does not fall into the range 0 <= iBlk<=3 + * + */ +OMXResult omxVCM4P2_FindMVpred ( + const OMXVCMotionVector *pSrcMVCurMB, + const OMXVCMotionVector *pSrcCandMV1, + const OMXVCMotionVector *pSrcCandMV2, + const OMXVCMotionVector *pSrcCandMV3, + OMXVCMotionVector *pDstMVPred, + OMXVCMotionVector *pDstMVPredME, + OMX_INT iBlk +); + + + +/** + * Function: omxVCM4P2_IDCT8x8blk (6.2.3.2.1) + * + * Description: + * Computes a 2D inverse DCT for a single 8x8 block, as defined in + * [ISO14496-2]. + * + * Input Arguments: + * + * pSrc - pointer to the start of the linearly arranged IDCT input buffer; + * must be aligned on a 16-byte boundary. According to + * [ISO14496-2], the input coefficient values should lie within the + * range [-2048, 2047]. + * + * Output Arguments: + * + * pDst - pointer to the start of the linearly arranged IDCT output buffer; + * must be aligned on a 16-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - pSrc or pDst is NULL. + * - pSrc or pDst is not 16-byte aligned. + * + */ +OMXResult omxVCM4P2_IDCT8x8blk ( + const OMX_S16 *pSrc, + OMX_S16 *pDst +); + + + +/** + * Function: omxVCM4P2_MEGetBufSize (6.2.4.1.1) + * + * Description: + * Computes the size, in bytes, of the vendor-specific specification + * structure for the following motion estimation functions: + * BlockMatch_Integer_8x8, BlockMatch_Integer_16x16, and MotionEstimationMB. 
+ * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P2MEMode + * pMEParams - motion estimation parameters + * + * Output Arguments: + * + * pSize - pointer to the number of bytes required for the specification + * structure + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - one or more of the following is true: + * - an invalid value was specified for the parameter MEmode + * - a negative or zero value was specified for the + * parameter pMEParams->searchRange + * + */ +OMXResult omxVCM4P2_MEGetBufSize ( + OMXVCM4P2MEMode MEmode, + const OMXVCM4P2MEParams *pMEParams, + OMX_U32 *pSize +); + + + +/** + * Function: omxVCM4P2_MEInit (6.2.4.1.2) + * + * Description: + * Initializes the vendor-specific specification structure required for the + * following motion estimation functions: BlockMatch_Integer_8x8, + * BlockMatch_Integer_16x16, and MotionEstimationMB. Memory for the + * specification structure *pMESpec must be allocated prior to calling the + * function, and should be aligned on a 4-byte boundary. Following + * initialization by this function, the vendor-specific structure *pMESpec + * should contain an implementation-specific representation of all motion + * estimation parameters received via the structure pMEParams, for example + * rndVal, searchRange, etc. The number of bytes required for the + * specification structure can be determined using the function + * omxVCM4P2_MEGetBufSize. 
+ * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P2MEMode + * pMEParams - motion estimation parameters + * pMESpec - pointer to the uninitialized ME specification structure + * + * Output Arguments: + * + * pMESpec - pointer to the initialized ME specification structure + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - one or more of the following is true: + * - an invalid value was specified for the parameter MEmode + * - a negative or zero value was specified for the + * parameter pMEParams->searchRange + * + */ +OMXResult omxVCM4P2_MEInit ( + OMXVCM4P2MEMode MEmode, + const OMXVCM4P2MEParams*pMEParams, + void *pMESpec +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Integer_16x16 (6.2.4.2.1) + * + * Description: + * Performs a 16x16 block search; estimates motion vector and associated + * minimum SAD. Both the input and output motion vectors are represented using + * half-pixel units, and therefore a shift left or right by 1 bit may be + * required, respectively, to match the input or output MVs with other + * functions that either generate output MVs or expect input MVs represented + * using integer pixel units. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * MB that corresponds to the location of the current macroblock in + * the current plane. + * refWidth - width of the reference plane + * pRefRect - pointer to the valid reference plane rectangle; coordinates + * are specified relative to the image origin. Rectangle + * boundaries may extend beyond image boundaries if the image has + * been padded. For example, if padding extends 4 pixels beyond + * frame border, then the value for the left border could be set to + * -4. 
+ * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 256 + * entries); must be aligned on a 16-byte boundary. The number of + * bytes between lines (step) is 16. + * pCurrPointPos - position of the current macroblock in the current plane + * pSrcPreMV - pointer to predicted motion vector; NULL indicates no + * predicted MV + * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced + * by pSrcPreMV); may be set to NULL if unavailable. + * pMESpec - vendor-specific motion estimation specification structure; + * must have been allocated and then initialized using + * omxVCM4P2_MEInit prior to calling the block matching function. + * + * Output Arguments: + * + * pDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: pSrcRefBuf, + * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or + * pMESpec, or + * - pSrcCurrBuf is not 16-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Integer_16x16 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCMotionVector*pSrcPreMV, + const OMX_INT *pSrcPreSAD, + void *pMESpec, + OMXVCMotionVector*pDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Integer_8x8 (6.2.4.2.2) + * + * Description: + * Performs an 8x8 block search; estimates motion vector and associated + * minimum SAD. 
Both the input and output motion vectors are represented + * using half-pixel units, and therefore a shift left or right by 1 bit may be + * required, respectively, to match the input or output MVs with other + * functions that either generate output MVs or expect input MVs represented + * using integer pixel units. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * block that corresponds to the location of the current 8x8 block + * in the current plane. + * refWidth - width of the reference plane + * pRefRect - pointer to the valid reference plane rectangle; coordinates + * are specified relative to the image origin. Rectangle + * boundaries may extend beyond image boundaries if the image has + * been padded. + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 128 + * entries); must be aligned on an 8-byte boundary. The number of + * bytes between lines (step) is 16 bytes. + * pCurrPointPos - position of the current block in the current plane + * pSrcPreMV - pointer to predicted motion vector; NULL indicates no + * predicted MV + * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced + * by pSrcPreMV); may be set to NULL if unavailable. + * pMESpec - vendor-specific motion estimation specification structure; + * must have been allocated and then initialized using + * omxVCM4P2_MEInit prior to calling the block matching function. + * + * Output Arguments: + * + * pDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. 
Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: pSrcRefBuf, + * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or + * pMESpec, or + * - pSrcCurrBuf is not 8-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Integer_8x8 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCMotionVector *pSrcPreMV, + const OMX_INT *pSrcPreSAD, + void *pMESpec, + OMXVCMotionVector *pDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Half_16x16 (6.2.4.2.3) + * + * Description: + * Performs a 16x16 block match with half-pixel resolution. Returns the + * estimated motion vector and associated minimum SAD. This function + * estimates the half-pixel motion vector by interpolating the integer + * resolution motion vector referenced by the input parameter pSrcDstMV, i.e., + * the initial integer MV is generated externally. The input parameters + * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of + * 16x16 integer search prior to calling BlockMatch_Half_16x16. The function + * BlockMatch_Integer_16x16 may be used for integer motion estimation. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * macroblock that corresponds to the location of the current + * macroblock in the current plane. + * refWidth - width of the reference plane + * pRefRect - reference plane valid region rectangle + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 256 + * entries); must be aligned on a 16-byte boundary. The number of + * bytes between lines (step) is 16. 
+ * pSearchPointRefPos - position of the starting point for half pixel + * search (specified in terms of integer pixel units) in the + * reference plane, i.e., the reference position pointed to by the + * predicted motion vector. + * rndVal - rounding control parameter: 0 - disabled; 1 - enabled. + * pSrcDstMV - pointer to the initial MV estimate; typically generated + * during a prior 16x16 integer search; specified in terms of + * half-pixel units. + * + * Output Arguments: + * + * pSrcDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: pSrcRefBuf, + * pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV. + * - pSrcCurrBuf is not 16-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Half_16x16 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pSearchPointRefPos, + OMX_INT rndVal, + OMXVCMotionVector *pSrcDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Half_8x8 (6.2.4.2.4) + * + * Description: + * Performs an 8x8 block match with half-pixel resolution. Returns the + * estimated motion vector and associated minimum SAD. This function + * estimates the half-pixel motion vector by interpolating the integer + * resolution motion vector referenced by the input parameter pSrcDstMV, i.e., + * the initial integer MV is generated externally. The input parameters + * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of + * 8x8 integer search prior to calling BlockMatch_Half_8x8. The function + * BlockMatch_Integer_8x8 may be used for integer motion estimation. 
+ * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * block that corresponds to the location of the current 8x8 block + * in the current plane. + * refWidth - width of the reference plane + * pRefRect - reference plane valid region rectangle + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 128 + * entries); must be aligned on an 8-byte boundary. The number of + * bytes between lines (step) is 16. + * pSearchPointRefPos - position of the starting point for half pixel + * search (specified in terms of integer pixel units) in the + * reference plane. + * rndVal - rounding control parameter: 0 - disabled; 1 - enabled. + * pSrcDstMV - pointer to the initial MV estimate; typically generated + * during a prior 8x8 integer search, specified in terms of + * half-pixel units. + * + * Output Arguments: + * + * pSrcDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: + * pSrcRefBuf, pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV + * - pSrcCurrBuf is not 8-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Half_8x8 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pSearchPointRefPos, + OMX_INT rndVal, + OMXVCMotionVector *pSrcDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_MotionEstimationMB (6.2.4.3.1) + * + * Description: + * Performs motion search for a 16x16 macroblock. Selects best motion search + * strategy from among inter-1MV, inter-4MV, and intra modes. Supports + * integer and half pixel resolution. 
+ * + * Input Arguments: + * + * pSrcCurrBuf - pointer to the top-left corner of the current MB in the + * original picture plane; must be aligned on a 16-byte boundary. + * The function does not expect source data outside the region + * bounded by the MB to be available; for example it is not + * necessary for the caller to guarantee the availability of + * pSrcCurrBuf[-SrcCurrStep], i.e., the row of pixels above the MB + * to be processed. + * srcCurrStep - width of the original picture plane, in terms of full + * pixels; must be a multiple of 16. + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * plane location corresponding to the location of the current + * macroblock in the current plane; must be aligned on a 16-byte + * boundary. + * srcRefStep - width of the reference picture plane, in terms of full + * pixels; must be a multiple of 16. + * pRefRect - reference plane valid region rectangle, specified relative to + * the image origin + * pCurrPointPos - position of the current macroblock in the current plane + * pMESpec - pointer to the vendor-specific motion estimation specification + * structure; must be allocated and then initialized using + * omxVCM4P2_MEInit prior to calling this function. + * pMBInfo - array, of dimension four, containing pointers to information + * associated with four nearby MBs: + * - pMBInfo[0] - pointer to left MB information + * - pMBInfo[1] - pointer to top MB information + * - pMBInfo[2] - pointer to top-left MB information + * - pMBInfo[3] - pointer to top-right MB information + * Any pointer in the array may be set equal to NULL if the + * corresponding MB doesn't exist. 
For each MB, the following structure + * members are used: + * - mbType - macroblock type, either OMX_VC_INTRA, OMX_VC_INTER, or + * OMX_VC_INTER4V + * - pMV0[2][2] - estimated motion vectors; represented + * in 1/2 pixel units + * - sliceID - number of the slice to which the MB belongs + * pSrcDstMBCurr - pointer to information structure for the current MB. + * The following entries should be set prior to calling the + * function: sliceID - the number of the slice to which the + * current MB belongs. The structure elements cbpy and cbpc are + * ignored. + * + * Output Arguments: + * + * pSrcDstMBCurr - pointer to updated information structure for the current + * MB after MB-level motion estimation has been completed. The + * following structure members are updated by the ME function: + * - mbType - macroblock type: OMX_VC_INTRA, OMX_VC_INTER, or + * OMX_VC_INTER4V. + * - pMV0[2][2] - estimated motion vectors; represented in + * terms of 1/2 pel units. + * - pMVPred[2][2] - predicted motion vectors; represented + * in terms of 1/2 pel units. + * The structure members cbpy and cbpc are not updated by the function. + * pDstSAD - pointer to the minimum SAD for INTER1V, or sum of minimum SADs + * for INTER4V + * pDstBlockSAD - pointer to an array of SAD values for each of the four + * 8x8 luma blocks in the MB. The block SADs are in scan order for + * each MB. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: pSrcCurrBuf, + * pSrcRefBuf, pRefRect, pCurrPointPos, pMBInter, pMBIntra, + * pSrcDstMBCurr, or pDstSAD.
+ * + */ +OMXResult omxVCM4P2_MotionEstimationMB ( + const OMX_U8 *pSrcCurrBuf, + OMX_S32 srcCurrStep, + const OMX_U8 *pSrcRefBuf, + OMX_S32 srcRefStep, + const OMXRect*pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + void *pMESpec, + const OMXVCM4P2MBInfoPtr *pMBInfo, + OMXVCM4P2MBInfo *pSrcDstMBCurr, + OMX_U16 *pDstSAD, + OMX_U16 *pDstBlockSAD +); + + + +/** + * Function: omxVCM4P2_DCT8x8blk (6.2.4.4.1) + * + * Description: + * Computes a 2D forward DCT for a single 8x8 block, as defined in + * [ISO14496-2]. + * + * Input Arguments: + * + * pSrc - pointer to the start of the linearly arranged input buffer; must + * be aligned on a 16-byte boundary. Input values (pixel + * intensities) are valid in the range [-255,255]. + * + * Output Arguments: + * + * pDst - pointer to the start of the linearly arranged output buffer; must + * be aligned on a 16-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, returned if: + * - pSrc or pDst is NULL. + * - pSrc or pDst is not 16-byte aligned. + * + */ +OMXResult omxVCM4P2_DCT8x8blk ( + const OMX_S16 *pSrc, + OMX_S16 *pDst +); + + + +/** + * Function: omxVCM4P2_QuantIntra_I (6.2.4.4.2) + * + * Description: + * Performs quantization on intra block coefficients. This function supports + * bits_per_pixel == 8. + * + * Input Arguments: + * + * pSrcDst - pointer to the input intra block coefficients; must be aligned + * on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale). + * blockIndex - block index indicating the component type and position, + * valid in the range 0 to 5, as defined in [ISO14496-2], subclause + * 6.1.3.8. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pSrcDst - pointer to the output (quantized) interblock coefficients. 
+ * When shortVideoHeader==1, AC coefficients are saturated on the + * interval [-127, 127], and DC coefficients are saturated on the + * interval [1, 254]. When shortVideoHeader==0, AC coefficients + * are saturated on the interval [-2047, 2047]. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - pSrcDst is NULL. + * - blockIndex < 0 or blockIndex >= 10 + * - QP <= 0 or QP >= 32. + * + */ +OMXResult omxVCM4P2_QuantIntra_I ( + OMX_S16 *pSrcDst, + OMX_U8 QP, + OMX_INT blockIndex, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_QuantInter_I (6.2.4.4.3) + * + * Description: + * Performs quantization on an inter coefficient block; supports + * bits_per_pixel == 8. + * + * Input Arguments: + * + * pSrcDst - pointer to the input inter block coefficients; must be aligned + * on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale) + * shortVideoHeader - binary flag indicating presence of short_video_header; + * shortVideoHeader==1 selects linear intra DC mode, and + * shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pSrcDst - pointer to the output (quantized) interblock coefficients. + * When shortVideoHeader==1, AC coefficients are saturated on the + * interval [-127, 127], and DC coefficients are saturated on the + * interval [1, 254]. When shortVideoHeader==0, AC coefficients + * are saturated on the interval [-2047, 2047]. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - pSrcDst is NULL. + * - QP <= 0 or QP >= 32. + * + */ +OMXResult omxVCM4P2_QuantInter_I ( + OMX_S16 *pSrcDst, + OMX_U8 QP, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_TransRecBlockCoef_intra (6.2.4.4.4) + * + * Description: + * Quantizes the DCT coefficients, implements intra block AC/DC coefficient + * prediction, and reconstructs the current intra block texture for prediction + * on the next frame. 
Quantized row and column coefficients are returned in + * the updated coefficient buffers. + * + * Input Arguments: + * + * pSrc - pointer to the pixels of current intra block; must be aligned on + * an 8-byte boundary. + * pPredBufRow - pointer to the coefficient row buffer containing + * ((num_mb_per_row * 2 + 1) * 8) elements of type OMX_S16. + * Coefficients are organized into blocks of eight as described + * below (Internal Prediction Coefficient Update Procedures). The + * DC coefficient is first, and the remaining buffer locations + * contain the quantized AC coefficients. Each group of eight row + * buffer elements combined with one element eight elements ahead + * contains the coefficient predictors of the neighboring block + * that is spatially above or to the left of the block currently to + * be decoded. A negative-valued DC coefficient indicates that this + * neighboring block is not INTRA-coded or out of bounds, and + * therefore the AC and DC coefficients are invalid. Pointer must + * be aligned on an 8-byte boundary. + * pPredBufCol - pointer to the prediction coefficient column buffer + * containing 16 elements of type OMX_S16. Coefficients are + * organized as described in section 6.2.2.5. Pointer must be + * aligned on an 8-byte boundary. + * pSumErr - pointer to a flag indicating whether or not AC prediction is + * required; AC prediction is enabled if *pSumErr >=0, but the + * value is not used for coefficient prediction, i.e., the sum of + * absolute differences starts from 0 for each call to this + * function. Otherwise AC prediction is disabled if *pSumErr < 0 . + * blockIndex - block index indicating the component type and position, as + * defined in [ISO14496-2], subclause 6.1.3.8. 
+ * curQp - quantization parameter of the macroblock to which the current + * block belongs + * pQpBuf - pointer to a 2-element quantization parameter buffer; pQpBuf[0] + * contains the quantization parameter associated with the 8x8 + * block left of the current block (QPa), and pQpBuf[1] contains + * the quantization parameter associated with the 8x8 block above + * the current block (QPc). In the event that the corresponding + * block is outside of the VOP bound, the Qp value will not affect + * the intra prediction process, as described in [ISO14496-2], + * sub-clause 7.4.3.3, Adaptive AC Coefficient Prediction. + * srcStep - width of the source buffer; must be a multiple of 8. + * dstStep - width of the reconstructed destination buffer; must be a + * multiple of 16. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pDst - pointer to the quantized DCT coefficient buffer; pDst[0] contains + * the predicted DC coefficient; the remaining entries contain the + * quantized AC coefficients (without prediction). The pointer + * pDst must be aligned on a 16-byte boundary. + * pRec - pointer to the reconstructed texture; must be aligned on an + * 8-byte boundary. + * pPredBufRow - pointer to the updated coefficient row buffer + * pPredBufCol - pointer to the updated coefficient column buffer + * pPreACPredict - if prediction is enabled, the parameter points to the + * start of the buffer containing the coefficient differences for + * VLC encoding. The entry pPreACPredict[0] indicates prediction + * direction for the current block and takes one of the following + * values: OMX_VC_NONE (prediction disabled), OMX_VC_HORIZONTAL, or + * OMX_VC_VERTICAL. The entries + * pPreACPredict[1]-pPreACPredict[7] contain predicted AC + * coefficients.
If prediction is disabled (*pSumErr<0) then the + * contents of this buffer are undefined upon return from the + * function + * pSumErr - pointer to the value of the accumulated AC coefficient errors, + * i.e., sum of the absolute differences between predicted and + * unpredicted AC coefficients + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments: + * - At least one of the following pointers is NULL: pSrc, pDst, pRec, + * pPredBufRow, pPredBufCol, pQpBuf, pPreACPredict, pSumErr. + * - blockIndex < 0 or blockIndex >= 10. + * - curQp <= 0 or curQp >= 32. + * - srcStep or dstStep <= 0 or not a multiple of 8. + * - pDst is not 16-byte aligned. + * - At least one of the following pointers is not 8-byte aligned: + * pSrc, pRec. + * + * Note: The coefficient buffers must be updated in accordance with the + * update procedures defined in section 6.2.2. + * + */ +OMXResult omxVCM4P2_TransRecBlockCoef_intra ( + const OMX_U8 *pSrc, + OMX_S16 *pDst, + OMX_U8 *pRec, + OMX_S16 *pPredBufRow, + OMX_S16 *pPredBufCol, + OMX_S16 *pPreACPredict, + OMX_INT *pSumErr, + OMX_INT blockIndex, + OMX_U8 curQp, + const OMX_U8 *pQpBuf, + OMX_INT srcStep, + OMX_INT dstStep, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_TransRecBlockCoef_inter (6.2.4.4.5) + * + * Description: + * Implements DCT, and quantizes the DCT coefficients of the inter block + * while reconstructing the texture residual. There is no boundary check for + * the bit stream buffer. + * + * Input Arguments: + * + * pSrc - pointer to the residuals to be encoded; must be aligned on a + * 16-byte boundary. + * QP - quantization parameter. + * shortVideoHeader - binary flag indicating presence of short_video_header; + * shortVideoHeader==1 selects linear intra DC mode, and + * shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pDst - pointer to the quantized DCT coefficients buffer; must be aligned + * on a 16-byte boundary.
+ * pRec - pointer to the reconstructed texture residuals; must be aligned + * on a 16-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is either NULL or + * not 16-byte aligned: + * - pSrc + * - pDst + * - pRec + * - QP <= 0 or QP >= 32. + * + */ +OMXResult omxVCM4P2_TransRecBlockCoef_inter ( + const OMX_S16 *pSrc, + OMX_S16 *pDst, + OMX_S16 *pRec, + OMX_U8 QP, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (6.2.4.5.2) + * + * Description: + * Performs zigzag scan and VLC encoding of AC and DC coefficients for one + * intra block. Two versions of the function (DCVLC and ACVLC) are provided + * in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding". + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the bitstream + * pBitOffset - pointer to the bit position in the byte pointed by + * *ppBitStream. Valid within 0 to 7. + * pQDctBlkCoef - pointer to the quantized DCT coefficient + * predDir - AC prediction direction, which is used to decide the zigzag + * scan pattern; takes one of the following values: + * - OMX_VC_NONE - AC prediction not used. + * Performs classical zigzag scan. + * - OMX_VC_HORIZONTAL - Horizontal prediction. + * Performs alternate-vertical zigzag scan. + * - OMX_VC_VERTICAL - Vertical prediction. + * Performs alternate-horizontal zigzag scan. + * pattern - block pattern which is used to decide whether this block is + * encoded + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. 
+ * videoComp - video component type (luminance, chrominance) of the current + * block + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is encoded, so + * that it points to the current byte in the bit stream buffer. + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments: + * - At least one of the following pointers is NULL: ppBitStream, + * *ppBitStream, pBitOffset, pQDctBlkCoef. + * - *pBitOffset < 0, or *pBitOffset >7. + * - PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or + * OMX_VC_VERTICAL. + * - VideoComp is not one component of enum OMXVCM4P2VideoComponent. + * + */ +OMXResult omxVCM4P2_EncodeVLCZigzag_IntraDCVLC ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 predDir, + OMX_U8 pattern, + OMX_INT shortVideoHeader, + OMXVCM4P2VideoComponent videoComp +); + + + +/** + * Function: omxVCM4P2_EncodeVLCZigzag_IntraACVLC (6.2.4.5.2) + * + * Description: + * Performs zigzag scan and VLC encoding of AC and DC coefficients for one + * intra block. Two versions of the function (DCVLC and ACVLC) are provided + * in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding. + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the bitstream + * pBitOffset - pointer to the bit position in the byte pointed by + * *ppBitStream. Valid within 0 to 7. + * pQDctBlkCoef - pointer to the quantized DCT coefficient + * predDir - AC prediction direction, which is used to decide the zigzag + * scan pattern; takes one of the following values: + * - OMX_VC_NONE - AC prediction not used. + * Performs classical zigzag scan. 
+ * - OMX_VC_HORIZONTAL - Horizontal prediction. + * Performs alternate-vertical zigzag scan. + * - OMX_VC_VERTICAL - Vertical prediction. + * Performs alternate-horizontal zigzag scan. + * pattern - block pattern which is used to decide whether this block is + * encoded + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is encoded, so + * that it points to the current byte in the bit stream buffer. + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments: + * - At least one of the following pointers is NULL: ppBitStream, + * *ppBitStream, pBitOffset, pQDctBlkCoef. + * - *pBitOffset < 0, or *pBitOffset >7. + * - PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or + * OMX_VC_VERTICAL. + * - VideoComp is not one component of enum OMXVCM4P2VideoComponent. + * + */ +OMXResult omxVCM4P2_EncodeVLCZigzag_IntraACVLC ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 predDir, + OMX_U8 pattern, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_EncodeVLCZigzag_Inter (6.2.4.5.3) + * + * Description: + * Performs classical zigzag scanning and VLC encoding for one inter block. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream + * pBitOffset - pointer to the bit position in the byte pointed by + * *ppBitStream. 
Valid within 0 to 7. + * pQDctBlkCoef - pointer to the quantized DCT coefficient + * pattern - block pattern which is used to decide whether this block is + * encoded + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is encoded so that + * it points to the current byte in the bit stream buffer. + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments: + * - At least one of the following pointers is NULL: ppBitStream, *ppBitStream, + * pBitOffset, pQDctBlkCoef + * - *pBitOffset < 0, or *pBitOffset > 7. + * + */ +OMXResult omxVCM4P2_EncodeVLCZigzag_Inter ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 pattern, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_EncodeMV (6.2.4.5.4) + * + * Description: + * Predicts a motion vector for the current macroblock, encodes the + * difference, and writes the output to the stream buffer. The input MVs + * pMVCurMB, pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB should lie + * within the ranges associated with the input parameter fcodeForward, as + * described in [ISO14496-2], subclause 7.6.3. This function provides a + * superset of the functionality associated with the function + * omxVCM4P2_FindMVpred. + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the bitstream buffer + * pBitOffset - index of the first free (next available) bit in the stream + * buffer referenced by *ppBitStream, valid in the range 0 to 7. + * pMVCurMB - pointer to the current macroblock motion vector; a value of + * NULL indicates unavailability.
+ * pSrcMVLeftMB - pointer to the source left macroblock motion vector; a + * value of NULL indicates unavailability. + * pSrcMVUpperMB - pointer to source upper macroblock motion vector; a + * value of NULL indicates unavailability. + * pSrcMVUpperRightMB - pointer to source upper right MB motion vector; a + * value of NULL indicates unavailability. + * fcodeForward - an integer with values from 1 to 7; used in encoding + * motion vectors related to search range, as described in + * [ISO14496-2], subclause 7.6.3. + * MBType - macro block type, valid in the range 0 to 5 + * + * Output Arguments: + * + * ppBitStream - updated pointer to the current byte in the bit stream + * buffer + * pBitOffset - updated index of the next available bit position in stream + * buffer referenced by *ppBitStream + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following pointers is NULL: ppBitStream, + * *ppBitStream, pBitOffset, pMVCurMB + * - *pBitOffset < 0, or *pBitOffset > 7. + * - fcodeForward <= 0, or fcodeForward > 7, or MBType < 0. + * + */ +OMXResult omxVCM4P2_EncodeMV ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMXVCMotionVector *pMVCurMB, + const OMXVCMotionVector*pSrcMVLeftMB, + const OMXVCMotionVector *pSrcMVUpperMB, + const OMXVCMotionVector *pSrcMVUpperRightMB, + OMX_INT fcodeForward, + OMXVCM4P2MacroblockType MBType +); + + + +/** + * Function: omxVCM4P2_DecodePadMV_PVOP (6.2.5.1.1) + * + * Description: + * Decodes and pads the four motion vectors associated with a non-intra P-VOP + * macroblock. For macroblocks of type OMX_VC_INTER4V, the output MV is + * padded as specified in [ISO14496-2], subclause 7.6.1.6. Otherwise, for + * macroblocks of types other than OMX_VC_INTER4V, the decoded MV is copied to + * all four output MV buffer entries.
+ * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream buffer + * pBitOffset - pointer to the bit position in the byte pointed to by + * *ppBitStream. *pBitOffset is valid within [0-7]. + * pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB - pointers to the + * motion vector buffers of the macroblocks spatially at the left, + * upper, and upper-right side of the current macroblock, + * respectively; a value of NULL indicates unavailability. Note: + * Any neighborhood macroblock outside the current VOP or video + * packet or outside the current GOB (when short_video_header is + * 1) for which gob_header_empty is 0 is treated as + * transparent, according to [ISO14496-2], subclause 7.6.5. + * fcodeForward - a code equal to vop_fcode_forward in MPEG-4 bit stream + * syntax + * MBType - the type of the current macroblock. If MBType is not equal to + * OMX_VC_INTER4V, the destination motion vector buffer is still + * filled with the same decoded vector. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded, so + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream + * pDstMVCurMB - pointer to the motion vector buffer for the current + * macroblock; contains four decoded motion vectors + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDstMVCurMB + * - *pBitOffset exceeds [0,7] + * - fcodeForward exceeds (0,7] + * - MBType less than zero + * - motion vector buffer is not 4-byte aligned.
+ * OMX_Sts_Err - status error + * + */ +OMXResult omxVCM4P2_DecodePadMV_PVOP ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMXVCMotionVector *pSrcMVLeftMB, + OMXVCMotionVector*pSrcMVUpperMB, + OMXVCMotionVector *pSrcMVUpperRightMB, + OMXVCMotionVector*pDstMVCurMB, + OMX_INT fcodeForward, + OMXVCM4P2MacroblockType MBType +); + + + +/** + * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (6.2.5.2.2) + * + * Description: + * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients + * for one intra block. Two versions of the function (DCVLC and ACVLC) are + * provided in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the + * bitstream buffer + * pBitOffset - pointer to the bit position in the current byte referenced + * by *ppBitStream. The parameter *pBitOffset is valid in the + * range [0-7]. + * Bit Position in one byte: |Most Least| + * *pBitOffset |0 1 2 3 4 5 6 7| + * predDir - AC prediction direction; used to select the zigzag scan + * pattern; takes one of the following values: + * - OMX_VC_NONE - AC prediction not used; + * performs classical zigzag scan. + * - OMX_VC_HORIZONTAL - Horizontal prediction; + * performs alternate-vertical zigzag scan; + * - OMX_VC_VERTICAL - Vertical prediction; + * performs alternate-horizontal zigzag scan. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. 
+ * videoComp - video component type (luminance or chrominance) of the + * current block + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded such + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated such that it points to the current + * bit position in the byte pointed by *ppBitStream + * pDst - pointer to the coefficient buffer of current block; must be + * 4-byte aligned. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDst + * - *pBitOffset exceeds [0,7] + * - predDir exceeds [0,2] + * - pDst is not 4-byte aligned + * OMX_Sts_Err - if: + * - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12 + * - At least one of the mark bits equals zero + * - Illegal stream encountered; code cannot be located in VLC table + * - Forbidden code encountered in the VLC FLC table. + * - The number of coefficients is greater than 64 + * + */ +OMXResult omxVCM4P2_DecodeVLCZigzag_IntraDCVLC ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_U8 predDir, + OMX_INT shortVideoHeader, + OMXVCM4P2VideoComponent videoComp +); + + + +/** + * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC (6.2.5.2.2) + * + * Description: + * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients + * for one intra block. Two versions of the function (DCVLC and ACVLC) are + * provided in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the + * bitstream buffer + * pBitOffset - pointer to the bit position in the current byte referenced + * by *ppBitStream.
The parameter *pBitOffset is valid in the + * range [0-7]. Bit Position in one byte: |Most Least| *pBitOffset + * |0 1 2 3 4 5 6 7| + * predDir - AC prediction direction; used to select the zigzag scan + * pattern; takes one of the following values: OMX_VC_NONE - AC + * prediction not used; performs classical zigzag scan. + * OMX_VC_HORIZONTAL - Horizontal prediction; performs + * alternate-vertical zigzag scan; OMX_VC_VERTICAL - Vertical + * prediction; performs alternate-horizontal zigzag scan. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * videoComp - video component type (luminance or chrominance) of the + * current block + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded such + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated such that it points to the current + * bit position in the byte pointed by *ppBitStream + * pDst - pointer to the coefficient buffer of current block; must be + * 4-byte aligned. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: at least one of the following + * pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst; + * or at least one of the following conditions is true: + * *pBitOffset exceeds [0,7], predDir exceeds [0,2], or pDst is + * not 4-byte aligned. + * OMX_Sts_Err - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12; at least one of + * the mark bits equals zero; illegal stream encountered (code cannot + * be located in VLC table); forbidden code encountered in the VLC + * FLC table; or the number of coefficients is greater than 64. + * + */ +OMXResult omxVCM4P2_DecodeVLCZigzag_IntraACVLC ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_U8 predDir, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_DecodeVLCZigzag_Inter (6.2.5.2.3) + * + * Description: + * Performs VLC decoding and inverse zigzag scan for one inter-coded block. + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the stream buffer + * pBitOffset - pointer to the next available bit in the current stream + * byte referenced by *ppBitStream. The parameter *pBitOffset is + * valid within the range [0-7]. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded such + * that it points to the current byte in the stream buffer + * pBitOffset - *pBitOffset is updated after decoding such that it points + * to the next available bit in the stream byte referenced by + * *ppBitStream + * pDst - pointer to the coefficient buffer of current block; must be + * 4-byte aligned.
+ * + * Return Value: + * + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDst + * - pDst is not 4-byte aligned + * - *pBitOffset exceeds [0,7] + * OMX_Sts_Err - status error, if: + * - At least one mark bit is equal to zero + * - Encountered an illegal stream code that cannot be found in the VLC table + * - Encountered an illegal code in the VLC FLC table + * - The number of coefficients is greater than 64 + * + */ +OMXResult omxVCM4P2_DecodeVLCZigzag_Inter ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_QuantInvIntra_I (6.2.5.3.2) + * + * Description: + * Performs the second inverse quantization mode on an intra/inter coded + * block. Supports bits_per_pixel = 8. The output coefficients are clipped to + * the range [-2048, 2047]. + * + * Input Arguments: + * + * pSrcDst - pointer to the input (quantized) intra/inter block; must be + * aligned on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale) + * videoComp - video component type of the current block. Takes one of the + * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra + * version only). + * shortVideoHeader - binary flag indicating presence of short_video_header + * (intra version only). + * + * Output Arguments: + * + * pSrcDst - pointer to the output (dequantized) intra/inter block + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; one or more of the following is + * true: + * - pSrcDst is NULL + * - QP <= 0 or QP >=31 + * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE. 
+ * + */ +OMXResult omxVCM4P2_QuantInvIntra_I ( + OMX_S16 *pSrcDst, + OMX_INT QP, + OMXVCM4P2VideoComponent videoComp, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_QuantInvInter_I (6.2.5.3.2) + * + * Description: + * Performs the second inverse quantization mode on an intra/inter coded + * block. Supports bits_per_pixel = 8. The output coefficients are clipped to + * the range [-2048, 2047]. + * + * Input Arguments: + * + * pSrcDst - pointer to the input (quantized) intra/inter block; must be + * aligned on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale) + * videoComp - video component type of the current block. Takes one of the + * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra + * version only). + * shortVideoHeader - binary flag indicating presence of short_video_header + * (intra version only). + * + * Output Arguments: + * + * pSrcDst - pointer to the output (dequantized) intra/inter block + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; one or more of the following is + * true: + * - pSrcDst is NULL + * - QP <= 0 or QP >=31 + * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE. + * + */ +OMXResult omxVCM4P2_QuantInvInter_I ( + OMX_S16 *pSrcDst, + OMX_INT QP +); + + + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Intra (6.2.5.4.1) + * + * Description: + * Decodes the INTRA block coefficients. Inverse quantization, inversely + * zigzag positioning, and IDCT, with appropriate clipping on each step, are + * performed on the coefficients. The results are then placed in the output + * frame/plane on a pixel basis. Note: This function will be used only when + * at least one non-zero AC coefficient of current block exists in the bit + * stream. The DC only condition will be handled in another function. + * + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream buffer. 
There is no boundary check for the bit stream + * buffer. + * pBitOffset - pointer to the bit position in the byte pointed to by + * *ppBitStream. *pBitOffset is valid within [0-7]. + * step - width of the destination plane + * pCoefBufRow - pointer to the coefficient row buffer; must be aligned on + * an 8-byte boundary. + * pCoefBufCol - pointer to the coefficient column buffer; must be aligned + * on an 8-byte boundary. + * curQP - quantization parameter of the macroblock which the current block + * belongs to + * pQPBuf - pointer to the quantization parameter buffer + * blockIndex - block index indicating the component type and position as + * defined in [ISO14496-2], subclause 6.1.3.8, Figure 6-5. + * intraDCVLC - a code determined by intra_dc_vlc_thr and QP. This allows a + * mechanism to switch between two VLC for coding of Intra DC + * coefficients as per [ISO14496-2], Table 6-21. + * ACPredFlag - a flag equal to ac_pred_flag (of luminance) indicating if + * the ac coefficients of the first row or first column are + * differentially coded for intra coded macroblock. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded, so + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream + * pDst - pointer to the block in the destination plane; must be aligned on + * an 8-byte boundary. + * pCoefBufRow - pointer to the updated coefficient row buffer. + * pCoefBufCol - pointer to the updated coefficient column buffer Note: + * The coefficient buffers must be updated in accordance with the + * update procedure defined in section 6.2.2. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pCoefBufRow, pCoefBufCol, + * pQPBuf, pDst. + * - *pBitOffset exceeds [0,7] + * - curQP exceeds (1, 31) + * - blockIndex exceeds [0,5] + * - step is not the multiple of 8 + * - a pointer alignment requirement was violated. + * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Intra. + * + */ +OMXResult omxVCM4P2_DecodeBlockCoef_Intra ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_U8 *pDst, + OMX_INT step, + OMX_S16 *pCoefBufRow, + OMX_S16 *pCoefBufCol, + OMX_U8 curQP, + const OMX_U8 *pQPBuf, + OMX_INT blockIndex, + OMX_INT intraDCVLC, + OMX_INT ACPredFlag, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Inter (6.2.5.4.2) + * + * Description: + * Decodes the INTER block coefficients. This function performs inverse + * quantization, inverse zigzag positioning, and IDCT (with appropriate + * clipping on each step) on the coefficients. The results (residuals) are + * placed in a contiguous array of 64 elements. For INTER block, the output + * buffer holds the residuals for further reconstruction. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream buffer. There is no boundary check for the bit stream + * buffer. + * pBitOffset - pointer to the bit position in the byte pointed to by + * *ppBitStream. *pBitOffset is valid within [0-7] + * QP - quantization parameter + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded, so + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream + * pDst - pointer to the decoded residual buffer (a contiguous array of 64 + * elements of OMX_S16 data type); must be aligned on a 16-byte + * boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is Null: + * ppBitStream, *ppBitStream, pBitOffset , pDst + * - *pBitOffset exceeds [0,7] + * - QP <= 0. + * - pDst is not 16-byte aligned + * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Inter . + * + */ +OMXResult omxVCM4P2_DecodeBlockCoef_Inter ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_INT QP, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_PredictReconCoefIntra (6.2.5.4.3) + * + * Description: + * Performs adaptive DC/AC coefficient prediction for an intra block. Prior + * to the function call, prediction direction (predDir) should be selected as + * specified in [ISO14496-2], subclause 7.4.3.1. + * + * Input Arguments: + * + * pSrcDst - pointer to the coefficient buffer which contains the quantized + * coefficient residuals (PQF) of the current block; must be + * aligned on a 4-byte boundary. The output coefficients are + * saturated to the range [-2048, 2047]. + * pPredBufRow - pointer to the coefficient row buffer; must be aligned on + * a 4-byte boundary. + * pPredBufCol - pointer to the coefficient column buffer; must be aligned + * on a 4-byte boundary. + * curQP - quantization parameter of the current block. curQP may equal to + * predQP especially when the current block and the predictor block + * are in the same macroblock. 
+ * predQP - quantization parameter of the predictor block + * predDir - indicates the prediction direction which takes one of the + * following values: OMX_VC_HORIZONTAL - predict horizontally + * OMX_VC_VERTICAL - predict vertically + * ACPredFlag - a flag indicating if AC prediction should be performed. It + * is equal to ac_pred_flag in the bit stream syntax of MPEG-4 + * videoComp - video component type (luminance or chrominance) of the + * current block + * + * Output Arguments: + * + * pSrcDst - pointer to the coefficient buffer which contains the quantized + * coefficients (QF) of the current block + * pPredBufRow - pointer to the updated coefficient row buffer + * pPredBufCol - pointer to the updated coefficient column buffer. Note: + * Buffer update: Update the AC prediction buffer (both row and + * column buffer). + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the pointers is NULL: + * pSrcDst, pPredBufRow, or pPredBufCol. + * - curQP <= 0, + * - predQP <= 0, + * - curQP > 31, + * - predQP > 31, + * - predDir exceeds [1,2] + * - pSrcDst, pPredBufRow, or pPredBufCol is not 4-byte aligned. + * + */ +OMXResult omxVCM4P2_PredictReconCoefIntra ( + OMX_S16 *pSrcDst, + OMX_S16 *pPredBufRow, + OMX_S16 *pPredBufCol, + OMX_INT curQP, + OMX_INT predQP, + OMX_INT predDir, + OMX_INT ACPredFlag, + OMXVCM4P2VideoComponent videoComp +); + + + +/** + * Function: omxVCM4P2_MCReconBlock (6.2.5.5.1) + * + * Description: + * Performs motion compensation prediction for an 8x8 block using + * interpolation described in [ISO14496-2], subclause 7.6.2. + * + * Input Arguments: + * + * pSrc - pointer to the block in the reference plane. + * srcStep - distance between the start of consecutive lines in the + * reference plane, in bytes; must be a multiple of 8. + * dstStep - distance between the start of consecutive lines in the + * destination plane, in bytes; must be a multiple of 8. 
+ * pSrcResidue - pointer to a buffer containing the 16-bit prediction + * residuals; must be 16-byte aligned. If the pointer is NULL, then + * no prediction is done, only motion compensation, i.e., the block + * is moved with interpolation. + * predictType - bilinear interpolation type, as defined in section + * 6.2.1.2. + * rndVal - rounding control parameter: 0 - disabled; 1 - enabled. + * + * Output Arguments: + * + * pDst - pointer to the destination buffer; must be 8-byte aligned. If + * prediction residuals are added then output intensities are + * clipped to the range [0,255]. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - pDst is not 8-byte aligned. + * - pSrcResidue is not 16-byte aligned. + * - one or more of the following pointers is NULL: pSrc or pDst. + * - either srcStep or dstStep is not a multiple of 8. + * - invalid type specified for the parameter predictType. + * - the parameter rndVal is not equal either to 0 or 1. 
+ * + */ +OMXResult omxVCM4P2_MCReconBlock ( + const OMX_U8 *pSrc, + OMX_INT srcStep, + const OMX_S16 *pSrcResidue, + OMX_U8 *pDst, + OMX_INT dstStep, + OMX_INT predictType, + OMX_INT rndVal +); + + + +/* 6.3.1.1 Intra 16x16 Prediction Modes */ +/* A data type that enumerates intra_16x16 macroblock prediction modes is defined as follows: */ + +typedef enum { + OMX_VC_16X16_VERT = 0, /** Intra_16x16_Vertical */ + OMX_VC_16X16_HOR = 1, /** Intra_16x16_Horizontal */ + OMX_VC_16X16_DC = 2, /** Intra_16x16_DC */ + OMX_VC_16X16_PLANE = 3 /** Intra_16x16_Plane */ +} OMXVCM4P10Intra16x16PredMode; + + + +/* 6.3.1.2 Intra 4x4 Prediction Modes */ +/* A data type that enumerates intra_4x4 macroblock prediction modes is defined as follows: */ + +typedef enum { + OMX_VC_4X4_VERT = 0, /** Intra_4x4_Vertical */ + OMX_VC_4X4_HOR = 1, /** Intra_4x4_Horizontal */ + OMX_VC_4X4_DC = 2, /** Intra_4x4_DC */ + OMX_VC_4X4_DIAG_DL = 3, /** Intra_4x4_Diagonal_Down_Left */ + OMX_VC_4X4_DIAG_DR = 4, /** Intra_4x4_Diagonal_Down_Right */ + OMX_VC_4X4_VR = 5, /** Intra_4x4_Vertical_Right */ + OMX_VC_4X4_HD = 6, /** Intra_4x4_Horizontal_Down */ + OMX_VC_4X4_VL = 7, /** Intra_4x4_Vertical_Left */ + OMX_VC_4X4_HU = 8 /** Intra_4x4_Horizontal_Up */ +} OMXVCM4P10Intra4x4PredMode; + + + +/* 6.3.1.3 Chroma Prediction Modes */ +/* A data type that enumerates intra chroma prediction modes is defined as follows: */ + +typedef enum { + OMX_VC_CHROMA_DC = 0, /** Intra_Chroma_DC */ + OMX_VC_CHROMA_HOR = 1, /** Intra_Chroma_Horizontal */ + OMX_VC_CHROMA_VERT = 2, /** Intra_Chroma_Vertical */ + OMX_VC_CHROMA_PLANE = 3 /** Intra_Chroma_Plane */ +} OMXVCM4P10IntraChromaPredMode; + + + +/* 6.3.1.4 Motion Estimation Modes */ +/* A data type that enumerates H.264 motion estimation modes is defined as follows: */ + +typedef enum { + OMX_VC_M4P10_FAST_SEARCH = 0, /** Fast motion search */ + OMX_VC_M4P10_FULL_SEARCH = 1 /** Full motion search */ +} OMXVCM4P10MEMode; + + + +/* 6.3.1.5 Macroblock Types */ +/* A data type 
that enumerates H.264 macroblock types is defined as follows: */ + +typedef enum { + OMX_VC_P_16x16 = 0, /* defined by [ISO14496-10] */ + OMX_VC_P_16x8 = 1, + OMX_VC_P_8x16 = 2, + OMX_VC_P_8x8 = 3, + OMX_VC_PREF0_8x8 = 4, + OMX_VC_INTER_SKIP = 5, + OMX_VC_INTRA_4x4 = 8, + OMX_VC_INTRA_16x16 = 9, + OMX_VC_INTRA_PCM = 10 +} OMXVCM4P10MacroblockType; + + + +/* 6.3.1.6 Sub-Macroblock Types */ +/* A data type that enumerates H.264 sub-macroblock types is defined as follows: */ + +typedef enum { + OMX_VC_SUB_P_8x8 = 0, /* defined by [ISO14496-10] */ + OMX_VC_SUB_P_8x4 = 1, + OMX_VC_SUB_P_4x8 = 2, + OMX_VC_SUB_P_4x4 = 3 +} OMXVCM4P10SubMacroblockType; + + + +/* 6.3.1.7 Variable Length Coding (VLC) Information */ + +typedef struct { + OMX_U8 uTrailing_Ones; /* Trailing ones; 3 at most */ + OMX_U8 uTrailing_One_Signs; /* Trailing ones signal */ + OMX_U8 uNumCoeffs; /* Total number of non-zero coefs, including trailing ones */ + OMX_U8 uTotalZeros; /* Total number of zero coefs */ + OMX_S16 iLevels[16]; /* Levels of non-zero coefs, in reverse zig-zag order */ + OMX_U8 uRuns[16]; /* Runs for levels and trailing ones, in reverse zig-zag order */ +} OMXVCM4P10VLCInfo; + + + +/* 6.3.1.8 Macroblock Information */ + +typedef struct { + OMX_S32 sliceId; /* slice number */ + OMXVCM4P10MacroblockType mbType; /* MB type */ + OMXVCM4P10SubMacroblockType subMBType[4]; /* sub-block type */ + OMX_S32 qpy; /* qp for luma */ + OMX_S32 qpc; /* qp for chroma */ + OMX_U32 cbpy; /* CBP Luma */ + OMX_U32 cbpc; /* CBP Chroma */ + OMXVCMotionVector pMV0[4][4]; /* motion vector, represented using 1/4-pel units, pMV0[blocky][blockx] (blocky = 0~3, blockx =0~3) */ + OMXVCMotionVector pMVPred[4][4]; /* motion vector prediction, Represented using 1/4-pel units, pMVPred[blocky][blockx] (blocky = 0~3, blockx = 0~3) */ + OMX_U8 pRefL0Idx[4]; /* reference picture indices */ + OMXVCM4P10Intra16x16PredMode Intra16x16PredMode; /* best intra 16x16 prediction mode */ + OMXVCM4P10Intra4x4PredMode 
pIntra4x4PredMode[16]; /* best intra 4x4 prediction mode for each block, pMV0 indexed as above */ +} OMXVCM4P10MBInfo, *OMXVCM4P10MBInfoPtr; + + + +/* 6.3.1.9 Motion Estimation Parameters */ + +typedef struct { + OMX_S32 blockSplitEnable8x8; /* enables 16x8, 8x16, 8x8 */ + OMX_S32 blockSplitEnable4x4; /* enable splitting of 8x4, 4x8, 4x4 blocks */ + OMX_S32 halfSearchEnable; + OMX_S32 quarterSearchEnable; + OMX_S32 intraEnable4x4; /* 1=enable, 0=disable */ + OMX_S32 searchRange16x16; /* integer pixel units */ + OMX_S32 searchRange8x8; + OMX_S32 searchRange4x4; +} OMXVCM4P10MEParams; + + + +/** + * Function: omxVCM4P10_PredictIntra_4x4 (6.3.3.1.1) + * + * Description: + * Perform Intra_4x4 prediction for luma samples. If the upper-right block is + * not available, then duplication work should be handled inside the function. + * Users need not define them outside. + * + * Input Arguments: + * + * pSrcLeft - Pointer to the buffer of 4 left pixels: + * p[x, y] (x = -1, y = 0..3) + * pSrcAbove - Pointer to the buffer of 8 above pixels: + * p[x,y] (x = 0..7, y =-1); + * must be aligned on a 4-byte boundary. + * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) + * leftStep - Step of left pixel buffer; must be a multiple of 4. + * dstStep - Step of the destination buffer; must be a multiple of 4. + * predMode - Intra_4x4 prediction mode. + * availability - Neighboring 4x4 block availability flag, refer to + * "Neighboring Macroblock Availability" . + * + * Output Arguments: + * + * pDst - Pointer to the destination buffer; must be aligned on a 4-byte + * boundary. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pDst is NULL. + * dstStep < 4, or dstStep is not a multiple of 4. + * leftStep is not a multiple of 4. + * predMode is not in the valid range of enumeration + * OMXVCM4P10Intra4x4PredMode. 
+ * predMode is OMX_VC_4X4_VERT, but availability doesn't set OMX_VC_UPPER + * indicating p[x,-1] (x = 0..3) is not available. + * predMode is OMX_VC_4X4_HOR, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..3) is not available. + * predMode is OMX_VC_4X4_DIAG_DL, but availability doesn't set + * OMX_VC_UPPER indicating p[x, -1] (x = 0..3) is not available. + * predMode is OMX_VC_4X4_DIAG_DR, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not + * available. + * predMode is OMX_VC_4X4_VR, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not + * available. + * predMode is OMX_VC_4X4_HD, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not + * available. + * predMode is OMX_VC_4X4_VL, but availability doesn't set OMX_VC_UPPER + * indicating p[x,-1] (x = 0..3) is not available. + * predMode is OMX_VC_4X4_HU, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..3) is not available. + * availability sets OMX_VC_UPPER, but pSrcAbove is NULL. + * availability sets OMX_VC_LEFT, but pSrcLeft is NULL. + * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. + * either pSrcAbove or pDst is not aligned on a 4-byte boundary. + * + * Note: + * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if + * they are not used by intra prediction as implied in predMode. 
+ * + */ +OMXResult omxVCM4P10_PredictIntra_4x4 ( + const OMX_U8 *pSrcLeft, + const OMX_U8 *pSrcAbove, + const OMX_U8 *pSrcAboveLeft, + OMX_U8 *pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMXVCM4P10Intra4x4PredMode predMode, + OMX_S32 availability +); + + + +/** + * Function: omxVCM4P10_PredictIntra_16x16 (6.3.3.1.2) + * + * Description: + * Perform Intra_16x16 prediction for luma samples. If the upper-right block + * is not available, then duplication work should be handled inside the + * function. Users need not define them outside. + * + * Input Arguments: + * + * pSrcLeft - Pointer to the buffer of 16 left pixels: p[x, y] (x = -1, y = + * 0..15) + * pSrcAbove - Pointer to the buffer of 16 above pixels: p[x,y] (x = 0..15, + * y= -1); must be aligned on a 16-byte boundary. + * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) + * leftStep - Step of left pixel buffer; must be a multiple of 16. + * dstStep - Step of the destination buffer; must be a multiple of 16. + * predMode - Intra_16x16 prediction mode, please refer to section 3.4.1. + * availability - Neighboring 16x16 MB availability flag. Refer to + * section 3.4.4. + * + * Output Arguments: + * + * pDst -Pointer to the destination buffer; must be aligned on a 16-byte + * boundary. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pDst is NULL. + * dstStep < 16. or dstStep is not a multiple of 16. + * leftStep is not a multiple of 16. + * predMode is not in the valid range of enumeration + * OMXVCM4P10Intra16x16PredMode + * predMode is OMX_VC_16X16_VERT, but availability doesn't set + * OMX_VC_UPPER indicating p[x,-1] (x = 0..15) is not available. + * predMode is OMX_VC_16X16_HOR, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..15) is not available. 
+ * predMode is OMX_VC_16X16_PLANE, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..15), or p[-1,y] (y = 0..15), or p[-1,-1] is not + * available. + * availability sets OMX_VC_UPPER, but pSrcAbove is NULL. + * availability sets OMX_VC_LEFT, but pSrcLeft is NULL. + * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. + * either pSrcAbove or pDst is not aligned on a 16-byte boundary. + * + * Note: + * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if + * they are not used by intra prediction as implied in predMode. + * Note: + * OMX_VC_UPPER_RIGHT is not used in intra_16x16 luma prediction. + * + */ +OMXResult omxVCM4P10_PredictIntra_16x16 ( + const OMX_U8 *pSrcLeft, + const OMX_U8 *pSrcAbove, + const OMX_U8 *pSrcAboveLeft, + OMX_U8 *pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMXVCM4P10Intra16x16PredMode predMode, + OMX_S32 availability +); + + + +/** + * Function: omxVCM4P10_PredictIntraChroma_8x8 (6.3.3.1.3) + * + * Description: + * Performs intra prediction for chroma samples. + * + * Input Arguments: + * + * pSrcLeft - Pointer to the buffer of 8 left pixels: p[x, y] (x = -1, y= + * 0..7). + * pSrcAbove - Pointer to the buffer of 8 above pixels: p[x,y] (x = 0..7, y + * = -1); must be aligned on an 8-byte boundary. + * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) + * leftStep - Step of left pixel buffer; must be a multiple of 8. + * dstStep - Step of the destination buffer; must be a multiple of 8. + * predMode - Intra chroma prediction mode, please refer to section 3.4.3. + * availability - Neighboring chroma block availability flag, please refer + * to "Neighboring Macroblock Availability". + * + * Output Arguments: + * + * pDst - Pointer to the destination buffer; must be aligned on an 8-byte + * boundary. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. 
+ * If any of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pDst is NULL. + * dstStep < 8 or dstStep is not a multiple of 8. + * leftStep is not a multiple of 8. + * predMode is not in the valid range of enumeration + * OMXVCM4P10IntraChromaPredMode. + * predMode is OMX_VC_CHROMA_VERT, but availability doesn't set + * OMX_VC_UPPER indicating p[x,-1] (x = 0..7) is not available. + * predMode is OMX_VC_CHROMA_HOR, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..7) is not available. + * predMode is OMX_VC_CHROMA_PLANE, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..7), or p[-1,y] (y = 0..7), or p[-1,-1] is not + * available. + * availability sets OMX_VC_UPPER, but pSrcAbove is NULL. + * availability sets OMX_VC_LEFT, but pSrcLeft is NULL. + * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. + * either pSrcAbove or pDst is not aligned on an 8-byte boundary. + * + * Note: pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if + * they are not used by intra prediction as implied in predMode. + * + * Note: OMX_VC_UPPER_RIGHT is not used in intra chroma prediction. + * + */ +OMXResult omxVCM4P10_PredictIntraChroma_8x8 ( + const OMX_U8 *pSrcLeft, + const OMX_U8 *pSrcAbove, + const OMX_U8 *pSrcAboveLeft, + OMX_U8 *pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMXVCM4P10IntraChromaPredMode predMode, + OMX_S32 availability +); + + + +/** + * Function: omxVCM4P10_InterpolateLuma (6.3.3.2.1) + * + * Description: + * Performs quarter-pixel interpolation for inter luma MB. It is assumed that + * the frame is already padded when calling this function. 
+ * + * Input Arguments: + * + * pSrc - Pointer to the source reference frame buffer + * srcStep - reference frame step, in bytes; must be a multiple of roi.width + * dstStep - destination frame step, in bytes; must be a multiple of + * roi.width + * dx - Fractional part of horizontal motion vector component in 1/4 pixel + * unit; valid in the range [0,3] + * dy - Fractional part of vertical motion vector y component in 1/4 pixel + * unit; valid in the range [0,3] + * roi - Dimension of the interpolation region; the parameters roi.width and + * roi.height must be equal to either 4, 8, or 16. + * + * Output Arguments: + * + * pDst - Pointer to the destination frame buffer: + * if roi.width==4, 4-byte alignment required + * if roi.width==8, 8-byte alignment required + * if roi.width==16, 16-byte alignment required + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pSrc or pDst is NULL. + * srcStep or dstStep < roi.width. + * dx or dy is out of range [0,3]. + * roi.width or roi.height is out of range {4, 8, 16}. + * roi.width is equal to 4, but pDst is not 4 byte aligned. + * roi.width is equal to 8 or 16, but pDst is not 8 byte aligned. + * srcStep or dstStep is not a multiple of 8. + * + */ +OMXResult omxVCM4P10_InterpolateLuma ( + const OMX_U8 *pSrc, + OMX_S32 srcStep, + OMX_U8 *pDst, + OMX_S32 dstStep, + OMX_S32 dx, + OMX_S32 dy, + OMXSize roi +); + + + +/** + * Function: omxVCM4P10_InterpolateChroma (6.3.3.2.2) + * + * Description: + * Performs 1/8-pixel interpolation for inter chroma MB. + * + * Input Arguments: + * + * pSrc -Pointer to the source reference frame buffer + * srcStep -Reference frame step in bytes + * dstStep -Destination frame step in bytes; must be a multiple of + * roi.width. 
+ * dx -Fractional part of horizontal motion vector component in 1/8 pixel + * unit; valid in the range [0,7] + * dy -Fractional part of vertical motion vector component in 1/8 pixel + * unit; valid in the range [0,7] + * roi -Dimension of the interpolation region; the parameters roi.width and + * roi.height must be equal to either 2, 4, or 8. + * + * Output Arguments: + * + * pDst -Pointer to the destination frame buffer: + * if roi.width==2, 2-byte alignment required + * if roi.width==4, 4-byte alignment required + * if roi.width==8, 8-byte alignment required + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pSrc or pDst is NULL. + * srcStep or dstStep < 8. + * dx or dy is out of range [0-7]. + * roi.width or roi.height is out of range {2,4,8}. + * roi.width is equal to 2, but pDst is not 2-byte aligned. + * roi.width is equal to 4, but pDst is not 4-byte aligned. + * roi.width is equal to 8, but pDst is not 8 byte aligned. + * srcStep or dstStep is not a multiple of 8. + * + */ +OMXResult omxVCM4P10_InterpolateChroma ( + const OMX_U8 *pSrc, + OMX_S32 srcStep, + OMX_U8 *pDst, + OMX_S32 dstStep, + OMX_S32 dx, + OMX_S32 dy, + OMXSize roi +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingLuma_VerEdge_I (6.3.3.3.1) + * + * Description: + * Performs in-place deblock filtering on four vertical edges of the luma + * macroblock (16x16). + * + * Input Arguments: + * + * pSrcDst - Pointer to the input macroblock; must be 16-byte aligned. + * srcdstStep -Step of the arrays; must be a multiple of 16. + * pAlpha -Array of size 2 of alpha thresholds (the first item is the alpha + * threshold for the external vertical edge, and the second item is + * for the internal vertical edge); per [ISO14496-10] alpha values + * must be in the range [0,255]. 
+ * pBeta -Array of size 2 of beta thresholds (the first item is the beta + * threshold for the external vertical edge, and the second item is + * for the internal vertical edge); per [ISO14496-10] beta values + * must be in the range [0,18]. + * pThresholds -Array of size 16 of Thresholds (TC0) (values for the left + * edge of each 4x4 block, arranged in vertical block order); must + * be aligned on a 4-byte boundary. Per [ISO14496-10] values must + * be in the range [0,25]. + * pBS -Array of size 16 of BS parameters (arranged in vertical block + * order); valid in the range [0,4] with the following + * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) + * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * One or more of the pointers pSrcDst, pAlpha, pBeta, pThresholds, or pBS + * is NULL. + * Either pThresholds or pBS is not aligned on a 4-byte boundary. + * pSrcDst is not 16-byte aligned. + * srcdstStep is not a multiple of 16. + * pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * One or more entries in the table pThresholds[0..15] is outside of the + * range [0,25]. + * pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && + * pBS[i^3]!=4) for 0<=i<=3. 
+ * + */ +OMXResult omxVCM4P10_FilterDeblockingLuma_VerEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingLuma_HorEdge_I (6.3.3.3.2) + * + * Description: + * Performs in-place deblock filtering on four horizontal edges of the luma + * macroblock (16x16). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 16-byte aligned. + * srcdstStep - step of the arrays; must be a multiple of 16. + * pAlpha - array of size 2 of alpha thresholds (the first item is the alpha + * threshold for the external horizontal edge, and the second item is + * for the internal horizontal edge); per [ISO14496-10] alpha + * values must be in the range [0,255]. + * pBeta - array of size 2 of beta thresholds (the first item is the beta + * threshold for the external horizontal edge, and the second item + * is for the internal horizontal edge). Per [ISO14496-10] beta + * values must be in the range [0,18]. + * pThresholds - array of size 16 containing thresholds, TC0, for the top + * horizontal edge of each 4x4 block, arranged in horizontal block + * order; must be aligned on a 4-byte boundary. Per [ISO14496-10] + * values must be in the range [0,25]. + * pBS - array of size 16 of BS parameters (arranged in horizontal block + * order); valid in the range [0,4] with the following + * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) + * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr, if one of the following cases occurs: + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds, or pBS. + * - either pThresholds or pBS is not aligned on a 4-byte boundary. 
+ * - pSrcDst is not 16-byte aligned. + * - srcdstStep is not a multiple of 16. + * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * - pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * - One or more entries in the table pThresholds[0..15] is + * outside of the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * + */ +OMXResult omxVCM4P10_FilterDeblockingLuma_HorEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingChroma_VerEdge_I (6.3.3.3.3) + * + * Description: + * Performs in-place deblock filtering on four vertical edges of the chroma + * macroblock (8x8). + * + * Input Arguments: + * + * pSrcDst - Pointer to the input macroblock; must be 8-byte aligned. + * srcdstStep - Step of the arrays; must be a multiple of 8. + * pAlpha - Array of size 2 of alpha thresholds (the first item is alpha + * threshold for external vertical edge, and the second item is for + * internal vertical edge); per [ISO14496-10] alpha values must be + * in the range [0,255]. + * pBeta - Array of size 2 of beta thresholds (the first item is the beta + * threshold for the external vertical edge, and the second item is + * for the internal vertical edge); per [ISO14496-10] beta values + * must be in the range [0,18]. + * pThresholds - Array of size 8 containing thresholds, TC0, for the left + * vertical edge of each 4x2 chroma block, arranged in vertical + * block order; must be aligned on a 4-byte boundary. Per + * [ISO14496-10] values must be in the range [0,25]. + * pBS - Array of size 16 of BS parameters (values for each 2x2 chroma + * block, arranged in vertical block order). 
This parameter is the + * same as the pBS parameter passed into FilterDeblockLuma_VerEdge; + * valid in the range [0,4] with the following restrictions: i) + * pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and + * only if pBS[i^3]== 4. Must be 4 byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds, or pBS. + * - pSrcDst is not 8-byte aligned. + * - srcdstStep is not a multiple of 8. + * - pThresholds is not 4-byte aligned. + * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * - pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * - One or more entries in the table pThresholds[0..7] is outside + * of the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - pBS is not 4-byte aligned. + * + */ +OMXResult omxVCM4P10_FilterDeblockingChroma_VerEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingChroma_HorEdge_I (6.3.3.3.4) + * + * Description: + * Performs in-place deblock filtering on the horizontal edges of the chroma + * macroblock (8x8). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 8-byte aligned. + * srcdstStep - array step; must be a multiple of 8. + * pAlpha - array of size 2 containing alpha thresholds; the first element + * contains the threshold for the external horizontal edge, and the + * second element contains the threshold for internal horizontal + * edge. Per [ISO14496-10] alpha values must be in the range + * [0,255]. 
+ * pBeta - array of size 2 containing beta thresholds; the first element + * contains the threshold for the external horizontal edge, and the + * second element contains the threshold for the internal + * horizontal edge. Per [ISO14496-10] beta values must be in the + * range [0,18]. + * pThresholds - array of size 8 containing thresholds, TC0, for the top + * horizontal edge of each 2x4 chroma block, arranged in horizontal + * block order; must be aligned on a 4-byte boundary. Per + * [ISO14496-10] values must be in the range [0,25]. + * pBS - array of size 16 containing BS parameters for each 2x2 chroma + * block, arranged in horizontal block order; valid in the range + * [0,4] with the following restrictions: i) pBS[i]== 4 may occur + * only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 4. + * Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr, if one of the following cases occurs: + * - any of the following pointers is NULL: + * pSrcDst, pAlpha, pBeta, pThresholds, or pBS. + * - pSrcDst is not 8-byte aligned. + * - srcdstStep is not a multiple of 8. + * - pThresholds is not 4-byte aligned. + * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * - pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * - One or more entries in the table pThresholds[0..7] is outside + * of the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - pBS is not 4-byte aligned. 
+ * + */ +OMXResult omxVCM4P10_FilterDeblockingChroma_HorEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_DeblockLuma_I (6.3.3.3.5) + * + * Description: + * This function performs in-place deblock filtering the horizontal and + * vertical edges of a luma macroblock (16x16). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 16-byte aligned. + * srcdstStep - image width; must be a multiple of 16. + * pAlpha - pointer to a 2x2 table of alpha thresholds, organized as + * follows: {external vertical edge, internal vertical edge, + * external horizontal edge, internal horizontal edge }. Per + * [ISO14496-10] alpha values must be in the range [0,255]. + * pBeta - pointer to a 2x2 table of beta thresholds, organized as follows: + * {external vertical edge, internal vertical edge, external + * horizontal edge, internal horizontal edge }. Per [ISO14496-10] + * beta values must be in the range [0,18]. + * pThresholds - pointer to a 16x2 table of threshold (TC0), organized as + * follows: {values for the left or above edge of each 4x4 block, + * arranged in vertical block order and then in horizontal block + * order}; must be aligned on a 4-byte boundary. Per [ISO14496-10] + * values must be in the range [0,25]. + * pBS - pointer to a 16x2 table of BS parameters arranged in scan block + * order for vertical edges and then horizontal edges; valid in the + * range [0,4] with the following restrictions: i) pBS[i]== 4 may + * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== + * 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst - pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds or pBS. 
+ * - pSrcDst is not 16-byte aligned. + * - either pThresholds or pBS is not aligned on a 4-byte boundary. + * - one or more entries in the table pAlpha[0..3] is outside the range + * [0,255]. + * - one or more entries in the table pBeta[0..3] is outside the range + * [0,18]. + * - one or more entries in the table pThresholds[0..31] is outside of + * the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - srcdstStep is not a multiple of 16. + * + */ +OMXResult omxVCM4P10_DeblockLuma_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_DeblockChroma_I (6.3.3.3.6) + * + * Description: + * Performs in-place deblocking filtering on all edges of the chroma + * macroblock (8x8). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 8-byte aligned. + * srcdstStep - step of the arrays; must be a multiple of 8. + * pAlpha - pointer to a 2x2 array of alpha thresholds, organized as + * follows: {external vertical edge, internal vertical edge, + * external horizontal edge, internal horizontal edge }. Per + * [ISO14496-10] alpha values must be in the range [0,255]. + * pBeta - pointer to a 2x2 array of Beta Thresholds, organized as follows: + * { external vertical edge, internal vertical edge, external + * horizontal edge, internal horizontal edge }. Per [ISO14496-10] + * beta values must be in the range [0,18]. + * pThresholds - array of size 8x2 of Thresholds (TC0) (values for the left + * or above edge of each 4x2 or 2x4 block, arranged in vertical + * block order and then in horizontal block order); must be aligned + * on a 4-byte boundary. Per [ISO14496-10] values must be in the + * range [0,25].
+ * pBS - array of size 16x2 of BS parameters (arranged in scan block order + * for vertical edges and then horizontal edges); valid in the + * range [0,4] with the following restrictions: i) pBS[i]== 4 may + * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== + * 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst - pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds, or pBS. + * - pSrcDst is not 8-byte aligned. + * - either pThresholds or pBS is not 4-byte aligned. + * - one or more entries in the table pAlpha[0..3] is outside the range + * [0,255]. + * - one or more entries in the table pBeta[0..3] is outside the range + * [0,18]. + * - one or more entries in the table pThresholds[0..15] is outside of + * the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - srcdstStep is not a multiple of 8. + * + */ +OMXResult omxVCM4P10_DeblockChroma_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (6.3.4.1.1) + * + * Description: + * Performs CAVLC decoding and inverse raster scan for a 2x2 block of + * ChromaDCLevel. The decoded coefficients in the packed position-coefficient + * buffer are stored in reverse zig-zag order, i.e., the first buffer element + * contains the last non-zero position-coefficient pair of the block. Within + * each position-coefficient pair, the position entry indicates the + * raster-scan position of the coefficient, while the coefficient entry + * contains the coefficient value.
+ * + * Input Arguments: + * + * ppBitStream - Double pointer to current byte in bit stream buffer + * pOffset - Pointer to current bit position in the byte pointed to by + * *ppBitStream; valid in the range [0,7]. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after each block is decoded + * pOffset - *pOffset is updated after each block is decoded + * pNumCoeff - Pointer to the number of nonzero coefficients in this block + * ppPosCoefBuf - Double pointer to destination residual + * coefficient-position pair buffer. Buffer position + * (*ppPosCoefBuf) is updated upon return, unless there are only + * zero coefficients in the currently decoded block. In this case + * the caller is expected to bypass the transform/dequantization of + * the empty blocks. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppBitStream or pOffset is NULL. + * - ppPosCoefBuf or pNumCoeff is NULL. + * OMX_Sts_Err - if one of the following is true: + * - an illegal code is encountered in the bitstream + * + */ +OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC ( + const OMX_U8 **ppBitStream, + OMX_S32*pOffset, + OMX_U8 *pNumCoeff, + OMX_U8 **ppPosCoefbuf +); + + + +/** + * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC (6.3.4.1.2) + * + * Description: + * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of + * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. Inverse + * field scan is not supported. The decoded coefficients in the packed + * position-coefficient buffer are stored in reverse zig-zag order, i.e., the + * first buffer element contains the last non-zero position-coefficient pair of + * the block. Within each position-coefficient pair, the position entry + * indicates the raster-scan position of the coefficient, while the + * coefficient entry contains the coefficient value.
+ * + * Input Arguments: + * + * ppBitStream - Double pointer to current byte in bit stream buffer + * pOffset - Pointer to current bit position in the byte pointed to by + * *ppBitStream; valid in the range [0,7]. + * sMaxNumCoeff - Maximum number of non-zero coefficients in current + * block + * sVLCSelect - VLC table selector, obtained from the number of non-zero + * coefficients contained in the above and left 4x4 blocks. It is + * equivalent to the variable nC described in H.264 standard table + * 9-5, except its value can't be less than zero. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after each block is decoded. + * Buffer position (*ppPosCoefBuf) is updated upon return, unless + * there are only zero coefficients in the currently decoded block. + * In this case the caller is expected to bypass the + * transform/dequantization of the empty blocks. + * pOffset - *pOffset is updated after each block is decoded + * pNumCoeff - Pointer to the number of nonzero coefficients in this block + * ppPosCoefBuf - Double pointer to destination residual + * coefficient-position pair buffer + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppBitStream or pOffset is NULL. + * - ppPosCoefBuf or pNumCoeff is NULL. + * - sMaxNumCoeff is not equal to either 15 or 16. + * - sVLCSelect is less than 0.
+ * + * OMX_Sts_Err - if one of the following is true: + * - an illegal code is encountered in the bitstream + * + */ +OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC ( + const OMX_U8 **ppBitStream, + OMX_S32 *pOffset, + OMX_U8 *pNumCoeff, + OMX_U8 **ppPosCoefbuf, + OMX_INT sVLCSelect, + OMX_INT sMaxNumCoeff +); + + + +/** + * Function: omxVCM4P10_TransformDequantLumaDCFromPair (6.3.4.2.1) + * + * Description: + * Reconstructs the 4x4 LumaDC block from the coefficient-position pair + * buffer, performs integer inverse transformation, and dequantization for 4x4 LumaDC + * coefficients, and updates the pair buffer pointer to the next non-empty + * block. + * + * Input Arguments: + * + * ppSrc - Double pointer to residual coefficient-position pair buffer + * output by CAVLC decoding + * QP - Quantization parameter QpY + * + * Output Arguments: + * + * ppSrc - *ppSrc is updated to the start of next non-empty block + * pDst - Pointer to the reconstructed 4x4 LumaDC coefficients buffer; must + * be aligned on an 8-byte boundary. + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppSrc or pDst is NULL. + * - pDst is not 8-byte aligned. + * - QP is not in the range of [0-51]. + * + */ +OMXResult omxVCM4P10_TransformDequantLumaDCFromPair ( + const OMX_U8 **ppSrc, + OMX_S16 *pDst, + OMX_INT QP +); + + + +/** + * Function: omxVCM4P10_TransformDequantChromaDCFromPair (6.3.4.2.2) + * + * Description: + * Reconstruct the 2x2 ChromaDC block from coefficient-position pair buffer, + * perform integer inverse transformation, and dequantization for 2x2 chroma + * DC coefficients, and update the pair buffer pointer to next non-empty + * block.
+ * + * Input Arguments: + * + * ppSrc - Double pointer to residual coefficient-position pair buffer + * output by CAVLC decoding + * QP - Quantization parameter QpC + * + * Output Arguments: + * + * ppSrc - *ppSrc is updated to the start of next non-empty block + * pDst - Pointer to the reconstructed 2x2 ChromaDC coefficients buffer; + * must be aligned on a 4-byte boundary. + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppSrc or pDst is NULL. + * - pDst is not 4-byte aligned. + * - QP is not in the range of [0-51]. + * + */ +OMXResult omxVCM4P10_TransformDequantChromaDCFromPair ( + const OMX_U8 **ppSrc, + OMX_S16 *pDst, + OMX_INT QP +); + + + +/** + * Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd (6.3.4.2.3) + * + * Description: + * Reconstruct the 4x4 residual block from coefficient-position pair buffer, + * perform dequantization and integer inverse transformation for 4x4 block of + * residuals with previous intra prediction or motion compensation data, and + * update the pair buffer pointer to next non-empty block. If pDC == NULL, + * there are 16 non-zero AC coefficients at most in the packed buffer starting + * from 4x4 block position 0; If pDC != NULL, there are 15 non-zero AC + * coefficients at most in the packed buffer starting from 4x4 block position + * 1. + * + * Input Arguments: + * + * ppSrc - Double pointer to residual coefficient-position pair buffer + * output by CAVLC decoding + * pPred - Pointer to the predicted 4x4 block; must be aligned on a 4-byte + * boundary + * predStep - Predicted frame step size in bytes; must be a multiple of 4 + * dstStep - Destination frame step in bytes; must be a multiple of 4 + * pDC - Pointer to the DC coefficient of this block, NULL if it doesn't + * exist + * QP - Quantization parameter. It should be QpC in chroma 4x4 block + * decoding, otherwise it should be QpY.
+ * AC - Flag indicating if at least one non-zero AC coefficient exists + * + * Output Arguments: + * + * pDst - pointer to the reconstructed 4x4 block data; must be aligned on a + * 4-byte boundary + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - pPred or pDst is NULL. + * - pPred or pDst is not 4-byte aligned. + * - predStep or dstStep is not a multiple of 4. + * - AC !=0 and Qp is not in the range of [0-51] or ppSrc == NULL. + * - AC ==0 && pDC ==NULL. + * + */ +OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd ( + const OMX_U8 **ppSrc, + const OMX_U8 *pPred, + const OMX_S16 *pDC, + OMX_U8 *pDst, + OMX_INT predStep, + OMX_INT dstStep, + OMX_INT QP, + OMX_INT AC +); + + + +/** + * Function: omxVCM4P10_MEGetBufSize (6.3.5.1.1) + * + * Description: + * Computes the size, in bytes, of the vendor-specific specification + * structure for the omxVCM4P10 motion estimation functions BlockMatch_Integer + * and MotionEstimationMB. + * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P10MEMode + * pMEParams -motion estimation parameters + * + * Output Arguments: + * + * pSize - pointer to the number of bytes required for the motion + * estimation specification structure + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - pMEParams or pSize is NULL. + * - an invalid MEMode is specified. + * + */ +OMXResult omxVCM4P10_MEGetBufSize ( + OMXVCM4P10MEMode MEmode, + const OMXVCM4P10MEParams *pMEParams, + OMX_U32 *pSize +); + + + +/** + * Function: omxVCM4P10_MEInit (6.3.5.1.2) + * + * Description: + * Initializes the vendor-specific specification structure required for the + * omxVCM4P10 motion estimation functions: BlockMatch_Integer and + * MotionEstimationMB. 
Memory for the specification structure *pMESpec must be + * allocated prior to calling the function, and should be aligned on a 4-byte + * boundary. The number of bytes required for the specification structure can + * be determined using the function omxVCM4P10_MEGetBufSize. Following + * initialization by this function, the vendor-specific structure *pMESpec + * should contain an implementation-specific representation of all motion + * estimation parameters received via the structure pMEParams, for example + * searchRange16x16, searchRange8x8, etc. + * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P10MEMode + * pMEParams - motion estimation parameters + * pMESpec - pointer to the uninitialized ME specification structure + * + * Output Arguments: + * + * pMESpec - pointer to the initialized ME specification structure + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - pMEParams or pMESpec is NULL. + * - an invalid value was specified for the parameter MEmode + * - a negative or zero value was specified for one of the search ranges + * (e.g., pMEParams->searchRange8x8, pMEParams->searchRange16x16, etc.) + * - either in isolation or in combination, one or more of the enables or + * search ranges in the structure *pMEParams were configured such + * that the requested behavior fails to comply with [ISO14496-10]. + * + */ +OMXResult omxVCM4P10_MEInit ( + OMXVCM4P10MEMode MEmode, + const OMXVCM4P10MEParams *pMEParams, + void *pMESpec +); + + + +/** + * Function: omxVCM4P10_BlockMatch_Integer (6.3.5.2.1) + * + * Description: + * Performs integer block match. Returns best MV and associated cost. + * + * Input Arguments: + * + * pSrcOrgY - Pointer to the top-left corner of the current block: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required. + * pSrcRefY - Pointer to the top-left corner of the co-located block in the + * reference picture: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * nSrcOrgStep - Stride of the original picture plane, expressed in terms + * of integer pixels; must be a multiple of iBlockWidth. + * nSrcRefStep - Stride of the reference picture plane, expressed in terms + * of integer pixels + * pRefRect - pointer to the valid reference rectangle inside the reference + * picture plane + * nCurrPointPos - position of the current block in the current plane + * iBlockWidth - Width of the current block, expressed in terms of integer + * pixels; must be equal to either 4, 8, or 16. + * iBlockHeight - Height of the current block, expressed in terms of + * integer pixels; must be equal to either 4, 8, or 16. + * nLamda - Lamda factor; used to compute motion cost + * pMVPred - Predicted MV; used to compute motion cost, expressed in terms + * of 1/4-pel units + * pMVCandidate - Candidate MV; used to initialize the motion search, + * expressed in terms of integer pixels + * pMESpec - pointer to the ME specification structure + * + * Output Arguments: + * + * pDstBestMV - Best MV resulting from integer search, expressed in terms + * of 1/4-pel units + * pBestCost - Motion cost associated with the best MV; computed as + * SAD+Lamda*BitsUsedByMV + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - any of the following pointers are NULL: + * pSrcOrgY, pSrcRefY, pRefRect, pMVPred, pMVCandidate, or pMESpec. + * - Either iBlockWidth or iBlockHeight are values other than 4, 8, or 16. 
+ * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_BlockMatch_Integer ( + const OMX_U8 *pSrcOrgY, + OMX_S32 nSrcOrgStep, + const OMX_U8 *pSrcRefY, + OMX_S32 nSrcRefStep, + const OMXRect *pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + OMX_U8 iBlockWidth, + OMX_U8 iBlockHeight, + OMX_U32 nLamda, + const OMXVCMotionVector *pMVPred, + const OMXVCMotionVector *pMVCandidate, + OMXVCMotionVector *pBestMV, + OMX_S32 *pBestCost, + void *pMESpec +); + + + +/** + * Function: omxVCM4P10_BlockMatch_Half (6.3.5.2.2) + * + * Description: + * Performs a half-pel block match using results from a prior integer search. + * Returns the best MV and associated cost. This function estimates the + * half-pixel motion vector by interpolating the integer resolution motion + * vector referenced by the input parameter pSrcDstBestMV, i.e., the initial + * integer MV is generated externally. The function + * omxVCM4P10_BlockMatch_Integer may be used for integer motion estimation. + * + * Input Arguments: + * + * pSrcOrgY - Pointer to the current position in original picture plane: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * pSrcRefY - Pointer to the top-left corner of the co-located block in the + * reference picture: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * nSrcOrgStep - Stride of the original picture plane in terms of full + * pixels; must be a multiple of iBlockWidth. + * nSrcRefStep - Stride of the reference picture plane in terms of full + * pixels + * iBlockWidth - Width of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * iBlockHeight - Height of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. 
+ * nLamda - Lamda factor, used to compute motion cost + * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to + * compute motion cost + * pSrcDstBestMV - The best MV resulting from a prior integer search, + * represented in terms of 1/4-pel units + * + * Output Arguments: + * + * pSrcDstBestMV - Best MV resulting from the half-pel search, expressed in + * terms of 1/4-pel units + * pBestCost - Motion cost associated with the best MV; computed as + * SAD+Lamda*BitsUsedByMV + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - any of the following pointers is NULL: pSrcOrgY, pSrcRefY, + * pSrcDstBestMV, pMVPred, pBestCost + * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_BlockMatch_Half ( + const OMX_U8 *pSrcOrgY, + OMX_S32 nSrcOrgStep, + const OMX_U8 *pSrcRefY, + OMX_S32 nSrcRefStep, + OMX_U8 iBlockWidth, + OMX_U8 iBlockHeight, + OMX_U32 nLamda, + const OMXVCMotionVector *pMVPred, + OMXVCMotionVector *pSrcDstBestMV, + OMX_S32 *pBestCost +); + + + +/** + * Function: omxVCM4P10_BlockMatch_Quarter (6.3.5.2.3) + * + * Description: + * Performs a quarter-pel block match using results from a prior half-pel + * search. Returns the best MV and associated cost. This function estimates + * the quarter-pixel motion vector by interpolating the half-pel resolution + * motion vector referenced by the input parameter pSrcDstBestMV, i.e., the + * initial half-pel MV is generated externally. The function + * omxVCM4P10_BlockMatch_Half may be used for half-pel motion estimation. + * + * Input Arguments: + * + * pSrcOrgY - Pointer to the current position in original picture plane: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. 
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the + * reference picture: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * nSrcOrgStep - Stride of the original picture plane in terms of full + * pixels; must be a multiple of iBlockWidth. + * nSrcRefStep - Stride of the reference picture plane in terms of full + * pixels + * iBlockWidth - Width of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * iBlockHeight - Height of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * nLamda - Lamda factor, used to compute motion cost + * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to + * compute motion cost + * pSrcDstBestMV - The best MV resulting from a prior half-pel search, + * represented in terms of 1/4 pel units + * + * Output Arguments: + * + * pSrcDstBestMV - Best MV resulting from the quarter-pel search, expressed + * in terms of 1/4-pel units + * pBestCost - Motion cost associated with the best MV; computed as + * SAD+Lamda*BitsUsedByMV + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - One or more of the following pointers is NULL: + * pSrcOrgY, pSrcRefY, pSrcDstBestMV, pMVPred, pBestCost + * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. 
+ * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_BlockMatch_Quarter ( + const OMX_U8 *pSrcOrgY, + OMX_S32 nSrcOrgStep, + const OMX_U8 *pSrcRefY, + OMX_S32 nSrcRefStep, + OMX_U8 iBlockWidth, + OMX_U8 iBlockHeight, + OMX_U32 nLamda, + const OMXVCMotionVector *pMVPred, + OMXVCMotionVector *pSrcDstBestMV, + OMX_S32 *pBestCost +); + + + +/** + * Function: omxVCM4P10_MotionEstimationMB (6.3.5.3.1) + * + * Description: + * Performs MB-level motion estimation and selects best motion estimation + * strategy from the set of modes supported in baseline profile [ISO14496-10]. + * + * Input Arguments: + * + * pSrcCurrBuf - Pointer to the current position in original picture plane; + * 16-byte alignment required + * pSrcRefBufList - Pointer to an array with 16 entries. Each entry points + * to the top-left corner of the co-located MB in a reference + * picture. The array is filled from low-to-high with valid + * reference frame pointers; the unused high entries should be set + * to NULL. Ordering of the reference frames should follow + * [ISO14496-10] subclause 8.2.4 Decoding Process for Reference + * Picture Lists. The entries must be 16-byte aligned. + * pSrcRecBuf - Pointer to the top-left corner of the co-located MB in the + * reconstructed picture; must be 16-byte aligned. + * SrcCurrStep - Width of the original picture plane in terms of full + * pixels; must be a multiple of 16. + * SrcRefStep - Width of the reference picture plane in terms of full + * pixels; must be a multiple of 16. + * SrcRecStep - Width of the reconstructed picture plane in terms of full + * pixels; must be a multiple of 16. + * pRefRect - Pointer to the valid reference rectangle; relative to the + * image origin. + * pCurrPointPos - Position of the current macroblock in the current plane. 
+ * Lambda - Lagrange factor for computing the cost function + * pMESpec - Pointer to the motion estimation specification structure; must + * have been allocated and initialized prior to calling this + * function. + * pMBInter - Array, of dimension four, containing pointers to information + * associated with four adjacent type INTER MBs (Left, Top, + * Top-Left, Top-Right). Any pointer in the array may be set equal + * to NULL if the corresponding MB doesn't exist or is not of type + * INTER. + * - pMBInter[0] - Pointer to left MB information + * - pMBInter[1] - Pointer to top MB information + * - pMBInter[2] - Pointer to top-left MB information + * - pMBInter[3] - Pointer to top-right MB information + * pMBIntra - Array, of dimension four, containing pointers to information + * associated with four adjacent type INTRA MBs (Left, Top, + * Top-Left, Top-Right). Any pointer in the array may be set equal + * to NULL if the corresponding MB doesn't exist or is not of type + * INTRA. + * - pMBIntra[0] - Pointer to left MB information + * - pMBIntra[1] - Pointer to top MB information + * - pMBIntra[2] - Pointer to top-left MB information + * - pMBIntra[3] - Pointer to top-right MB information + * pSrcDstMBCurr - Pointer to information structure for the current MB. + * The following entries should be set prior to calling the + * function: sliceID - the number of the slice to which the + * current MB belongs. + * + * Output Arguments: + * + * pDstCost - Pointer to the minimum motion cost for the current MB. + * pDstBlockSAD - Pointer to the array of SADs for each of the sixteen luma + * 4x4 blocks in each MB. The block SADs are in scan order for + * each MB. For implementations that cannot compute the SAD values + * individually, the maximum possible value (0xffff) is returned + * for each of the 16 block SAD entries. + * pSrcDstMBCurr - Pointer to updated information structure for the current + * MB after MB-level motion estimation has been completed.
The + * following fields are updated by the ME function. The following + * parameter set quantifies the MB-level ME search results: + * - MbType + * - subMBType[4] + * - pMV0[4][4] + * - pMVPred[4][4] + * - pRefL0Idx[4] + * - Intra16x16PredMode + * - pIntra4x4PredMode[4][4] + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - One or more of the following pointers is NULL: pSrcCurrBuf, + * pSrcRefBufList, pSrcRecBuf, pRefRect, pCurrPointPos, pMESpec, + * pMBInter, pMBIntra,pSrcDstMBCurr, pDstCost, pSrcRefBufList[0] + * - SrcRefStep, SrcRecStep are not multiples of 16 + * - iBlockWidth or iBlockHeight are values other than 4, 8, or 16. + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_MotionEstimationMB ( + const OMX_U8 *pSrcCurrBuf, + OMX_S32 SrcCurrStep, + const OMX_U8 *pSrcRefBufList[15], + OMX_S32 SrcRefStep, + const OMX_U8 *pSrcRecBuf, + OMX_S32 SrcRecStep, + const OMXRect *pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + OMX_U32 Lambda, + void *pMESpec, + const OMXVCM4P10MBInfoPtr *pMBInter, + const OMXVCM4P10MBInfoPtr *pMBIntra, + OMXVCM4P10MBInfoPtr pSrcDstMBCurr, + OMX_INT *pDstCost, + OMX_U16 *pDstBlockSAD +); + + + +/** + * Function: omxVCM4P10_SAD_4x (6.3.5.4.1) + * + * Description: + * This function calculates the SAD for 4x8 and 4x4 blocks. + * + * Input Arguments: + * + * pSrcOrg -Pointer to the original block; must be aligned on a 4-byte + * boundary. + * iStepOrg -Step of the original block buffer; must be a multiple of 4. + * pSrcRef -Pointer to the reference block + * iStepRef -Step of the reference block buffer + * iHeight -Height of the block; must be equal to either 4 or 8. + * + * Output Arguments: + * + * pDstSAD -Pointer of result SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. 
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - One or more of the following pointers is NULL: + * pSrcOrg, pSrcRef, or pDstSAD + * - iHeight is not equal to either 4 or 8. + * - iStepOrg is not a multiple of 4 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SAD_4x ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_S32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SADQuar_4x (6.3.5.4.2) + * + * Description: + * This function calculates the SAD between one block (pSrc) and the average + * of the other two (pSrcRef0 and pSrcRef1) for 4x8 or 4x4 blocks. Rounding + * is applied according to the convention (a+b+1)>>1. + * + * Input Arguments: + * + * pSrc - Pointer to the original block; must be aligned on a 4-byte + * boundary. + * pSrcRef0 - Pointer to reference block 0 + * pSrcRef1 - Pointer to reference block 1 + * iSrcStep - Step of the original block buffer; must be a multiple of 4. + * iRefStep0 - Step of reference block 0 + * iRefStep1 - Step of reference block 1 + * iHeight - Height of the block; must be equal to either 4 or 8. + * + * Output Arguments: + * + * pDstSAD - Pointer of result SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - iHeight is not equal to either 4 or 8. + * - One or more of the following pointers is NULL: pSrc, pSrcRef0, + * pSrcRef1, pDstSAD. 
+ * - iSrcStep is not a multiple of 4 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SADQuar_4x ( + const OMX_U8 *pSrc, + const OMX_U8 *pSrcRef0, + const OMX_U8 *pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SADQuar_8x (6.3.5.4.3) + * + * Description: + * This function calculates the SAD between one block (pSrc) and the average + * of the other two (pSrcRef0 and pSrcRef1) for 8x16, 8x8, or 8x4 blocks. + * Rounding is applied according to the convention (a+b+1)>>1. + * + * Input Arguments: + * + * pSrc - Pointer to the original block; must be aligned on an 8-byte + * boundary. + * pSrcRef0 - Pointer to reference block 0 + * pSrcRef1 - Pointer to reference block 1 + * iSrcStep - Step of the original block buffer; must be a multiple of 8. + * iRefStep0 - Step of reference block 0 + * iRefStep1 - Step of reference block 1 + * iHeight - Height of the block; must be equal either 4, 8, or 16. + * + * Output Arguments: + * + * pDstSAD - Pointer of result SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - iHeight is not equal to either 4, 8, or 16. + * - One or more of the following pointers is NULL: pSrc, pSrcRef0, + * pSrcRef1, pDstSAD. + * - iSrcStep is not a multiple of 8 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SADQuar_8x ( + const OMX_U8 *pSrc, + const OMX_U8 *pSrcRef0, + const OMX_U8 *pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SADQuar_16x (6.3.5.4.4) + * + * Description: + * This function calculates the SAD between one block (pSrc) and the average + * of the other two (pSrcRef0 and pSrcRef1) for 16x16 or 16x8 blocks. + * Rounding is applied according to the convention (a+b+1)>>1. 
+ * + * Input Arguments: + * + * pSrc - Pointer to the original block; must be aligned on a 16-byte + * boundary. + * pSrcRef0 - Pointer to reference block 0 + * pSrcRef1 - Pointer to reference block 1 + * iSrcStep - Step of the original block buffer; must be a multiple of 16 + * iRefStep0 - Step of reference block 0 + * iRefStep1 - Step of reference block 1 + * iHeight - Height of the block; must be equal to either 8 or 16 + * + * Output Arguments: + * + * pDstSAD -Pointer of result SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - iHeight is not equal to either 8 or 16. + * - One or more of the following pointers is NULL: pSrc, pSrcRef0, + * pSrcRef1, pDstSAD. + * - iSrcStep is not a multiple of 16 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SADQuar_16x ( + const OMX_U8 *pSrc, + const OMX_U8 *pSrcRef0, + const OMX_U8 *pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SATD_4x4 (6.3.5.4.5) + * + * Description: + * This function calculates the sum of absolute transform differences (SATD) + * for a 4x4 block by applying a Hadamard transform to the difference block + * and then calculating the sum of absolute coefficient values. 
+ * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 4-byte + * boundary + * iStepOrg - Step of the original block buffer; must be a multiple of 4 + * pSrcRef - Pointer to the reference block; must be aligned on a 4-byte + * boundary + * iStepRef - Step of the reference block buffer; must be a multiple of 4 + * + * Output Arguments: + * + * pDstSAD - pointer to the resulting SATD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pSrcRef, or pDstSAD + * - either pSrcOrg or pSrcRef is not aligned on a 4-byte boundary + * - iStepOrg <= 0 or iStepOrg is not a multiple of 4 + * - iStepRef <= 0 or iStepRef is not a multiple of 4 + * + */ +OMXResult omxVCM4P10_SATD_4x4 ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_U32 *pDstSAD +); + + + +/** + * Function: omxVCM4P10_InterpolateHalfHor_Luma (6.3.5.5.1) + * + * Description: + * This function performs interpolation for two horizontal 1/2-pel positions + * (-1/2,0) and (1/2, 0) - around a full-pel position. + * + * Input Arguments: + * + * pSrc - Pointer to the top-left corner of the block used to interpolate in + * the reconstruction frame plane. + * iSrcStep - Step of the source buffer. + * iDstStep - Step of the destination(interpolation) buffer; must be a + * multiple of iWidth. + * iWidth - Width of the current block; must be equal to either 4, 8, or 16 + * iHeight - Height of the current block; must be equal to 4, 8, or 16 + * + * Output Arguments: + * + * pDstLeft -Pointer to the interpolation buffer of the left 1/2-pel position + * (-1/2, 0) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required.
+ * pDstRight -Pointer to the interpolation buffer of the right 1/2-pel + * position (1/2, 0) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrc, pDstLeft, or pDstRight + * - iWidth or iHeight have values other than 4, 8, or 16 + * - iWidth==4 but pDstLeft and/or pDstRight is/are not aligned on a 4-byte boundary + * - iWidth==8 but pDstLeft and/or pDstRight is/are not aligned on an 8-byte boundary + * - iWidth==16 but pDstLeft and/or pDstRight is/are not aligned on a 16-byte boundary + * - any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_InterpolateHalfHor_Luma ( + const OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDstLeft, + OMX_U8 *pDstRight, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_InterpolateHalfVer_Luma (6.3.5.5.2) + * + * Description: + * This function performs interpolation for two vertical 1/2-pel positions - + * (0, -1/2) and (0, 1/2) - around a full-pel position. + * + * Input Arguments: + * + * pSrc - Pointer to top-left corner of block used to interpolate in the + * reconstructed frame plane + * iSrcStep - Step of the source buffer. + * iDstStep - Step of the destination (interpolation) buffer; must be a + * multiple of iWidth. + * iWidth - Width of the current block; must be equal to either 4, 8, or 16 + * iHeight - Height of the current block; must be equal to either 4, 8, or 16 + * + * Output Arguments: + * + * pDstUp -Pointer to the interpolation buffer of the 1/2-pel position above + * the current full-pel position (0, -1/2) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required.
+ * pDstDown -Pointer to the interpolation buffer of the 1/2-pel position below + * the current full-pel position (0, 1/2) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrc, pDstUp, or pDstDown + * - iWidth or iHeight have values other than 4, 8, or 16 + * - iWidth==4 but pDstUp and/or pDstDown is/are not aligned on a 4-byte boundary + * - iWidth==8 but pDstUp and/or pDstDown is/are not aligned on an 8-byte boundary + * - iWidth==16 but pDstUp and/or pDstDown is/are not aligned on a 16-byte boundary + * + */ +OMXResult omxVCM4P10_InterpolateHalfVer_Luma ( + const OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDstUp, + OMX_U8 *pDstDown, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_Average_4x (6.3.5.5.3) + * + * Description: + * This function calculates the average of two 4x4 or 4x8 blocks. The result + * is rounded according to (a+b+1)/2. + * + * Input Arguments: + * + * pPred0 - Pointer to the top-left corner of reference block 0 + * pPred1 - Pointer to the top-left corner of reference block 1 + * iPredStep0 - Step of reference block 0; must be a multiple of 4. + * iPredStep1 - Step of reference block 1; must be a multiple of 4. + * iDstStep - Step of the destination buffer; must be a multiple of 4. + * iHeight - Height of the blocks; must be either 4 or 8. + * + * Output Arguments: + * + * pDstPred - Pointer to the destination buffer. 4-byte alignment required.
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pPred0, pPred1, or pDstPred + * - pDstPred is not aligned on a 4-byte boundary + * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 4 + * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 4 + * - iDstStep <= 0 or iDstStep is not a multiple of 4 + * - iHeight is not equal to either 4 or 8 + * + */ +OMXResult omxVCM4P10_Average_4x ( + const OMX_U8 *pPred0, + const OMX_U8 *pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8 *pDstPred, + OMX_U32 iDstStep, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_TransformQuant_ChromaDC (6.3.5.6.1) + * + * Description: + * This function performs 2x2 Hadamard transform of chroma DC coefficients + * and then quantizes the coefficients. + * + * Input Arguments: + * + * pSrcDst - Pointer to the 2x2 array of chroma DC coefficients. 8-byte + * alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * bIntra - Indicate whether this is an INTRA block. 1-INTRA, 0-INTER + * + * Output Arguments: + * + * pSrcDst - Pointer to transformed and quantized coefficients. 8-byte + * alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcDst + * - pSrcDst is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_TransformQuant_ChromaDC ( + OMX_S16 *pSrcDst, + OMX_U32 iQP, + OMX_U8 bIntra +); + + + +/** + * Function: omxVCM4P10_TransformQuant_LumaDC (6.3.5.6.2) + * + * Description: + * This function performs a 4x4 Hadamard transform of luma DC coefficients + * and then quantizes the coefficients. + * + * Input Arguments: + * + * pSrcDst - Pointer to the 4x4 array of luma DC coefficients. 
16-byte + * alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * + * Output Arguments: + * + * pSrcDst - Pointer to transformed and quantized coefficients. 16-byte + * alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: pSrcDst + * - pSrcDst is not aligned on an 16-byte boundary + * + */ +OMXResult omxVCM4P10_TransformQuant_LumaDC ( + OMX_S16 *pSrcDst, + OMX_U32 iQP +); + + + +/** + * Function: omxVCM4P10_InvTransformDequant_LumaDC (6.3.5.6.3) + * + * Description: + * This function performs inverse 4x4 Hadamard transform and then dequantizes + * the coefficients. + * + * Input Arguments: + * + * pSrc - Pointer to the 4x4 array of the 4x4 Hadamard-transformed and + * quantized coefficients. 16 byte alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * + * Output Arguments: + * + * pDst - Pointer to inverse-transformed and dequantized coefficients. + * 16-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: pSrc + * - pSrc or pDst is not aligned on a 16-byte boundary + * + */ +OMXResult omxVCM4P10_InvTransformDequant_LumaDC ( + const OMX_S16 *pSrc, + OMX_S16 *pDst, + OMX_U32 iQP +); + + + +/** + * Function: omxVCM4P10_InvTransformDequant_ChromaDC (6.3.5.6.4) + * + * Description: + * This function performs inverse 2x2 Hadamard transform and then dequantizes + * the coefficients. + * + * Input Arguments: + * + * pSrc - Pointer to the 2x2 array of the 2x2 Hadamard-transformed and + * quantized coefficients. 8 byte alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. 
+ * + * Output Arguments: + * + * pDst - Pointer to inverse-transformed and dequantized coefficients. + * 8-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: pSrc + * - pSrc or pDst is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_InvTransformDequant_ChromaDC ( + const OMX_S16 *pSrc, + OMX_S16 *pDst, + OMX_U32 iQP +); + + + +/** + * Function: omxVCM4P10_InvTransformResidualAndAdd (6.3.5.7.1) + * + * Description: + * This function performs an inverse 4x4 integer transformation to produce + * the difference signal and then adds the difference to the prediction to get + * the reconstructed signal. + * + * Input Arguments: + * + * pSrcPred - Pointer to prediction signal. 4-byte alignment required. + * pDequantCoeff - Pointer to the transformed coefficients. 8-byte + * alignment required. + * iSrcPredStep - Step of the prediction buffer; must be a multiple of 4. + * iDstReconStep - Step of the destination reconstruction buffer; must be a + * multiple of 4. + * bAC - Indicates whether there are AC coefficients in the coefficient + * matrix. + * + * Output Arguments: + * + * pDstRecon -Pointer to the destination reconstruction buffer. 4-byte + * alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcPred, pDequantCoeff, pDstRecon + * - pSrcPred is not aligned on a 4-byte boundary + * - iSrcPredStep or iDstReconStep is not a multiple of 4.
+ * - pDequantCoeff is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_InvTransformResidualAndAdd ( + const OMX_U8 *pSrcPred, + const OMX_S16 *pDequantCoeff, + OMX_U8 *pDstRecon, + OMX_U32 iSrcPredStep, + OMX_U32 iDstReconStep, + OMX_U8 bAC +); + + + +/** + * Function: omxVCM4P10_SubAndTransformQDQResidual (6.3.5.8.1) + * + * Description: + * This function subtracts the prediction signal from the original signal to + * produce the difference signal and then performs a 4x4 integer transform and + * quantization. The quantized transformed coefficients are stored as + * pDstQuantCoeff. This function can also output dequantized coefficients or + * unquantized DC coefficients optionally by setting the pointers + * pDstDeQuantCoeff, pDCCoeff. + * + * Input Arguments: + * + * pSrcOrg - Pointer to original signal. 4-byte alignment required. + * pSrcPred - Pointer to prediction signal. 4-byte alignment required. + * iSrcOrgStep - Step of the original signal buffer; must be a multiple of + * 4. + * iSrcPredStep - Step of the prediction signal buffer; must be a multiple + * of 4. + * pNumCoeff -Number of non-zero coefficients after quantization. If this + * parameter is not required, it is set to NULL. + * nThreshSAD - Zero-block early detection threshold. If this parameter is + * not required, it is set to 0. + * iQP - Quantization parameter; must be in the range [0,51]. + * bIntra - Indicates whether this is an INTRA block, either 1-INTRA or + * 0-INTER + * + * Output Arguments: + * + * pDstQuantCoeff - Pointer to the quantized transformed coefficients. + * 8-byte alignment required. + * pDstDeQuantCoeff - Pointer to the dequantized transformed coefficients + * if this parameter is not equal to NULL. 8-byte alignment + * required. + * pDCCoeff - Pointer to the unquantized DC coefficient if this parameter + * is not equal to NULL. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pSrcPred, pNumCoeff, pDstQuantCoeff, + * pDstDeQuantCoeff, pDCCoeff + * - pSrcOrg is not aligned on a 4-byte boundary + * - pSrcPred is not aligned on a 4-byte boundary + * - iSrcOrgStep is not a multiple of 4 + * - iSrcPredStep is not a multiple of 4 + * - pDstQuantCoeff or pDstDeQuantCoeff is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_SubAndTransformQDQResidual ( + const OMX_U8 *pSrcOrg, + const OMX_U8 *pSrcPred, + OMX_U32 iSrcOrgStep, + OMX_U32 iSrcPredStep, + OMX_S16 *pDstQuantCoeff, + OMX_S16 *pDstDeQuantCoeff, + OMX_S16 *pDCCoeff, + OMX_S8 *pNumCoeff, + OMX_U32 nThreshSAD, + OMX_U32 iQP, + OMX_U8 bIntra +); + + + +/** + * Function: omxVCM4P10_GetVLCInfo (6.3.5.9.1) + * + * Description: + * This function extracts run-length encoding (RLE) information from the + * coefficient matrix. The results are returned in an OMXVCM4P10VLCInfo + * structure. + * + * Input Arguments: + * + * pSrcCoeff - pointer to the transform coefficient matrix. 8-byte + * alignment required. + * pScanMatrix - pointer to the scan order definition matrix. For a luma + * block the scan matrix should follow [ISO14496-10] section 8.5.4, + * and should contain the values 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, + * 10, 7, 11, 14, 15. For a chroma block, the scan matrix should + * contain the values 0, 1, 2, 3. + * bAC - indicates presence of a DC coefficient; 0 = DC coefficient + * present, 1= DC coefficient absent. + * MaxNumCoef - specifies the number of coefficients contained in the + * transform coefficient matrix, pSrcCoeff. The value should be 16 + * for blocks of type LUMADC, LUMAAC, LUMALEVEL, and CHROMAAC. The + * value should be 4 for blocks of type CHROMADC. 
+ * + * Output Arguments: + * + * pDstVLCInfo - pointer to structure that stores information for + * run-length coding. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcCoeff, pScanMatrix, pDstVLCInfo + * - pSrcCoeff is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_GetVLCInfo ( + const OMX_S16 *pSrcCoeff, + const OMX_U8 *pScanMatrix, + OMX_U8 bAC, + OMX_U32 MaxNumCoef, + OMXVCM4P10VLCInfo*pDstVLCInfo +); + + + +#ifdef __cplusplus +} +#endif + +#endif /** end of #define _OMXVC_H_ */ + +/** EOF */ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h new file mode 100644 index 0000000000000000000000000000000000000000..be974d52bd5fd66d468762e147fe6c45a450e8ba --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h @@ -0,0 +1,129 @@ +;/****************************************************************************** +;// Copyright (c) 1999-2005 The Khronos Group Inc. 
All Rights Reserved +;// +;// +;// +;// +;// +;// +;// +;// +;******************************************************************************/ + +;/** =============== Structure Definition for Sample Generation ============== */ +;/** transparent status */ + +;enum { +OMX_VIDEO_TRANSPARENT EQU 0; /** Wholly transparent */ +OMX_VIDEO_PARTIAL EQU 1; /** Partially transparent */ +OMX_VIDEO_OPAQUE EQU 2; /** Opaque */ +;} + +;/** direction */ +;enum { +OMX_VIDEO_NONE EQU 0; +OMX_VIDEO_HORIZONTAL EQU 1; +OMX_VIDEO_VERTICAL EQU 2; +;} + +;/** bilinear interpolation type */ +;enum { +OMX_VIDEO_INTEGER_PIXEL EQU 0; /** case "a" */ +OMX_VIDEO_HALF_PIXEL_X EQU 1; /** case "b" */ +OMX_VIDEO_HALF_PIXEL_Y EQU 2; /** case "c" */ +OMX_VIDEO_HALF_PIXEL_XY EQU 3; /** case "d" */ +;} + +;enum { +OMX_UPPER EQU 1; /** set if the above macroblock is available */ +OMX_LEFT EQU 2; /** set if the left macroblock is available */ +OMX_CENTER EQU 4; +OMX_RIGHT EQU 8; +OMX_LOWER EQU 16; +OMX_UPPER_LEFT EQU 32; /** set if the above-left macroblock is available */ +OMX_UPPER_RIGHT EQU 64; /** set if the above-right macroblock is available */ +OMX_LOWER_LEFT EQU 128; +OMX_LOWER_RIGHT EQU 256 +;} + +;enum { +OMX_VIDEO_LUMINANCE EQU 0; /** Luminance component */ +OMX_VIDEO_CHROMINANCE EQU 1; /** chrominance component */ +OMX_VIDEO_ALPHA EQU 2; /** Alpha component */ +;} + +;enum { +OMX_VIDEO_INTER EQU 0; /** P picture or P-VOP */ +OMX_VIDEO_INTER_Q EQU 1; /** P picture or P-VOP */ +OMX_VIDEO_INTER4V EQU 2; /** P picture or P-VOP */ +OMX_VIDEO_INTRA EQU 3; /** I and P picture; I- and P-VOP */ +OMX_VIDEO_INTRA_Q EQU 4; /** I and P picture; I- and P-VOP */ +OMX_VIDEO_INTER4V_Q EQU 5; /** P picture or P-VOP (H.263)*/ +OMX_VIDEO_DIRECT EQU 6; /** B picture or B-VOP (MPEG-4 only) */ +OMX_VIDEO_INTERPOLATE EQU 7; /** B picture or B-VOP */ +OMX_VIDEO_BACKWARD EQU 8; /** B picture or B-VOP */ +OMX_VIDEO_FORWARD EQU 9; /** B picture or B-VOP */ +OMX_VIDEO_NOTCODED EQU 10; /** B picture or B-VOP */ +;}
+ +;enum { +OMX_16X16_VERT EQU 0; /** Intra_16x16_Vertical (prediction mode) */ +OMX_16X16_HOR EQU 1; /** Intra_16x16_Horizontal (prediction mode) */ +OMX_16X16_DC EQU 2; /** Intra_16x16_DC (prediction mode) */ +OMX_16X16_PLANE EQU 3; /** Intra_16x16_Plane (prediction mode) */ +;} + +;enum { +OMX_4x4_VERT EQU 0; /** Intra_4x4_Vertical (prediction mode) */ +OMX_4x4_HOR EQU 1; /** Intra_4x4_Horizontal (prediction mode) */ +OMX_4x4_DC EQU 2; /** Intra_4x4_DC (prediction mode) */ +OMX_4x4_DIAG_DL EQU 3; /** Intra_4x4_Diagonal_Down_Left (prediction mode) */ +OMX_4x4_DIAG_DR EQU 4; /** Intra_4x4_Diagonal_Down_Right (prediction mode) */ +OMX_4x4_VR EQU 5; /** Intra_4x4_Vertical_Right (prediction mode) */ +OMX_4x4_HD EQU 6; /** Intra_4x4_Horizontal_Down (prediction mode) */ +OMX_4x4_VL EQU 7; /** Intra_4x4_Vertical_Left (prediction mode) */ +OMX_4x4_HU EQU 8; /** Intra_4x4_Horizontal_Up (prediction mode) */ +;} + +;enum { +OMX_CHROMA_DC EQU 0; /** Intra_Chroma_DC (prediction mode) */ +OMX_CHROMA_HOR EQU 1; /** Intra_Chroma_Horizontal (prediction mode) */ +OMX_CHROMA_VERT EQU 2; /** Intra_Chroma_Vertical (prediction mode) */ +OMX_CHROMA_PLANE EQU 3; /** Intra_Chroma_Plane (prediction mode) */ +;} + +;typedef struct { +x EQU 0; +y EQU 4; +;}OMXCoordinate; + +;typedef struct { +dx EQU 0; +dy EQU 2; +;}OMXMotionVector; + +;typedef struct { +xx EQU 0; +yy EQU 4; +width EQU 8; +height EQU 12; +;}OMXiRect; + +;typedef enum { +OMX_VC_INTER EQU 0; /** P picture or P-VOP */ +OMX_VC_INTER_Q EQU 1; /** P picture or P-VOP */ +OMX_VC_INTER4V EQU 2; /** P picture or P-VOP */ +OMX_VC_INTRA EQU 3; /** I and P picture, I- and P-VOP */ +OMX_VC_INTRA_Q EQU 4; /** I and P picture, I- and P-VOP */ +OMX_VC_INTER4V_Q EQU 5; /** P picture or P-VOP (H.263)*/ +;} OMXVCM4P2MacroblockType; + +;enum { +OMX_VC_NONE EQU 0 +OMX_VC_HORIZONTAL EQU 1 +OMX_VC_VERTICAL EQU 2 +;}; + + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s 
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s new file mode 100644 index 0000000000000000000000000000000000000000..2663a70a4b51c5ee4603757bf8fb2239db102522 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s @@ -0,0 +1,148 @@ + ;/** + ; * Function: omxVCCOMM_Copy16x16 + ; * + ; * Description: + ; * Copies the reference 16x16 block to the current block. + ; * Parameters: + ; * [in] pSrc - pointer to the reference block in the source frame; must be aligned on a 16-byte boundary. + ; * [in] step - distance between the starts of consecutive lines in the reference frame, in bytes; + ; * must be a multiple of 16 and must be larger than or equal to 16. + ; * [out] pDst - pointer to the destination block; must be aligned on a 16-byte boundary. + ; * Return Value: + ; * OMX_Sts_NoErr - no error + ; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions: + ; * - one or more of the following pointers is NULL: pSrc, pDst + ; * - one or more of the following pointers is not aligned on a 16-byte boundary: pSrc, pDst + ; * - step < 16 or step is not a multiple of 16.
+ ; */ + + INCLUDE omxtypes_s.h + + + M_VARIANTS ARM1136JS + + + + + IF ARM1136JS + +;//Input Arguments +pSrc RN 0 +pDst RN 1 +step RN 2 + +;//Local Variables +Count RN 14 +X0 RN 2 +X1 RN 4 + +Return RN 0 + + M_START omxVCCOMM_Copy16x16,r5 + + + + SUB Count,step,#8 ;//Count=step-8 + LDRD X0,[pSrc],#8 ;//pSrc after loading pSrc=pSrc+8 + LDRD X1,[pSrc],Count ;//pSrc after loading pSrc=pSrc+step + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD 
X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + STRD X0,[pDst],#8 + MOV Return,#OMX_Sts_NoErr + STRD X1,[pDst],#8 + + + M_END + ENDIF + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s new file mode 100644 index 0000000000000000000000000000000000000000..993873c858c496033172fd975763ce97d865347d --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s @@ -0,0 +1,72 @@ + ;/** + ; * Function: omxVCCOMM_Copy8x8 + ; * + ; * Description: + ; * Copies the reference 8x8 block to the current block. + ; * Parameters: + ; * [in] pSrc - pointer to the reference block in the source frame; must be aligned on an 8-byte boundary. + ; * [in] step - distance between the starts of consecutive lines in the reference frame, in bytes; + ; * must be a multiple of 8 and must be larger than or equal to 8. + ; * [out] pDst - pointer to the destination block; must be aligned on an 8-byte boundary. + ; * Return Value: + ; * OMX_Sts_NoErr - no error + ; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions: + ; * - one or more of the following pointers is NULL: pSrc, pDst + ; * - one or more of the following pointers is not aligned on an 8-byte boundary: pSrc, pDst + ; * - step <8 or step is not a multiple of 8. 
+ ; */ + + INCLUDE omxtypes_s.h + + + M_VARIANTS ARM1136JS + + + + + IF ARM1136JS + +;//Input Arguments +pSrc RN 0 +pDst RN 1 +step RN 2 + +;//Local Variables +Count RN 14 +X0 RN 2 +X1 RN 4 +Return RN 0 + M_START omxVCCOMM_Copy8x8,r5 + + + + MOV Count,step ;//Count=step + + LDRD X0,[pSrc],Count ;//pSrc after loading : pSrc=pSrc+step + LDRD X1,[pSrc],Count + + STRD X0,[pDst],#8 + LDRD X0,[pSrc],Count + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + STRD X0,[pDst],#8 + LDRD X0,[pSrc],Count + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + STRD X0,[pDst],#8 + LDRD X0,[pSrc],Count + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + STRD X0,[pDst],#8 + MOV Return,#OMX_Sts_NoErr + STRD X1,[pDst],#8 + + + M_END + ENDIF + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s new file mode 100644 index 0000000000000000000000000000000000000000..02b4b0838395d1c6879095d6d74199466150273a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s @@ -0,0 +1,189 @@ +;// +;// +;// File Name: omxVCCOMM_ExpandFrame_I_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+;// +;// +;// +;// Description: +;// This function will Expand Frame boundary pixels into Plane +;// +;// + +;// Include standard headers + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + +;// Import symbols required from other files +;// (For example tables) + + +;// Set debugging level +DEBUG_ON SETL {FALSE} + + + + + + + +;// Guarding implementation by the processor name + + IF ARM1136JS + +;//Input Registers + +pSrcDstPlane RN 0 +iFrameWidth RN 1 +iFrameHeight RN 2 +iExpandPels RN 3 + + +;//Output Registers + +result RN 0 + +;//Local Scratch Registers + +iPlaneStep RN 4 +pTop RN 5 +pBottom RN 6 +pBottomIndex RN 7 +x RN 8 +y RN 9 +tempTop RN 10 +tempBot RN 11 +ColStep RN 12 +pLeft RN 5 +pRight RN 6 +pRightIndex RN 7 +tempLeft1 RN 10 +tempRight1 RN 11 +tempLeft2 RN 14 +tempRight2 RN 2 +indexY RN 14 +RowStep RN 12 +expandTo4bytes RN 1 ;// copy a byte to 4 bytes of a word + + ;// Allocate stack memory required by the function + + + ;// Write function header + M_START omxVCCOMM_ExpandFrame_I,r11 + + ;// Define stack arguments + M_ARG iPlaneStepOnStack, 4 + + ;// Load argument from the stack + M_LDR iPlaneStep, iPlaneStepOnStack + + MUL pTop,iExpandPels,iPlaneStep + MLA pBottom,iFrameHeight,iPlaneStep,pSrcDstPlane + SUB x,iFrameWidth,#4 + MOV indexY,pTop + ADD ColStep,indexY,#4 + SUB pBottomIndex,pBottom,iPlaneStep + SUB pTop,pSrcDstPlane,pTop + + + ADD pTop,pTop,x + ADD pBottom,pBottom,x + + ;//------------------------------------------------------------------------ + ;// The following improves upon the C implmentation + ;// The x and y loops are interchanged: This ensures that the values of + ;// pSrcDstPlane [x] and pSrcDstPlane [(iFrameHeight - 1) * iPlaneStep + x] + ;// which depend only on loop variable 'x' are loaded once and used in + ;// multiple stores in the 'Y' loop + ;//------------------------------------------------------------------------ + + ;// xloop +ExpandFrameTopBotXloop + + LDR tempTop,[pSrcDstPlane,x] + 
;//------------------------------------------------------------------------ + ;// pSrcDstPlane [(iFrameHeight - 1) * iPlaneStep + x] is simplified as: + ;// pSrcDstPlane + (iFrameHeight * iPlaneStep) - iPlaneStep + x == + ;// pBottom - iPlaneStep + x == pBottomIndex [x] + ;// The value of pBottomIndex is calculated above this 'x' loop + ;//------------------------------------------------------------------------ + LDR tempBot,[pBottomIndex,x] + + ;// yloop + MOV y,iExpandPels + +ExpandFrameTopBotYloop + SUBS y,y,#1 + M_STR tempTop,[pTop],iPlaneStep + M_STR tempBot,[pBottom],iPlaneStep + BGT ExpandFrameTopBotYloop + + SUBS x,x,#4 + SUB pTop,pTop,ColStep + SUB pBottom,pBottom,ColStep + BGE ExpandFrameTopBotXloop + + + ;// y loop + ;// The product is already calculated above : Reuse + ;//MUL indexY,iExpandPels,iPlaneStep + + SUB pSrcDstPlane,pSrcDstPlane,indexY + SUB pLeft,pSrcDstPlane,iExpandPels ;// pLeft->points to the top left of the expanded block + ADD pRight,pSrcDstPlane,iFrameWidth + SUB pRightIndex,pRight,#1 + + ADD y,iFrameHeight,iExpandPels,LSL #1 + LDR expandTo4bytes,=0x01010101 + + RSB RowStep,iExpandPels,iPlaneStep,LSL #1 + + ;// The Y Loop is unrolled twice +ExpandFrameLeftRightYloop + LDRB tempLeft2,[pSrcDstPlane,iPlaneStep] ;// PreLoad the values + LDRB tempRight2,[pRightIndex,iPlaneStep] + M_LDRB tempLeft1,[pSrcDstPlane],iPlaneStep,LSL #1 ;// PreLoad the values + M_LDRB tempRight1,[pRightIndex],iPlaneStep,LSL #1 + + SUB x,iExpandPels,#4 + MUL tempLeft2,tempLeft2,expandTo4bytes ;// Copy the single byte to 4 bytes + MUL tempRight2,tempRight2,expandTo4bytes + MUL tempLeft1,tempLeft1,expandTo4bytes ;// Copy the single byte to 4 bytes + MUL tempRight1,tempRight1,expandTo4bytes + + + ;// x loop +ExpandFrameLeftRightXloop + SUBS x,x,#4 + STR tempLeft2,[pLeft,iPlaneStep] ;// Store the 4 bytes at one go + STR tempRight2,[pRight,iPlaneStep] + STR tempLeft1,[pLeft],#4 ;// Store the 4 bytes at one go + STR tempRight1,[pRight],#4 + BGE ExpandFrameLeftRightXloop + 
+ SUBS y,y,#2 + ADD pLeft,pLeft,RowStep + ADD pRight,pRight,RowStep + BGT ExpandFrameLeftRightYloop + + + ;// Set return value + + MOV result,#OMX_Sts_NoErr +End + + ;// Write function tail + + M_END + + ENDIF ;//ARM1136JS + + + END \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h new file mode 100644 index 0000000000000000000000000000000000000000..4340f2aaeb2abd9daeafd79bc57b556385912988 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h @@ -0,0 +1,30 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_CAVLCTables.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Header file for optimized H.264 CAVLC tables + * + */ + +#ifndef ARMVCM4P10_CAVLCTABLES_H +#define ARMVCM4P10_CAVLCTABLES_H + +/* CAVLC tables */ + +extern const OMX_U16 *armVCM4P10_CAVLCCoeffTokenTables[18]; +extern const OMX_U16 *armVCM4P10_CAVLCTotalZeroTables[15]; +extern const OMX_U16 *armVCM4P10_CAVLCTotalZeros2x2Tables[3]; +extern const OMX_U16 *armVCM4P10_CAVLCRunBeforeTables[15]; +extern const OMX_U8 armVCM4P10_ZigZag_4x4[16]; +extern const OMX_U8 armVCM4P10_ZigZag_2x2[4]; +extern const OMX_S8 armVCM4P10_SuffixToLevel[7]; + +#endif diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s new file mode 100644 index 0000000000000000000000000000000000000000..b2cd9d1c89559885ffbb6f12af790e5b284a3971 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s @@ -0,0 +1,222 @@ +;//
+;// +;// File Name: armVCM4P10_Average_4x_Align_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + +;// Functions: +;// armVCM4P10_Average_4x4_AlignN_unsafe (exported for N = 0, 2, 3) +;// +;// Implements Average of 4x4 with equation c = (a+b+1)>>1. +;// First operand will be at offset ALIGNMENT from aligned address +;// Second operand will be at aligned location and will be used as output. +;// destination pointed by (pDst) for vertical interpolation. +;// This function needs to copy 4 bytes in horizontal direction +;// +;// Registers used as input for this function +;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 the step size +;// +;// Registers preserved for top level function +;// r4,r5,r6,r8,r9,r14 +;// +;// Registers modified by the function +;// r7,r10,r11,r12 +;// +;// Output registers +;// r2 - pointer to the aligned location +;// r3 - step size to this aligned location + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_Average_4x4_Align0_unsafe + EXPORT armVCM4P10_Average_4x4_Align2_unsafe + EXPORT armVCM4P10_Average_4x4_Align3_unsafe + +DEBUG_ON SETL {FALSE} + +;// Declare input registers +pPred0 RN 0 +iPredStep0 RN 1 +pPred1 RN 2 +iPredStep1 RN 3 +pDstPred RN 2 +iDstStep RN 3 + +;// Declare other intermediate registers +iPredA0 RN 10 +iPredA1 RN 11 +iPredB0 RN 12 +iPredB1 RN 14 +Temp1 RN 4 +Temp2 RN 5 +ResultA RN 5 +ResultB RN 4 +r0x80808080 RN 7 + + IF ARM1136JS + + ;// This function calculates average of 4x4 block + ;// pPred0 is at alignment offset 0 and pPred1 is alignment 4 + + ;// Function header + M_START armVCM4P10_Average_4x4_Align0_unsafe, r6 + + ;// Code start + LDR r0x80808080, =0x80808080 + + ;// 1st load + M_LDR iPredB0, [pPred1] + M_LDR iPredA0, [pPred0], iPredStep0 + M_LDR iPredB1, [pPred1, iPredStep1] + M_LDR iPredA1, [pPred0], iPredStep0 + + ;// (a+b+1)/2 = (a+256-(255-b))/2 =
(a-(255-b))/2 + 128 + MVN iPredB0, iPredB0 + MVN iPredB1, iPredB1 + UHSUB8 ResultA, iPredA0, iPredB0 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep + + ;// 2nd load + M_LDR iPredA0, [pPred0], iPredStep0 + M_LDR iPredB0, [pPred1] + M_LDR iPredA1, [pPred0], iPredStep0 + M_LDR iPredB1, [pPred1, iPredStep1] + + MVN iPredB0, iPredB0 + UHSUB8 ResultA, iPredA0, iPredB0 + MVN iPredB1, iPredB1 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep +End0 + M_END + + ;// This function calculates average of 4x4 block + ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4 + + ;// Function header + M_START armVCM4P10_Average_4x4_Align2_unsafe, r6 + + ;// Code start + LDR r0x80808080, =0x80808080 + + ;// 1st load + LDR Temp1, [pPred0, #4] + M_LDR iPredA0, [pPred0], iPredStep0 + M_LDR iPredB0, [pPred1] + M_LDR iPredB1, [pPred1, iPredStep1] + M_LDR Temp2, [pPred0, #4] + M_LDR iPredA1, [pPred0], iPredStep0 + MVN iPredB0, iPredB0 + MVN iPredB1, iPredB1 + MOV iPredA0, iPredA0, LSR #16 + ORR iPredA0, iPredA0, Temp1, LSL #16 + MOV iPredA1, iPredA1, LSR #16 + ORR iPredA1, iPredA1, Temp2, LSL #16 + + ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128 + UHSUB8 ResultA, iPredA0, iPredB0 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep + + ;// 2nd load + LDR Temp1, [pPred0, #4] + M_LDR iPredA0, [pPred0], iPredStep0 + LDR iPredB0, [pPred1] + LDR iPredB1, [pPred1, iPredStep1] + LDR Temp2, [pPred0, #4] + M_LDR iPredA1, [pPred0], iPredStep0 + MVN iPredB0, iPredB0 + MVN iPredB1, iPredB1 + MOV iPredA0, iPredA0, LSR #16 + ORR iPredA0, iPredA0, Temp1, LSL #16 + MOV iPredA1, iPredA1, LSR #16 + 
ORR iPredA1, iPredA1, Temp2, LSL #16 + + UHSUB8 ResultA, iPredA0, iPredB0 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep +End2 + M_END + + + ;// This function calculates average of 4x4 block + ;// pPred0 is at alignment offset 3 and pPred1 is alignment 4 + + ;// Function header + M_START armVCM4P10_Average_4x4_Align3_unsafe, r6 + + ;// Code start + LDR r0x80808080, =0x80808080 + + ;// 1st load + LDR Temp1, [pPred0, #4] + M_LDR iPredA0, [pPred0], iPredStep0 + LDR iPredB0, [pPred1] + LDR iPredB1, [pPred1, iPredStep1] + LDR Temp2, [pPred0, #4] + M_LDR iPredA1, [pPred0], iPredStep0 + + MVN iPredB0, iPredB0 + MVN iPredB1, iPredB1 + MOV iPredA0, iPredA0, LSR #24 + ORR iPredA0, iPredA0, Temp1, LSL #8 + MOV iPredA1, iPredA1, LSR #24 + ORR iPredA1, iPredA1, Temp2, LSL #8 + UHSUB8 ResultA, iPredA0, iPredB0 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep + + ;// 2nd load + LDR Temp1, [pPred0, #4] + M_LDR iPredA0, [pPred0], iPredStep0 + LDR iPredB0, [pPred1] + LDR iPredB1, [pPred1, iPredStep1] + LDR Temp2, [pPred0, #4] + M_LDR iPredA1, [pPred0], iPredStep0 + + MVN iPredB0, iPredB0 + MVN iPredB1, iPredB1 + MOV iPredA0, iPredA0, LSR #24 + ORR iPredA0, iPredA0, Temp1, LSL #8 + MOV iPredA1, iPredA1, LSR #24 + ORR iPredA1, iPredA1, Temp2, LSL #8 + + UHSUB8 ResultA, iPredA0, iPredB0 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep +End3 + M_END + + ENDIF + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c 
new file mode 100644 index 0000000000000000000000000000000000000000..17fe51839d7b80dbe441bac777f8e38a210bc80e --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c @@ -0,0 +1,327 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_CAVLCTables.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Optimized CAVLC tables for H.264 + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armVCM4P10_CAVLCTables.h" + +/* 4x4 DeZigZag table */ + +const OMX_U8 armVCM4P10_ZigZag_4x4[16] = +{ + 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 +}; + +/* 2x2 DeZigZag table */ + +const OMX_U8 armVCM4P10_ZigZag_2x2[4] = +{ + 0, 1, 2, 3 +}; + + +/* + * Suffix To Level table + * We increment the suffix length if + * ((LevelCode>>1)+1)>(3<<(SuffixLength-1)) && SuffixLength<6 + * (LevelCode>>1)>=(3<<(SuffixLength-1)) && SuffixLength<6 + * LevelCode >= 3<= (3<>3)) + ;// + ;// Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3 + ;// = (4*q0 - 4*p0 + p1 - q1 + 4)>>3 + ;// = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3 + + USUB8 t1, p_1, p_0 + MUL tC0, t2, m01 + + USUB8 t2, q_1, q_0 + SSUB8 t1, t1, t2 + + USUB8 t2, p_0, q_0 + AND t2, t2, m01 + SHSUB8 t1, t1, t2 + UHSUB8 t5, p_0, q_0 + SSUB8 t1, t1, t2 + SHSUB8 t1, t1, t5 + MOV m00, #0 + SADD8 t1, t1, m01 + SHSUB8 t1, t1, t5 + + ;// tC = tC0 + ;// if (ap < beta) tC++; + ;// if (aq < beta) tC++; + USUB8 t5, filt, m01 + SEL tC0, tC0, m00 + UQADD8 tC, tC0, apflg + SSUB8 t1, t1, m00 + UQADD8 tC, tC, aqflg + + ;// Split into positive and negative part and clip + SEL pos, t1, m00 + USUB8 neg, pos, t1 + USUB8 t3, pos, tC + SEL pos, tC, pos + USUB8 t3, neg, tC + SEL neg, tC, neg + + ;//Reload m01 + LDR m01,=MASK_1 + + UQADD8 P0a, p_0, pos + UQSUB8 Q0a, q_0, pos + UQSUB8 P0a, P0a, neg + UQADD8 Q0a, Q0a, neg + + ;// Choose to 
store the filtered + ;// value or the original pixel + USUB8 t1, filt, m01 + SEL P0a, P0a, p_0 + SEL Q0a, Q0a, q_0 + + ;// delta = (p2 + ((p0+q0+1)>>1) - (p1<<1))>>1; + ;// u1 = (p0 + q0 + 1)>>1 + ;// u1 = ( (q_0 - p_0')>>1 ) ^ 0x80 + MVN p_0, p_0 + UHSUB8 u1, q_0, p_0 + UQADD8 max, p_1, tC0 + EOR u1, u1, m01 ,LSL #7 + + ;// Calculate A = (p2+u1)>>1 + ;// Then delta = Clip3( -tC0, tC0, A - p1) + + ;// Clip P1 + UHADD8 P1a, p_2, u1 + UQSUB8 min, p_1, tC0 + USUB8 t4, P1a, max + SEL P1a, max, P1a + USUB8 t4, P1a, min + SEL P1a, P1a, min + + ;// Clip Q1 + UHADD8 Q1a, q_2, u1 + UQADD8 max, q_1, tC0 + UQSUB8 min, q_1, tC0 + USUB8 t0, Q1a, max + SEL Q1a, max, Q1a + USUB8 t0, Q1a, min + SEL Q1a, Q1a, min + + ;// Choose to store the filtered + ;// value or the original pixel + USUB8 t0, apflg, m01 + SEL P1a, P1a, p_1 + USUB8 t0, aqflg, m01 + SEL t3, Q1a, q_1 + + M_END + +;// Register usage for - armVCM4P10_DeblockingLumabSGE4_unsafe() +;// +;// Inputs - 3,4,5,8,9,10 - Input Pixels (p0-p2,q0-q2) +;// - 2 - filt, 0 - apflg,aqflg +;// - 1 - ap0q0, 6 - alpha +;// - 7 - m00, 11 - m01 +;// +;// Outputs - 6,7,1,9,0,2 - Output Pixels(P0b,P1b,P2b, Q0b,Q1b,Q2b) +;// +;// Registers Corrupted - 0-3,5-12,14 + + M_START armVCM4P10_DeblockingLumabSGE4_unsafe, lr + + ;// apflg = apflg && |p0-q0|<((alpha>>2)+2) + ;// apflg = aqflg && |p0-q0|<((alpha>>2)+2) + + M_ARG pDummy,4 + M_ARG pQ_3,4 + M_ARG pP_3,4 + + UHADD8 alpha, alpha, m00 + USUB8 t9, p_2, p_0 ;//t9 = dp2p0 + UHADD8 alpha, alpha, m00 + ADD alpha, alpha, m01, LSL #1 + USUB8 ap0q0, ap0q0, alpha + SEL apqflg, m00, apflg + + ;// P0 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3 + ;// = ((p2-p0) + 2*(p1-p0) + (q1-q0) + 3*(q0-p0) + 8*p0 + 4)>>3 + ;// = p0 + (((p2-p0) + 2*(p1-p0) + (q1-q0) - 3*(p0-q0) + 4)>>3) + + ;// P1 = (p2 + p1 + q0 + p0 + 2)>>2 + ;// = p0 + (((p2-p0) + (p1-p0) - (p0-q0) + 2)>>2) + + ;// P2 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4)>>3 + ;// = (2*(p3-p0) + 3*(p2-p0) + (p1-p0) - (p0-q0) + 8*p0 + 4)>>3 + ;// = p0 + (((p3-p0) + 
(p2-p0) + t2 + 2)>>2) + + ;// Compute P0b + USUB8 t2, p_0, q_0 + SSUB8 t5, t9, t2 + + USUB8 t8, q_1, q_0 + SHADD8 t8, t5, t8 + + USUB8 t9, p_1, p_0 + SADD8 t8, t8, t9 + SHSUB8 t8, t8, t2 + SHADD8 t5, t5, t9 + SHADD8 t8, t8, m01 + SHADD8 t9, t5, m01 + SADD8 P0b, p_0, t8 + ;// P0b ready + + ;// Compute P1b + M_LDR p_3b, pP_3 + SADD8 P1b, p_0, t9 + ;// P1b ready + + ;// Compute P2b + USUB8 t9, p_2, p_0 + SADD8 t5, t5, t9 + UHSUB8 t9, p_3b, p_0 + EOR a, p_3b, p_0 + AND a, a, m01 + SHADD8 t5, t5, a + UHADD8 a, p_0, q_1 + SADD8 t5, t5, m01 + SHADD8 t5, t5, t9 + MVN t9, p_1 + SADD8 P2b, p_0, t5 + ;// P2b ready + + UHSUB8 a, a, t9 + ORR t9, apqflg, m01 + USUB8 t9, apqflg, t9 + + EOR a, a, m01, LSL #7 + SEL P0b, P0b, a + SEL P1b, P1b, p_1 + SEL P2b, P2b, p_2 + + USUB8 t4, filt, m01 + SEL P0b, P0b, p_0 + + + ;// Q0 = (q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4)>>3 + ;// = ((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 8*q0 + 4)>>3 + ;// = q0 + (((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 4)>>3) + + ;// Q1 = (q2 + q1 + p0 + q0 + 2)>>2 + ;// = q0 + (((q2-q0) + (q1-q0) + (p0-q0) + 2)>>2) + + ;// Q2 = (2*q3 + 3*q2 + q1 + q0 + p0 + 4)>>3 + ;// = (2*(q3-q0) + 3*(q2-q0) + (q1-q0) + (p0-q0) + 8*q0 + 4)>>3 + ;// = q0 + (((q3-q0) + (q2-q0) + t2 + 2)>>2) + + + ;// Compute Q0b Q1b + USUB8 t4, q_2, q_0 + USUB8 a, p_0, q_0 + USUB8 t9, p_1, p_0 + SADD8 t0, t4, a + SHADD8 t9, t0, t9 + UHADD8 t10, q_0, p_1 + SADD8 t9, t9, a + USUB8 a, q_1, q_0 + SHADD8 t9, t9, a + SHADD8 t0, t0, a + SHADD8 t9, t9, m01 + SHADD8 a, t0, m01 + SADD8 t9, q_0, t9 + ;// Q0b ready - t9 + + MOV t4, #0 + UHADD8 apqflg, apqflg, t4 + + SADD8 Q1b, q_0, a + ;// Q1b ready + + USUB8 t4, apqflg, m01 + SEL Q1b, Q1b, q_1 + MVN t11, q_1 + UHSUB8 t10, t10, t11 + M_LDR q_3b, pQ_3 + EOR t10, t10, m01, LSL #7 + SEL t9, t9, t10 + + ;// Compute Q2b + USUB8 t4, q_2, q_0 + SADD8 t4, t0, t4 + EOR t0, q_3b, q_0 + AND t0, t0, m01 + SHADD8 t4, t4, t0 + UHSUB8 t10, q_3b, q_0 + SADD8 t4, t4, m01 + SHADD8 t4, t4, t10 + + USUB8 t10, filt, m01 + SEL Q0b, 
t9, q_0 + + SADD8 t4, q_0, t4 + ;// Q2b ready - t4 + + USUB8 t10, apqflg, m01 + SEL Q2b, t4, q_2 + + M_END + + ENDIF + + END \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s new file mode 100644 index 0000000000000000000000000000000000000000..ac448a0179ee87393c4ce9ee43d2dc5ad5f6615b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s @@ -0,0 +1,325 @@ +;// +;// +;// File Name: armVCM4P10_DecodeCoeffsToPair_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + + IMPORT armVCM4P10_CAVLCCoeffTokenTables + IMPORT armVCM4P10_CAVLCTotalZeroTables + IMPORT armVCM4P10_CAVLCTotalZeros2x2Tables + IMPORT armVCM4P10_CAVLCRunBeforeTables + IMPORT armVCM4P10_SuffixToLevel + IMPORT armVCM4P10_ZigZag_4x4 + IMPORT armVCM4P10_ZigZag_2x2 + + M_VARIANTS ARM1136JS + +;//DEBUG_ON SETL {TRUE} + +LAST_COEFF EQU 0x20 ;// End of block flag +TWO_BYTE_COEFF EQU 0x10 + +;// Declare input registers + +ppBitStream RN 0 +pOffset RN 1 +pNumCoeff RN 2 +ppPosCoefbuf RN 3 +nC RN 4 ;// number of coeffs or 17 for chroma +sMaxNumCoeff RN 5 + +;// Declare inner loop registers + +;// Level loop +Count RN 0 +TrailingOnes RN 1 +pLevel RN 2 +LevelSuffix RN 3 +SuffixLength RN 4 +TotalCoeff RN 5 + +pVLDTable RN 6 +Symbol RN 7 +T1 RN 8 +T2 RN 9 +RBitStream RN 10 +RBitBuffer RN 11 +RBitCount RN 12 +lr RN 14 + +;// Run loop +Count RN 0 +ZerosLeft RN 1 +pLevel RN 2 +ppRunTable RN 3 +pRun RN 4 +TotalCoeff RN 5 + +pVLDTable RN 6 +Symbol RN 7 +T1 RN 8 +T2 RN 9 +RBitStream RN 10 +RBitBuffer RN 11 +RBitCount RN 12 +lr RN 14 + +;// Fill in coefficients loop +pPosCoefbuf RN 0 
+temp RN 1 +pLevel RN 2 +ppPosCoefbuf RN 3 +pRun RN 4 +TotalCoeff RN 5 +pZigZag RN 6 + +T1 RN 8 +T2 RN 9 +RBitStream RN 10 +RBitBuffer RN 11 +RBitCount RN 12 +CoeffNum RN 14 + + + + IF ARM1136JS + + ;// Allocate stack memory required by the function + M_ALLOC4 pppBitStream, 4 + M_ALLOC4 ppOffset, 4 + M_ALLOC4 pppPosCoefbuf, 4 + M_ALLOC4 ppLevel, 16*2 + M_ALLOC4 ppRun, 16 + + ;// Write function header + M_START armVCM4P10_DecodeCoeffsToPair, r11 + + ;// Define stack arguments + M_ARG pNC, 4 + M_ARG pSMaxNumCoeff,4 + + ;// Code start + M_BD_INIT0 ppBitStream, pOffset, RBitStream, RBitBuffer, RBitCount + LDR pVLDTable, =armVCM4P10_CAVLCCoeffTokenTables + M_LDR nC, pNC + + M_BD_INIT1 T1, T2, lr + LDR pVLDTable, [pVLDTable, nC, LSL #2] ;// Find VLD table + + M_BD_INIT2 T1, T2, lr + + ;// Decode Symbol = TotalCoeff*4 + TrailingOnes + M_BD_VLD Symbol, T1, T2, pVLDTable, 4, 2 + + MOVS TotalCoeff, Symbol, LSR #2 + STRB TotalCoeff, [pNumCoeff] + M_PRINTF "TotalCoeff=%d\n", TotalCoeff + BEQ.W EndNoError ;// Finished if no coefficients + + CMP Symbol, #17*4 + BGE.W EndBadSymbol ;// Error if bad symbol + + ;// Save bitstream pointers + M_STR ppBitStream, pppBitStream + M_STR pOffset, ppOffset + M_STR ppPosCoefbuf, pppPosCoefbuf + + ;// Decode Trailing Ones + ANDS TrailingOnes, Symbol, #3 + M_ADR pLevel, ppLevel + M_PRINTF "TrailingOnes=%d\n", TrailingOnes + BEQ TrailingOnesDone + MOV Count, TrailingOnes +TrailingOnesLoop + M_BD_READ8 Symbol, 1, T1 + SUBS Count, Count, #1 + MOV T1, #1 + SUB T1, T1, Symbol, LSL #1 + M_PRINTF "Level=%d\n", T1 + STRH T1, [pLevel], #2 + BGT TrailingOnesLoop +TrailingOnesDone + + ;// Decode level values + SUBS Count, TotalCoeff, TrailingOnes ;// Number of levels to read + BEQ DecodeRuns ;// None left + + MOV SuffixLength, #1 + CMP TotalCoeff, #10 + MOVLE SuffixLength, #0 + CMP TrailingOnes, #3 ;// if (TrailingOnes<3) + MOVLT TrailingOnes, #4 ;// then TrailingOnes = +4 + MOVGE TrailingOnes, #2 ;// else TrailingOnes = +2 + MOVGE SuffixLength, #0 ;// 
SuffixLength = 0 + +LevelLoop + M_BD_CLZ16 Symbol, T1, T2 ;// Symbol=LevelPrefix + CMP Symbol,#16 + BGE EndBadSymbol + + MOVS lr, SuffixLength ;// if LevelSuffixSize==0 + TEQEQ Symbol, #14 ;// and LevelPrefix==14 + MOVEQ lr, #4 ;// then LevelSuffixSize=4 + TEQ Symbol, #15 ;// if LevelPrefix==15 + MOVEQ lr, #12 ;// then LevelSuffixSize=12 + + TEQEQ SuffixLength,#0 ;// if LevelPrefix==15 and SuffixLength==0 + ADDEQ Symbol,Symbol,#15 ;// then add 15 to LevelPrefix + + TEQ lr, #0 ;// if LevelSuffixSize==0 + BEQ LevelCodeRead ;// LevelCode = LevelPrefix + + M_BD_VREAD16 LevelSuffix, lr, T1, T2 ;// Read Level Suffix + + MOV Symbol, Symbol, LSL SuffixLength + ADD Symbol, LevelSuffix, Symbol + +LevelCodeRead + ;// Symbol = LevelCode + ADD Symbol, Symbol, TrailingOnes ;// +4 if level cannot be +/-1, +2 o/w + MOV TrailingOnes, #2 + MOVS T1, Symbol, LSR #1 + RSBCS T1, T1, #0 ;// If Symbol odd then negate + M_PRINTF "Level=%d\n", T1 + STRH T1, [pLevel], #2 ;// Store level. + + LDR T2, =armVCM4P10_SuffixToLevel + LDRSB T1, [T2, SuffixLength] ;// Find increment level + TEQ SuffixLength, #0 + MOVEQ SuffixLength, #1 + CMP Symbol, T1 + ADDCS SuffixLength, SuffixLength, #1 + SUBS Count, Count, #1 + BGT LevelLoop + +DecodeRuns + ;// Find number of zeros + M_LDR T1, pSMaxNumCoeff ;// sMaxNumCoeff + SUB Count, TotalCoeff, #1 ;// Number of runs excluding last + SUBS ZerosLeft, T1, TotalCoeff ;// Maximum number of zeros there could be + M_ADR pRun, ppRun + MOV CoeffNum,TotalCoeff + SUB CoeffNum,CoeffNum,#1 + BEQ NoZerosLeft + + ;// Unpack number of zeros from bitstream + TEQ T1, #4 + LDREQ pVLDTable, =(armVCM4P10_CAVLCTotalZeros2x2Tables-4) + LDRNE pVLDTable, =(armVCM4P10_CAVLCTotalZeroTables-4) + LDR pVLDTable, [pVLDTable, TotalCoeff, LSL #2] + + M_BD_VLD Symbol, T1, T2, pVLDTable, 4, 2 ;// Symbol = ZerosLeft + CMP Symbol,#16 + BGE EndBadSymbol + + LDR ppRunTable, =(armVCM4P10_CAVLCRunBeforeTables-4) + M_ADR pRun, ppRun + MOVS ZerosLeft, Symbol + + ADD CoeffNum,CoeffNum,ZerosLeft + + BEQ NoZerosLeft + + ;// Decode runs while zeros are left and more than
one coefficient +RunLoop + SUBS Count, Count, #1 + LDR pVLDTable, [ppRunTable, ZerosLeft, LSL#2] + BLT LastRun + M_BD_VLD Symbol, T1, T2, pVLDTable, 3, 2 ;// Symbol = Run + CMP Symbol,#15 + BGE EndBadSymbol + + SUBS ZerosLeft, ZerosLeft, Symbol + M_PRINTF "Run=%d\n", Symbol + STRB Symbol, [pRun], #1 + BGT RunLoop + + ;// Decode runs while no zeros are left +NoZerosLeft + SUBS Count, Count, #1 + M_PRINTF "Run=%d\n", ZerosLeft + STRGEB ZerosLeft, [pRun], #1 + BGT NoZerosLeft + +LastRun + ;// Final run length is remaining zeros + M_PRINTF "LastRun=%d\n", ZerosLeft + STRB ZerosLeft, [pRun], #1 + + ;// Write coefficients to output array + M_LDR T1, pSMaxNumCoeff ;// sMaxNumCoeff + TEQ T1, #15 + ADDEQ CoeffNum,CoeffNum,#1 + + + SUB pRun,pRun,TotalCoeff + SUB pLevel,pLevel,TotalCoeff + SUB pLevel,pLevel,TotalCoeff + + M_LDR ppPosCoefbuf, pppPosCoefbuf + LDR pPosCoefbuf, [ppPosCoefbuf] + TEQ T1, #4 + LDREQ pZigZag, =armVCM4P10_ZigZag_2x2 + LDRNE pZigZag, =armVCM4P10_ZigZag_4x4 + + + +OutputLoop + + LDRB T2, [pRun],#1 + LDRB T1, [pZigZag, CoeffNum] + SUB CoeffNum, CoeffNum, #1 ;// Skip Non zero + SUB CoeffNum, CoeffNum, T2 ;// Skip Zero run + + LDRSH T2, [pLevel],#2 + + SUBS TotalCoeff, TotalCoeff, #1 + ORREQ T1, T1, #LAST_COEFF + + ADD temp, T2, #128 + CMP temp, #256 + ORRCS T1, T1, #TWO_BYTE_COEFF + + + TEQ TotalCoeff, #0 ;// Preserves carry + + M_PRINTF "Output=%02x %04x\n", T1, T2 + STRB T1, [pPosCoefbuf], #1 + STRB T2, [pPosCoefbuf], #1 + MOV T2, T2, LSR #8 + STRCSB T2, [pPosCoefbuf], #1 + BNE OutputLoop + + ;// Finished + STR pPosCoefbuf, [ppPosCoefbuf] + M_LDR ppBitStream, pppBitStream + M_LDR pOffset, ppOffset + B EndNoError + +EndBadSymbol + MOV r0, #OMX_Sts_Err + B End + +EndNoError + ;// Finished reading from the bitstream + M_BD_FINI ppBitStream, pOffset + + ;// Set return value + MOV r0, #OMX_Sts_NoErr +End + M_END + + ENDIF + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s 
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s new file mode 100644 index 0000000000000000000000000000000000000000..b16f188b8c0b0b6456a358325d45856ab385cba3 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s @@ -0,0 +1,123 @@ +;// +;// +;// File Name: armVCM4P10_DequantTables_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + EXPORT armVCM4P10_QPDivTable + EXPORT armVCM4P10_VMatrixQPModTable + EXPORT armVCM4P10_PosToVCol4x4 + EXPORT armVCM4P10_PosToVCol2x2 + EXPORT armVCM4P10_VMatrix + EXPORT armVCM4P10_QPModuloTable + EXPORT armVCM4P10_VMatrixU16 + +;// Define the processor variants supported by this file + + M_VARIANTS ARM1136JS + + +;// Guarding implementation by the processor name + + + IF ARM1136JS :LOR: CortexA8 + + + M_TABLE armVCM4P10_PosToVCol4x4 + DCB 0, 2, 0, 2 + DCB 2, 1, 2, 1 + DCB 0, 2, 0, 2 + DCB 2, 1, 2, 1 + + + M_TABLE armVCM4P10_PosToVCol2x2 + DCB 0, 2 + DCB 2, 1 + + + M_TABLE armVCM4P10_VMatrix + DCB 10, 16, 13 + DCB 11, 18, 14 + DCB 13, 20, 16 + DCB 14, 23, 18 + DCB 16, 25, 20 + DCB 18, 29, 23 + +;//------------------------------------------------------- +;// This table evaluates the expression [(INT)(QP/6)], +;// for values of QP from 0 to 51 (inclusive). +;//------------------------------------------------------- + + M_TABLE armVCM4P10_QPDivTable + DCB 0, 0, 0, 0, 0, 0 + DCB 1, 1, 1, 1, 1, 1 + DCB 2, 2, 2, 2, 2, 2 + DCB 3, 3, 3, 3, 3, 3 + DCB 4, 4, 4, 4, 4, 4 + DCB 5, 5, 5, 5, 5, 5 + DCB 6, 6, 6, 6, 6, 6 + DCB 7, 7, 7, 7, 7, 7 + DCB 8, 8, 8, 8, 8, 8 + +;//---------------------------------------------------- +;// This table contains armVCM4P10_VMatrix[QP%6][0] entries, +;// for values of QP from 0 to 51 (inclusive).
+;//---------------------------------------------------- + + M_TABLE armVCM4P10_VMatrixQPModTable + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + +;//------------------------------------------------------- +;// This table evaluates the modulus expression [QP%6]*6, +;// for values of QP from 0 to 51 (inclusive). +;//------------------------------------------------------- + + M_TABLE armVCM4P10_QPModuloTable + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + +;//------------------------------------------------------- +;// This table contains the individual byte values stored as +;// halfwords. This avoids unpacking inside the function +;//------------------------------------------------------- + + M_TABLE armVCM4P10_VMatrixU16 + DCW 10, 16, 13 + DCW 11, 18, 14 + DCW 13, 20, 16 + DCW 14, 23, 18 + DCW 16, 25, 20 + DCW 18, 29, 23 + + ENDIF ;//ARM1136JS :LOR: CortexA8 + + + + + END \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s new file mode 100644 index 0000000000000000000000000000000000000000..82b954237deae7d2a001df74d9228eae3408691c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s @@ -0,0 +1,236 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_Align_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited.
All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe + +DEBUG_ON SETL {FALSE} + + IF ARM1136JS + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 8 +iHeight RN 9 + +;// Declare inner loop registers +x RN 7 +x0 RN 7 +x1 RN 10 +x2 RN 11 +Scratch RN 12 + +;// Function: +;// armVCM4P10_InterpolateLuma_HorAlign9x_unsafe +;// +;// Implements copy from an arbitrarily aligned source memory location (pSrc) to a 4 byte aligned +;// destination pointed by (pDst) for horizontal interpolation. +;// This function needs to copy 9 bytes in horizontal direction. +;// +;// Registers used as input for this function +;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy +;// +;// Registers preserved for top level function +;// r2,r3,r4,r5,r6 +;// +;// Registers modified by the function +;// r7,r8,r9,r10,r11,r12 +;// +;// Output registers +;// r0 - pointer to the new aligned location which will be used as pSrc +;// r1 - step size to this aligned location + + ;// Function header + M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + + ;// Copy pDst to scratch + MOV Scratch, pDst + +StartAlignedStackCopy + AND x, pSrc, #3 + BIC pSrc, pSrc, #3 + + M_SWITCH x + M_CASE Copy0toAligned + M_CASE Copy1toAligned + M_CASE Copy2toAligned + M_CASE Copy3toAligned + M_ENDSWITCH + +Copy0toAligned + LDM pSrc, {x0, x1, x2} + SUBS iHeight, iHeight, #1 + ADD pSrc, pSrc, srcStep + + ;// One cycle stall + + STM pDst!, {x0, x1, x2} ;// Store aligned output row + BGT Copy0toAligned + B CopyEnd + +Copy1toAligned + LDM pSrc, {x0, x1, x2} + SUBS iHeight, iHeight, #1 + ADD pSrc, pSrc, srcStep + + ;// One cycle stall + + MOV x0, x0, LSR #8 + ORR x0, x0, x1, LSL #24 + MOV x1, x1, LSR #8 + ORR x1, x1, x2, LSL #24 + MOV x2, x2, LSR #8 + STM pDst!, {x0, x1, x2} ;// Store aligned output row + BGT Copy1toAligned + B
CopyEnd + +Copy2toAligned + LDM pSrc, {x0, x1, x2} + SUBS iHeight, iHeight, #1 + ADD pSrc, pSrc, srcStep + + ;// One cycle stall + + MOV x0, x0, LSR #16 + ORR x0, x0, x1, LSL #16 + MOV x1, x1, LSR #16 + ORR x1, x1, x2, LSL #16 + MOV x2, x2, LSR #16 + STM pDst!, {x0, x1, x2} ;// Store aligned output row + BGT Copy2toAligned + B CopyEnd + +Copy3toAligned + LDM pSrc, {x0, x1, x2} + SUBS iHeight, iHeight, #1 + ADD pSrc, pSrc, srcStep + + ;// One cycle stall + + MOV x0, x0, LSR #24 + ORR x0, x0, x1, LSL #8 + MOV x1, x1, LSR #24 + ORR x1, x1, x2, LSL #8 + MOV x2, x2, LSR #24 + STM pDst!, {x0, x1, x2} ;// Store aligned output row + BGT Copy3toAligned + +CopyEnd + + MOV pSrc, Scratch ;// return the start of the aligned copy (the pDst value on entry) + MOV srcStep, #12 ;// each aligned row was stored as 3 words = 12 bytes + + M_END + + +;// Function: +;// armVCM4P10_InterpolateLuma_VerAlign4x_unsafe +;// +;// Implements copy from an arbitrarily aligned source memory location (pSrc) to an aligned +;// destination pointed by (pDst) for vertical interpolation. 
+;// This function needs to copy 4 bytes in horizontal direction +;// +;// Registers used as input for this function +;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy +;// +;// Registers preserved for top level function +;// r2,r3,r4,r5,r6 +;// +;// Registers modified by the function +;// r7,r8,r9,r10,r11,r12 +;// +;// Output registers +;// r0 - pointer to the new aligned location which will be used as pSrc +;// r1 - step size to this aligned location + + ;// Function header + M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe + + ;// Copy pSrc to stack +StartVAlignedStackCopy + AND x, pSrc, #3 + BIC pSrc, pSrc, #3 + + + M_SWITCH x + M_CASE Copy0toVAligned + M_CASE Copy1toVAligned + M_CASE Copy2toVAligned + M_CASE Copy3toVAligned + M_ENDSWITCH + +Copy0toVAligned + M_LDR x0, [pSrc], srcStep + SUBS iHeight, iHeight, #1 + + ;// One cycle stall + + STR x0, [pDst], #4 ;// Store aligned output row + BGT Copy0toVAligned + B CopyVEnd + +Copy1toVAligned + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + SUBS
iHeight, iHeight, #1 + + ;// One cycle stall + + MOV x1, x1, LSL #24 + ORR x0, x1, x0, LSR #8 + STR x0, [pDst], #4 ;// Store aligned output row + BGT Copy1toVAligned + B CopyVEnd + +Copy2toVAligned + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + SUBS iHeight, iHeight, #1 + + ;// One cycle stall + + MOV x1, x1, LSL #16 + ORR x0, x1, x0, LSR #16 + STR x0, [pDst], #4 ;// Store aligned output row + BGT Copy2toVAligned + B CopyVEnd + +Copy3toVAligned + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + SUBS iHeight, iHeight, #1 + + ;// One cycle stall + + MOV x1, x1, LSL #8 + ORR x0, x1, x0, LSR #24 + STR x0, [pDst], #4 ;// Store aligned output row + BGT Copy3toVAligned + +CopyVEnd + + SUB pSrc, pDst, #28 + MOV srcStep, #4 + + M_END + + + ENDIF + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s new file mode 100644 index 0000000000000000000000000000000000000000..bc0b6ecb6568b07ba13fb9b6b22947c001aa6368 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s @@ -0,0 +1,149 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_Copy_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+;// +;// +;// + +;// Function: +;// armVCM4P10_InterpolateLuma_Copy4x4_unsafe +;// +;// Implements copy from an arbitrarily aligned source memory location (pSrc) to an aligned +;// destination pointed to by (pDst) +;// +;// Registers preserved for top level function +;// r1,r3,r4,r5,r6,r7,r10,r11,r14 +;// +;// Registers modified by the function +;// r0,r2,r8,r9,r12 + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 + +;// Declare other intermediate registers +x0 RN 4 +x1 RN 5 +x2 RN 8 +x3 RN 9 +Temp RN 12 + + IF ARM1136JS + + M_START armVCM4P10_InterpolateLuma_Copy4x4_unsafe, r6 + +Copy4x4Start + ;// Do Copy and branch to EndOfInterpolation + AND Temp, pSrc, #3 ;// Temp = byte offset of pSrc within its word (0..3) + BIC pSrc, pSrc, #3 ;// Round pSrc down to a word boundary + + M_SWITCH Temp + M_CASE Copy4x4Align0 + M_CASE Copy4x4Align1 + M_CASE Copy4x4Align2 + M_CASE Copy4x4Align3 + M_ENDSWITCH + +Copy4x4Align0 + M_LDR x0, [pSrc], srcStep + M_LDR x1, [pSrc], srcStep + M_STR x0, [pDst], dstStep + M_LDR x2, [pSrc], srcStep + M_STR x1, [pDst], dstStep + M_LDR x3, [pSrc], srcStep + M_STR x2, [pDst], dstStep + M_STR x3, [pDst], dstStep + B Copy4x4End + +Copy4x4Align1 + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #8 + ORR x0, x0, x1, LSL #24 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #8 + ORR x2, x2, x3, LSL #24 + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + M_STR x2, [pDst], dstStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #8 + ORR x0, x0, x1, LSL #24 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #8 + ORR x2, x2, x3, LSL #24 + M_STR x2, [pDst], dstStep + B Copy4x4End + +Copy4x4Align2 + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #16 + ORR x0, x0, x1, LSL #16 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #16 + ORR x2, x2, x3, LSL #16 + M_STR
x2, [pDst], dstStep + + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #16 + ORR x0, x0, x1, LSL #16 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #16 + ORR x2, x2, x3, LSL #16 + M_STR x2, [pDst], dstStep + B Copy4x4End + +Copy4x4Align3 + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #24 + ORR x0, x0, x1, LSL #8 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #24 + ORR x2, x2, x3, LSL #8 + M_STR x2, [pDst], dstStep + + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #24 + ORR x0, x0, x1, LSL #8 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #24 + ORR x2, x2, x3, LSL #8 + M_STR x2, [pDst], dstStep + B Copy4x4End + +Copy4x4End + M_END + + ENDIF + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s new file mode 100644 index 0000000000000000000000000000000000000000..66cfe5ef9266ad6d8d41885bf5c783233859033f --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s @@ -0,0 +1,178 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe + EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe + +;// Functions: +;// armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and +;// armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe +;// +;// Implements re-arrangement of data from temporary buffer to a buffer pointed to by pBuf. +;// This converts the data from 16 bit to 8 bit and also +;// removes the offset and saturates the result. +;// +;// Registers used as input for this function +;// r0,r1,r7 where r0 is input pointer and r1 its step size, r7 is output pointer +;// +;// Registers preserved for top level function +;// r4,r5,r6,r8,r9,r14 +;// +;// Registers modified by the function +;// r7,r10,r11,r12 +;// +;// Output registers +;// r0 - pointer to the destination location +;// r1 - step size to this destination location + + +DEBUG_ON SETL {FALSE} + +MASK EQU 0x80808080 ;// Mask is used to implement (a+b+1)/2 + +;// Declare input registers + +pSrc0 RN 0 +srcStep0 RN 1 + +;// Declare other intermediate registers +Temp1 RN 4 +Temp2 RN 5 +Temp3 RN 10 +Temp4 RN 11 +pBuf RN 7 +r0x0fe00fe0 RN 6 +r0x00ff00ff RN 12 +Count RN 14 +ValueA0 RN 10 +ValueA1 RN 11 + + IF ARM1136JS + + + ;// Function header + M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6 + + ;// Code start + MOV Count, #4 + LDR r0x0fe00fe0, =0x0fe00fe0 + LDR r0x00ff00ff, =0x00ff00ff +LoopStart1 + LDR Temp4, [pSrc0, #12] + LDR Temp3, [pSrc0, #8] + LDR Temp2, [pSrc0, #4] + M_LDR Temp1, [pSrc0], srcStep0 + UQSUB16 Temp4, Temp4, r0x0fe00fe0 + UQSUB16 Temp3, Temp3, r0x0fe00fe0 + UQSUB16 Temp2, Temp2, r0x0fe00fe0 + UQSUB16 Temp1, Temp1, r0x0fe00fe0 + USAT16 Temp4, #13, Temp4 + USAT16 Temp3, #13, Temp3 + USAT16 Temp2, #13, Temp2 + USAT16 Temp1, #13, Temp1 + AND Temp4, r0x00ff00ff, Temp4, LSR #5 + AND Temp3, r0x00ff00ff, Temp3, LSR #5 + AND Temp2, r0x00ff00ff, Temp2, LSR #5 + AND Temp1, r0x00ff00ff, Temp1,
LSR #5 + ORR ValueA1, Temp3, Temp4, LSL #8 + ORR ValueA0, Temp1, Temp2, LSL #8 + SUBS Count, Count, #1 + STRD ValueA0, [pBuf], #8 + BGT LoopStart1 +End1 + SUB pSrc0, pBuf, #32 ;// 4 passes x 8 bytes stored: return the start of the packed output in r0 + MOV srcStep0, #8 ;// Each pass stored one 8-byte row + + M_END + + + ;// Function header + M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6 + + ;// Code start + LDR r0x0fe00fe0, =0x0fe00fe0 + LDR r0x00ff00ff, =0x00ff00ff + MOV Count, #2 + +LoopStart + LDR Temp4, [pSrc0, #12] + LDR Temp3, [pSrc0, #8] + LDR Temp2, [pSrc0, #4] + M_LDR Temp1, [pSrc0], srcStep0 + + UQSUB16 Temp4, Temp4, r0x0fe00fe0 + UQSUB16 Temp3, Temp3, r0x0fe00fe0 + UQSUB16 Temp2, Temp2, r0x0fe00fe0 + UQSUB16 Temp1, Temp1, r0x0fe00fe0 + + USAT16 Temp4, #13, Temp4 + USAT16 Temp3, #13, Temp3 + USAT16 Temp2, #13, Temp2 + USAT16 Temp1, #13, Temp1 + + AND Temp4, r0x00ff00ff, Temp4, LSR #5 + AND Temp3, r0x00ff00ff, Temp3, LSR #5 + AND Temp2, r0x00ff00ff, Temp2, LSR #5 + AND Temp1, r0x00ff00ff, Temp1, LSR #5 + ORR ValueA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0] + ORR ValueA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0] + + PKHBT Temp1, ValueA0, ValueA1, LSL #16 ;// [d0 c0 b0 a0] + + STR Temp1, [pBuf], #8 + PKHTB Temp2, ValueA1, ValueA0, ASR #16 ;// [d2 c2 b2 a2] + STR Temp2, [pBuf], #-4 + + LDR Temp4, [pSrc0, #12] + LDR Temp3, [pSrc0, #8] + LDR Temp2, [pSrc0, #4] + M_LDR Temp1, [pSrc0], srcStep0 + + UQSUB16 Temp4, Temp4, r0x0fe00fe0 + UQSUB16 Temp3, Temp3, r0x0fe00fe0 + UQSUB16 Temp2, Temp2, r0x0fe00fe0 + UQSUB16 Temp1, Temp1, r0x0fe00fe0 + + USAT16 Temp4, #13, Temp4 + USAT16 Temp3, #13, Temp3 + USAT16 Temp2, #13, Temp2 + USAT16 Temp1, #13, Temp1 + + AND Temp4, r0x00ff00ff, Temp4, LSR #5 + AND Temp3, r0x00ff00ff, Temp3, LSR #5 + AND Temp2, r0x00ff00ff, Temp2, LSR #5 + AND Temp1, r0x00ff00ff, Temp1, LSR #5 + ORR ValueA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0] + ORR ValueA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0] + + PKHBT Temp1, ValueA0, ValueA1, LSL #16 ;// [d0 c0 b0 a0] + SUBS Count, Count, #1 + STR Temp1, [pBuf], #8 + PKHTB Temp2, ValueA1, ValueA0, ASR #16 ;//
[d2 c2 b2 a2] + STR Temp2, [pBuf], #4 + + BGT LoopStart +End2 + SUB pSrc0, pBuf, #32-8 ;// pBuf advanced 16 bytes per pass (32 in total); return output start + 8 in r0 + MOV srcStep0, #4 + + M_END + + ENDIF + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s new file mode 100644 index 0000000000000000000000000000000000000000..851ff6abbdc020ff42f84ccdf10b1048270d9c92 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s @@ -0,0 +1,296 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + EXPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + + M_VARIANTS ARM1136JS + + + + IF ARM1136JS + + + M_ALLOC8 ppDstArgs, 8 + M_ALLOC8 pTempResult1, 8 + M_ALLOC8 pTempResult2, 8 + M_ALLOC4 ppSrc, 4 + M_ALLOC4 ppDst, 4 + M_ALLOC4 pDstStep, 4 + M_ALLOC4 pSrcStep, 4 + M_ALLOC4 pCounter, 4 + + ;// Function header + ;// Function: + ;// armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + ;// + ;// Implements diagonal interpolation for a block of size 4x4. Input and output should + ;// be aligned. + ;// + ;// Registers used as input for this function + ;// r0,r1,r2,r3, r8 where r0,r2 input pointer and r1,r3 step size, r8 intermediate-buf pointer + ;// + ;// Registers preserved for top level function + ;// r0,r1,r2,r3,r4,r5,r6,r14 + ;// + ;// Registers modified by the function + ;// r7,r8,r9,r10,r11,r12 + ;// + ;// Output registers + ;// None.
Function will preserve r0-r3 + + M_START armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe, r6 + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 + +;// Declare inner loop registers +Acc0 RN 4 +Acc1 RN 5 +Acc2 RN 6 +Acc3 RN 7 + +ValA RN 4 +ValB RN 5 +ValC RN 6 +ValD RN 7 +ValE RN 8 +ValF RN 9 +ValG RN 12 +ValH RN 14 +ValI RN 1 + +Temp1 RN 3 +Temp2 RN 1 +Temp3 RN 12 +Temp4 RN 7 +Temp5 RN 5 +r0x0fe00fe0 RN 3 ;// [0 (16*255 - 16) 0 (16*255 - 16)] +r0x00ff00ff RN 10 ;// [0 255 0 255] where 255 is offset +Counter RN 11 +pInterBuf RN 8 + +ValCA RN 8 +ValDB RN 9 +ValGE RN 10 +ValHF RN 11 +r0x00140001 RN 12 +r0x0014fffb RN 14 + +r0x0001fc00 RN 11 + +Accx RN 8 +Accy RN 9 +Temp6 RN 14 + + M_STRD pDst, dstStep, ppDstArgs + + MOV pDst, pInterBuf + MOV dstStep, #16 + + ;// Counter = 4; HeightLoop repeats while Counter >= 0 (BPL), i.e. 5 passes of two rows + MOV Counter, #4 + M_STR dstStep, pDstStep + M_STR srcStep, pSrcStep + LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results + +HeightLoop +NextTwoRowsLoop + LDR ValD, [pSrc, srcStep] ;// Load row 1 [d1 c1 b1 a1] + LDR ValA, [pSrc], #4 ;// Load row 0 [d0 c0 b0 a0] + LDR ValH, [pSrc, srcStep] ;// Load [h1 g1 f1 e1] + LDR ValE, [pSrc], #4 ;// Load [h0 g0 f0 e0] + LDRB Temp2, [pSrc, srcStep] ;// Load row 1 byte i1 (LDRB reads a single byte) + LDRB Temp1, [pSrc], #-8 ;// Load row 0 byte i0, then rewind pSrc by 8 + + PKHBT ValB, ValA, ValD, LSL #16 ;// [b1 a1 b0 a0] + PKHTB ValD, ValD, ValA, ASR #16 ;// [d1 c1 d0 c0] + UXTAB16 ValA, r0x00ff00ff, ValB ;// [00 a1 00 a0] + [0 255 0 255] + UXTAB16 ValC, r0x00ff00ff, ValD ;// [00 c1 00 c0] + [0 255 0 255] + PKHBT ValI, Temp1, Temp2, LSL #16 ;// [00 i1 00 i0] + PKHBT ValF, ValE, ValH, LSL #16 ;// [f1 e1 f0 e0] + PKHTB ValH, ValH, ValE, ASR #16 ;// [h1 g1 h0 g0] + UXTAB16 ValE, r0x00ff00ff, ValF ;// [00 e1 00 e0] + [0 255 0 255] + + ;// Calculate Acc0 + ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f + UXTAB16 Temp1, ValC, ValD, ROR #8 + UXTAB16 Temp3, ValE, ValB, ROR #8 + RSB Temp1, Temp3,
Temp1, LSL #2 + UXTAB16 Acc0, ValA, ValF, ROR #8 + ADD Temp1, Temp1, Temp1, LSL #2 + ADD Acc0, Acc0, Temp1 + + ;// Calculate Acc1 + ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g + UXTAB16 Temp1, ValE, ValD, ROR #8 + UXTAB16 Temp3, ValC, ValF, ROR #8 + RSB Temp1, Temp3, Temp1, LSL #2 + UXTAB16 ValG, r0x00ff00ff, ValH ;// [00 g1 00 g0] + [0 255 0 255] + ADD Temp1, Temp1, Temp1, LSL #2 + UXTAB16 Acc1, ValG, ValB, ROR #8 + ADD Acc1, Acc1, Temp1 + + UXTAB16 Acc2, ValC, ValH, ROR #8 + ADD ValI, r0x00ff00ff, ValI ;// [00 i1 00 i0] + [0 255 0 255] + + ;// Calculate Acc2 + ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h + UXTAB16 Temp1, ValG, ValD, ROR #8 + UXTAB16 Acc3, ValI, ValD, ROR #8 + UXTAB16 Temp2, ValE, ValF, ROR #8 + + RSB Temp1, Temp1, Temp2, LSL #2 + UXTAB16 Temp2, ValG, ValF, ROR #8 + ADD Temp1, Temp1, Temp1, LSL #2 + ADD Acc2, Acc2, Temp1 + + ;// Calculate Acc3 + ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i + UXTAB16 Temp1, ValE, ValH, ROR #8 + RSB Temp1, Temp1, Temp2, LSL #2 + ADD Temp1, Temp1, Temp1, LSL #2 + ADD Acc3, Acc3, Temp1 + + M_LDR dstStep, pDstStep + M_LDR srcStep, pSrcStep + + ;// If Counter is even store Acc0-Acc3 in a temporary buffer + ;// If Counter is odd store Acc0-Acc3 and previous Acc0-Acc3 in a intermediate buf + ANDS Temp3, Counter, #1 + BEQ NoProcessing + + ;// Packing previous and current Acc0-Acc3 values + M_LDRD Accx, Accy, pTempResult1 + PKHBT Temp6, Accx, Acc0, LSL #16 ;//[0 a2 0 a0] = [0 a3 0 a2] [0 a1 0 a0] + PKHTB Acc0, Acc0, Accx, ASR #16 ;//[0 a3 0 a1] = [0 a1 0 a0] [0 a3 0 a2] + STR Acc0, [pDst, dstStep] + STR Temp6, [pDst], #4 + PKHBT Temp6, Accy, Acc1, LSL #16 ;//[0 b2 0 b0] = [0 b3 0 b2] [0 b1 0 b0] + PKHTB Acc1, Acc1, Accy, ASR #16 ;//[0 b3 0 b1] = [0 b1 0 b0] [0 b3 0 b2] + M_LDRD Accx, Accy, pTempResult2 + STR Acc1, [pDst, dstStep] + STR Temp6, [pDst], #4 + + PKHBT Temp6, Accx, Acc2, LSL #16 ;//[0 c2 0 c0] = [0 c3 0 c2] [0 c1 0 c0] + PKHTB Acc2, Acc2, Accx, ASR #16 ;//[0 c3 0 c1] = [0 c1 0 c0] [0 c3 0 c2] + STR Acc2, [pDst,
dstStep] + STR Temp6, [pDst], #4 + PKHBT Temp6, Accy, Acc3, LSL #16 ;//[0 d2 0 d0] = [0 d3 0 d2] [0 d1 0 d0] + PKHTB Acc3, Acc3, Accy, ASR #16 ;//[0 d3 0 d1] = [0 d1 0 d0] [0 d3 0 d2] + STR Acc3, [pDst, dstStep] + STR Temp6, [pDst], #-12 + ADD pDst, pDst, dstStep, LSL #1 + B AfterStore + +NoProcessing + M_STRD Acc0, Acc1, pTempResult1 + M_STRD Acc2, Acc3, pTempResult2 +AfterStore + SUBS Counter, Counter, #1 ;// Loop till height is 10 + ADD pSrc, pSrc, srcStep, LSL #1 + BPL HeightLoop + + STR Acc0, [pDst], #4 ;//[0 a1 0 a0] + STR Acc1, [pDst], #4 + STR Acc2, [pDst], #4 + STR Acc3, [pDst], #-12 + + ;// + ;// Horizontal interpolation using multiplication + ;// + + SUB pSrc, pDst, dstStep, LSL #2 + MOV srcStep, #16 + M_LDRD pDst, dstStep, ppDstArgs + + MOV Counter, #4 + LDR r0x0014fffb, =0x0014fffb + LDR r0x00140001, =0x00140001 + +HeightLoop1 + M_STR Counter, pCounter + + M_LDR ValCA, [pSrc], srcStep ;// Load [0 c 0 a] + M_LDR ValDB, [pSrc], srcStep ;// Load [0 d 0 b] + M_LDR ValGE, [pSrc], srcStep ;// Load [0 g 0 e] + M_LDR ValHF, [pSrc], srcStep ;// Load [0 h 0 f] + + + ;// Acc0 = smuad ([0 20 0 1], add([0 c 0 a] + [0 d 0 f])) - (5 * (b + e)) + ;// Acc1 = smuad ([0 20 0 1], add([0 e 0 g] + [0 d 0 b])) - (5 * (c + f)) + ;// Acc2 = smuad ([0 1 0 20], add([0 c 0 e] + [0 h 0 f])) - (5 * (d + g)) + ;// Acc3 = smuad ([0 20 0 1], add([0 d 0 f] + [0 i 0 g])) - (5 * (e + h)) + + SMUAD Acc0, ValCA, r0x00140001 ;// Acc0 = [0 c 0 a] * [0 20 0 1] + SMUAD Acc1, ValDB, r0x00140001 ;// Acc1 = [0 d 0 b] * [0 20 0 1] + SMUADX Acc2, ValGE, r0x0014fffb ;// Acc2 = [0 g 0 e] * [0 20 0 -5] + SMUAD Acc3, ValGE, r0x0014fffb ;// Acc3 = [0 g 0 e] * [0 20 0 -5] + + SMLAD Acc0, ValDB, r0x0014fffb, Acc0 ;// Acc0 += [0 d 0 b] * [0 20 0 -5] + SMLADX Acc1, ValGE, r0x00140001, Acc1 ;// Acc1 += [0 g 0 e] * [0 20 0 1] + SMLADX Acc2, ValHF, r0x00140001, Acc2 ;// Acc2 += [0 h 0 f] * [0 20 0 1] + SMLADX Acc3, ValHF, r0x0014fffb, Acc3 ;// Acc3 += [0 h 0 f] * [0 20 0 -5] + + SMLABB Acc0, ValGE,
r0x0014fffb, Acc0 ;// Acc0 += [0 g 0 e] * [0 0 0 -5] + SMLATB Acc1, ValCA, r0x0014fffb, Acc1 ;// Acc1 += c * -5 (top half of [0 c 0 a]) + SMLATB Acc2, ValCA, r0x00140001, Acc2 ;// Acc2 += [0 c 0 a] * [0 0 0 1] + SMLATB Acc3, ValDB, r0x00140001, Acc3 ;// Acc3 += d * 1 (top half of [0 d 0 b]) + + LDRH ValCA, [pSrc], #4 ;// Load halfword i from the fifth row, then advance pSrc by 4 + SMLABB Acc0, ValHF, r0x00140001, Acc0 ;// Acc0 += [0 h 0 f] * [0 0 0 1] + SMLABB Acc1, ValHF, r0x0014fffb, Acc1 ;// Acc1 += [0 h 0 f] * [0 0 0 -5] + SMLATB Acc2, ValDB, r0x0014fffb, Acc2 ;// Acc2 += [0 d 0 b] * [0 0 0 -5] + SMLABB Acc3, ValCA, r0x00140001, Acc3 ;// Acc3 += i * 1 (ValCA now holds the halfword loaded by LDRH above) + + LDR r0x0001fc00, =0x0001fc00 ;// (0xff * 16 * 32) - 512 + SUB Acc0, Acc0, r0x0001fc00 + SUB Acc1, Acc1, r0x0001fc00 + SUB Acc2, Acc2, r0x0001fc00 + SUB Acc3, Acc3, r0x0001fc00 + + USAT Acc0, #18, Acc0 + USAT Acc1, #18, Acc1 + USAT Acc2, #18, Acc2 + USAT Acc3, #18, Acc3 + + MOV Acc0, Acc0, LSR #10 + M_STRB Acc0, [pDst], dstStep + MOV Acc1, Acc1, LSR #10 + M_STRB Acc1, [pDst], dstStep + MOV Acc2, Acc2, LSR #10 + M_STRB Acc2, [pDst], dstStep + MOV Acc3, Acc3, LSR #10 + M_STRB Acc3, [pDst], dstStep + + + M_LDR Counter, pCounter + SUB pDst, pDst, dstStep, LSL #2 + SUB pSrc, pSrc, srcStep, LSL #2 + ADD pDst, pDst, #1 + SUBS Counter, Counter, #1 + BGT HeightLoop1 +End + SUB pDst, pDst, #4 + SUB pSrc, pSrc, #16 + + M_END + + ENDIF + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s new file mode 100644 index 0000000000000000000000000000000000000000..2f48e13d52b17aca5d6ac41390dc5b0be8c33c82 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s @@ -0,0 +1,276 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s +;// OpenMAX DL:
v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe + + M_VARIANTS ARM1136JS + + + + + + IF ARM1136JS + + M_ALLOC8 ppDstArgs, 8 + M_ALLOC4 ppSrc, 4 + M_ALLOC4 ppDst, 4 + M_ALLOC4 pCounter, 4 + + ;// Function header + ;// Function: + ;// armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe + ;// + ;// Implements diagonal interpolation for a block of size 4x4. Input and output should + ;// be aligned. + ;// + ;// Registers used as input for this function + ;// r0,r1,r2,r3, r8 where r0,r2 input pointer and r1,r3 step size, r8 intermediate-buf pointer + ;// + ;// Registers preserved for top level function + ;// r0,r1,r2,r3,r4,r5,r6,r14 + ;// + ;// Registers modified by the function + ;// r7,r8,r9,r10,r11,r12 + ;// + ;// Output registers + ;// None. Function will preserve r0-r3 + + M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r6 + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 + +;// Declare inner loop registers +ValA RN 5 +ValA0 RN 4 +ValA1 RN 5 +ValAF0 RN 4 +ValAF1 RN 5 + +ValB RN 11 + +ValC RN 5 +ValC0 RN 4 +ValC1 RN 5 +ValCD0 RN 12 +ValCD1 RN 14 +ValCF0 RN 4 +ValCF1 RN 5 + +ValD RN 10 + +ValE RN 7 +ValE0 RN 6 +ValE1 RN 7 +ValEB0 RN 10 +ValEB1 RN 11 +ValED0 RN 6 +ValED1 RN 7 + +ValF RN 10 + +ValG RN 14 +ValG0 RN 12 +ValG1 RN 14 +ValGB0 RN 12 +ValGB1 RN 14 + +Acc0 RN 4 +Acc1 RN 5 +Acc2 RN 6 +Acc3 RN 7 + +Temp RN 7 +Step RN 6 + +pInterBuf RN 8 +Counter RN 8 +r0x00ff00ff RN 9 ;// [0 255 0 255] where 255 is offset +r0x0001fc00 RN 10 ;// Holds 0x0001fc00 = (0xff * 16 * 32) - 512, subtracted after the horizontal pass + + +;// Declare inner loop registers +ValCA RN 8 +ValDB RN 9 +ValGE RN 10 +ValHF RN 11 +r0x00140001 RN 12 +r0x0014fffb RN 14 + +r0x00000200 RN 12 +r0x000000ff RN 12 + + M_STRD pDst, dstStep, ppDstArgs + MOV pDst, pInterBuf + MOV dstStep, #24 + + ;// Set up
counter of format, [0] [0] [1 (height)] [8 (width)] + MOV Counter, #1 + MOV Temp, #8 + ADD Counter, Temp, Counter, LSL #8 ;// [0 0 H W] + + LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results +WidthLoop + M_STR pSrc, ppSrc + M_STR pDst, ppDst +HeightLoop +TwoRowsLoop + M_LDR ValC, [pSrc], srcStep ;// Load [c3 c2 c1 c0] + M_LDR ValD, [pSrc], srcStep ;// Load [d3 d2 d1 d0] + M_LDR ValE, [pSrc], srcStep ;// Load [e3 e2 e1 e0] + SUB pSrc, pSrc, srcStep, LSL #2 + UXTAB16 ValC0, r0x00ff00ff, ValC ;// [0 c2 0 c0] + [0 255 0 255] + UXTAB16 ValC1, r0x00ff00ff, ValC, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + LDR ValB, [pSrc] ;// Load [b3 b2 b1 b0] + UXTAB16 ValE0, r0x00ff00ff, ValE ;// [0 e2 0 e0] + [0 255 0 255] + UXTAB16 ValE1, r0x00ff00ff, ValE, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + UXTAB16 ValCD0, ValC0, ValD ;// [0 c2 0 c0] + [0 255 0 255] + [0 d2 0 d0] + UXTAB16 ValCD1, ValC1, ValD, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 d3 0 d1] + UXTAB16 ValEB0, ValE0, ValB ;// [0 e2 0 e0] + [0 255 0 255] + [0 b2 0 b0] + RSB ValCD0, ValEB0, ValCD0, LSL #2 ;// 4*(Off+C+D) - (Off+B+E) + + LDR ValD, [pSrc, srcStep, LSL #1] ;// Load [d3 d2 d1 d0] + UXTAB16 ValEB1, ValE1, ValB, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 b3 0 b1] + RSB ValCD1, ValEB1, ValCD1, LSL #2 + + UXTAB16 ValED0, ValE0, ValD ;// [0 e2 0 e0] + [0 255 0 255] + [0 d2 0 d0] + UXTAB16 ValED1, ValE1, ValD, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 d3 0 d1] + LDR ValF, [pSrc, srcStep, LSL #2] ;// Load [f3 f2 f1 f0] + M_LDR ValB, [pSrc], srcStep ;// Load [b3 b2 b1 b0] + ADD ValCD0, ValCD0, ValCD0, LSL #2 ;// 5 * [4*(Off+C+D) - (Off+B+E)] + ADD ValCD1, ValCD1, ValCD1, LSL #2 + UXTAB16 ValCF1, ValC1, ValF, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 f3 0 f1] + UXTAB16 ValCF0, ValC0, ValF ;// [0 c2 0 c0] + [0 255 0 255] + [0 f2 0 f0] + RSB ValED1, ValCF1, ValED1, LSL #2 + + SUB ValA, pSrc, srcStep, LSL #1 + LDR ValA, [ValA] ;// Load [a3 a2 a1 a0] + RSB ValED0, ValCF0, ValED0, LSL #2
;// 4*(Off+E+D) - (Off+C+F) + ADD ValED1, ValED1, ValED1, LSL #2 + ADD ValED0, ValED0, ValED0, LSL #2 ;// 5 * [4*(Off+E+D) - (Off+C+F)] + UXTAB16 ValA0, r0x00ff00ff, ValA ;// [0 a2 0 a0] + [0 255 0 255] + UXTAB16 ValA1, r0x00ff00ff, ValA, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255] + UXTAB16 ValAF0, ValA0, ValF ;// [0 a2 0 a0] + [0 255 0 255] + [0 f2 0 f0] + UXTAB16 ValAF1, ValA1, ValF, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255] + [0 f3 0 f1] + ADD Acc1, ValCD1, ValAF1 + + LDR ValG, [pSrc, srcStep, LSL #2] ;// Load [g3 g2 g1 g0] + ADD Acc0, ValCD0, ValAF0 ;// Acc0 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E) + STR Acc1, [pDst, #4] ;// Store result & adjust pointer + M_STR Acc0, [pDst], dstStep ;// Store result & adjust pointer + UXTAB16 ValG0, r0x00ff00ff, ValG ;// [0 g2 0 g0] + [0 255 0 255] + UXTAB16 ValG1, r0x00ff00ff, ValG, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255] + UXTAB16 ValGB0, ValG0, ValB ;// [0 g2 0 g0] + [0 255 0 255] + [0 b2 0 b0] + UXTAB16 ValGB1, ValG1, ValB, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255] + [0 b3 0 b1] + ADD Acc2, ValED0, ValGB0 ;// Acc2 = 16*Off + (B+G) + 20*(D+E) - 5*(C+F) + ADD Acc3, ValED1, ValGB1 + + STR Acc3, [pDst, #4] ;// Store result & adjust pointer + M_STR Acc2, [pDst], dstStep ;// Store result & adjust pointer + + SUBS Counter, Counter, #1 << 8 ;// Decrement the height field (bits 8 and up); BPL repeats while the result is >= 0 + ADD pSrc, pSrc, srcStep, LSL #1 + BPL HeightLoop + + M_LDR pSrc, ppSrc + M_LDR pDst, ppDst + ADDS Counter, Counter, #(1 << 8)-4 ;// Loop till width is 12 + ADD pSrc, pSrc, #4 + ADD pDst, pDst, #8 + ADD Counter, Counter, #1<<8 + BPL WidthLoop + + ;// + ;// Horizontal interpolation using multiplication + ;// + + SUB pSrc, pDst, #24 + MOV srcStep, #24 + M_LDRD pDst, dstStep, ppDstArgs + + MOV Counter, #4 + LDR r0x0014fffb, =0x0014fffb + LDR r0x00140001, =0x00140001 + +HeightLoop1 + M_STR Counter, pCounter + + + LDR ValCA, [pSrc], #4 ;// Load [0 c 0 a] + LDR ValDB, [pSrc], #4 ;// Load [0 d 0 b] + LDR ValGE, [pSrc], #4 ;// Load [0 g 0 e] + LDR ValHF, [pSrc], #4 ;// Load [0 h 0 f] + +
;// Acc0 = smuad ([0 20 0 1], add([0 c 0 a] + [0 d 0 f])) - (5 * (b + e)) + ;// Acc1 = smuad ([0 20 0 1], add([0 e 0 g] + [0 d 0 b])) - (5 * (c + f)) + ;// Acc2 = smuad ([0 1 0 20], add([0 c 0 e] + [0 h 0 f])) - (5 * (d + g)) + ;// Acc3 = smuad ([0 20 0 1], add([0 d 0 f] + [0 i 0 g])) - (5 * (e + h)) + SMUAD Acc0, ValCA, r0x00140001 ;// Acc0 = [0 c 0 a] * [0 20 0 1] + SMUAD Acc1, ValDB, r0x00140001 ;// Acc1 = [0 d 0 b] * [0 20 0 1] + SMUADX Acc2, ValGE, r0x0014fffb ;// Acc2 = [0 g 0 e] * [0 20 0 -5] + SMUAD Acc3, ValGE, r0x0014fffb ;// Acc3 = [0 g 0 e] * [0 20 0 -5] + + SMLAD Acc0, ValDB, r0x0014fffb, Acc0 ;// Acc0 += [0 d 0 b] * [0 20 0 -5] + SMLADX Acc1, ValGE, r0x00140001, Acc1 ;// Acc1 += [0 g 0 e] * [0 20 0 1] + SMLADX Acc2, ValHF, r0x00140001, Acc2 ;// Acc2 += [0 h 0 f] * [0 20 0 1] + SMLADX Acc3, ValHF, r0x0014fffb, Acc3 ;// Acc3 += [0 h 0 f] * [0 20 0 -5] + + SMLABB Acc0, ValGE, r0x0014fffb, Acc0 ;// Acc0 += [0 g 0 e] * [0 0 0 -5] + SMLATB Acc1, ValCA, r0x0014fffb, Acc1 ;// Acc1 += c * -5 (top half of [0 c 0 a]) + SMLATB Acc2, ValCA, r0x00140001, Acc2 ;// Acc2 += [0 c 0 a] * [0 0 0 1] + SMLATB Acc3, ValDB, r0x00140001, Acc3 ;// Acc3 += d * 1 (top half of [0 d 0 b]) + + LDRH ValCA, [pSrc], #8 ;// 8 = srcStep - 16 + SMLABB Acc0, ValHF, r0x00140001, Acc0 ;// Acc0 += [0 h 0 f] * [0 0 0 1] + SMLABB Acc1, ValHF, r0x0014fffb, Acc1 ;// Acc1 += [0 h 0 f] * [0 0 0 -5] + SMLATB Acc2, ValDB, r0x0014fffb, Acc2 ;// Acc2 += [0 d 0 b] * [0 0 0 -5] + SMLABB Acc3, ValCA, r0x00140001, Acc3 ;// Acc3 += i * 1 (ValCA now holds the halfword loaded by LDRH above) + + LDR r0x0001fc00, =0x0001fc00 ;// (0xff * 16 * 32) - 512 + SUB Acc0, Acc0, r0x0001fc00 + SUB Acc1, Acc1, r0x0001fc00 + SUB Acc2, Acc2, r0x0001fc00 + SUB Acc3, Acc3, r0x0001fc00 + + USAT Acc0, #18, Acc0 + USAT Acc1, #18, Acc1 + USAT Acc2, #18, Acc2 + USAT Acc3, #18, Acc3 + + MOV Acc0, Acc0, LSR #10 + MOV Acc1, Acc1, LSR #10 + MOV Acc2, Acc2, LSR #10 + MOV Acc3, Acc3, LSR #10 + + M_LDR Counter, pCounter + ORR Acc0, Acc0, Acc1, LSL #8 + ORR Acc2, Acc2, Acc3, LSL #8 + SUBS
Counter, Counter, #1 + ORR Acc0, Acc0, Acc2, LSL #16 + M_STR Acc0, [pDst], dstStep + BGT HeightLoop1 +End + SUB pDst, pDst, dstStep, LSL #2 + SUB pSrc, pSrc, srcStep, LSL #2 + + M_END + + ENDIF + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s new file mode 100644 index 0000000000000000000000000000000000000000..6690ced7ba58df21935ea3912c818a19ce06e673 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s @@ -0,0 +1,239 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + +DEBUG_ON SETL {FALSE} + + + IF ARM1136JS + +;// Function: +;// armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe +;// +;// Implements horizontal interpolation for a block of size 4x4. Input and output should +;// be aligned. +;// +;// Registers used as input for this function +;// r0,r1,r2,r3 where r0,r2 input pointer and r1,r3 corresponding step size +;// +;// Registers preserved for top level function +;// r0,r1,r2,r3,r4,r5,r6,r14 +;// +;// Registers modified by the function +;// r7,r8,r9,r10,r11,r12 +;// +;// Output registers +;// None.
Function will preserve r0-r3 + + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 + +;// Declare inner loop registers +Acc0 RN 4 +Acc1 RN 5 +Acc2 RN 6 +Acc3 RN 7 + +ValA RN 4 +ValB RN 5 +ValC RN 6 +ValD RN 7 +ValE RN 8 +ValF RN 9 +ValG RN 12 +ValH RN 14 +ValI RN 1 + +Temp1 RN 3 +Temp2 RN 1 +Temp3 RN 12 +Temp4 RN 7 +Temp5 RN 5 +r0x0fe00fe0 RN 3 ;// [0 (16*255 - 16) 0 (16*255 - 16)] +r0x00ff00ff RN 10 ;// [0 255 0 255] where 255 is offset +Counter RN 11 + +Height RN 3 + + M_ALLOC4 pDstStep, 4 + M_ALLOC4 pSrcStep, 4 + + ;// Function header + M_START armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe, r6 + + MOV Counter, #2 + M_STR dstStep, pDstStep + M_STR srcStep, pSrcStep + LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results + +NextTwoRowsLoop + LDR ValD, [pSrc, srcStep] ;// Load row 1 [d1 c1 b1 a1] + LDR ValA, [pSrc], #4 ;// Load row 0 [d0 c0 b0 a0] + LDR ValH, [pSrc, srcStep] ;// Load [h1 g1 f1 e1] + LDR ValE, [pSrc], #4 ;// Load [h0 g0 f0 e0] + LDRB Temp2, [pSrc, srcStep] ;// Load row 1 byte i1 (LDRB reads a single byte) + LDRB Temp1, [pSrc], #-8 ;// Load row 0 byte i0, then rewind pSrc by 8 + + PKHBT ValB, ValA, ValD, LSL #16 ;// [b1 a1 b0 a0] + PKHTB ValD, ValD, ValA, ASR #16 ;// [d1 c1 d0 c0] + UXTAB16 ValA, r0x00ff00ff, ValB ;// [00 a1 00 a0] + [0 255 0 255] + UXTAB16 ValC, r0x00ff00ff, ValD ;// [00 c1 00 c0] + [0 255 0 255] + PKHBT ValI, Temp1, Temp2, LSL #16 ;// [00 i1 00 i0] + PKHBT ValF, ValE, ValH, LSL #16 ;// [f1 e1 f0 e0] + PKHTB ValH, ValH, ValE, ASR #16 ;// [h1 g1 h0 g0] + UXTAB16 ValE, r0x00ff00ff, ValF ;// [00 e1 00 e0] + [0 255 0 255] + + ;// Calculate Acc0 + ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f + UXTAB16 Temp1, ValC, ValD, ROR #8 + UXTAB16 Temp3, ValE, ValB, ROR #8 + RSB Temp1, Temp3, Temp1, LSL #2 + UXTAB16 Acc0, ValA, ValF, ROR #8 + ADD Temp1, Temp1, Temp1, LSL #2 + ADD Acc0, Acc0, Temp1 + + ;// Calculate Acc1 + ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g + UXTAB16 Temp1, ValE, ValD, ROR #8 + UXTAB16 Temp3, ValC, ValF,
ROR #8 + RSB Temp1, Temp3, Temp1, LSL #2 + UXTAB16 ValG, r0x00ff00ff, ValH ;// [00 g1 00 g0] + [0 255 0 255] + ADD Temp1, Temp1, Temp1, LSL #2 + UXTAB16 Acc1, ValG, ValB, ROR #8 + ADD Acc1, Acc1, Temp1 + + LDR r0x0fe00fe0, =0x0fe00fe0 ;// 0x0fe00fe0 = (16 * Offset) - 16 where Offset is 255 + UXTAB16 Acc2, ValC, ValH, ROR #8 + ADD ValI, r0x00ff00ff, ValI ;// [00 i1 00 i0] + [0 255 0 255] + UQSUB16 Acc0, Acc0, r0x0fe00fe0 + UQSUB16 Acc1, Acc1, r0x0fe00fe0 + USAT16 Acc0, #13, Acc0 + USAT16 Acc1, #13, Acc1 + + ;// Calculate Acc2 + ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h + UXTAB16 Temp1, ValG, ValD, ROR #8 + UXTAB16 Acc3, ValI, ValD, ROR #8 + UXTAB16 Temp2, ValE, ValF, ROR #8 + AND Acc1, r0x00ff00ff, Acc1, LSR #5 + AND Acc0, r0x00ff00ff, Acc0, LSR #5 + ORR Acc0, Acc0, Acc1, LSL #8 + RSB Temp5, Temp1, Temp2, LSL #2 + UXTAB16 Temp2, ValG, ValF, ROR #8 + ADD Temp5, Temp5, Temp5, LSL #2 + ADD Acc2, Acc2, Temp5 + + ;// Calculate Acc3 + ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i + UXTAB16 Temp5, ValE, ValH, ROR #8 + RSB Temp5, Temp5, Temp2, LSL #2 + LDR r0x0fe00fe0, =0x0fe00fe0 + ADD Temp5, Temp5, Temp5, LSL #2 + ADD Acc3, Acc3, Temp5 + + UQSUB16 Acc3, Acc3, r0x0fe00fe0 + UQSUB16 Acc2, Acc2, r0x0fe00fe0 + USAT16 Acc3, #13, Acc3 + USAT16 Acc2, #13, Acc2 + + M_LDR dstStep, pDstStep + AND Acc3, r0x00ff00ff, Acc3, LSR #5 + AND Acc2, r0x00ff00ff, Acc2, LSR #5 + ORR Acc2, Acc2, Acc3, LSL #8 + + SUBS Counter, Counter, #1 + M_LDR srcStep, pSrcStep + PKHBT Acc1, Acc0, Acc2, LSL #16 + M_STR Acc1, [pDst], dstStep ;// Store result1 + PKHTB Acc2, Acc2, Acc0, ASR #16 + M_STR Acc2, [pDst], dstStep ;// Store result2 + ADD pSrc, pSrc, srcStep, LSL #1 + + BGT NextTwoRowsLoop +End + SUB pDst, pDst, dstStep, LSL #2 ;// Undo the 4 row stores (2 passes x 2 rows) to restore pDst + SUB pSrc, pSrc, srcStep, LSL #2 ;// Undo the 2 x (2 * srcStep) advances to restore pSrc + + M_END + + ENDIF + + END + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git
a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s new file mode 100644 index 0000000000000000000000000000000000000000..007cd0de8b60938eb531e4e8f76a6863fae126ce --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s @@ -0,0 +1,185 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + + + + IF ARM1136JS + + ;// Function header + + ;// Function: + ;// armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + ;// + ;// Implements vertical interpolation for a block of size 4x4. Input and output should + ;// be aligned. Each output is the 6-tap sum (A+F) - 5*(B+E) + 20*(C+D), plus 16, shifted right by 5 and saturated to 8 bits. + ;// + ;// Registers used as input for this function + ;// r0,r1,r2,r3 where r0 (pSrc) and r2 (pDst) are the source and destination pointers and r1 (srcStep), r3 (dstStep) the corresponding step sizes + ;// + ;// Registers preserved for top level function + ;// r0,r1,r2,r3,r4,r5,r6,r14 + ;// + ;// Registers modified by the function + ;// r7,r8,r9,r10,r11,r12 + ;// + ;// Output registers + ;// None.
Function will preserve r0-r3 + M_START armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe, r6 + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 + +;// Declare inner loop registers +ValA RN 5 +ValA0 RN 4 +ValA1 RN 5 +ValAF0 RN 4 +ValAF1 RN 5 + +ValB RN 11 + +ValC RN 5 +ValC0 RN 4 +ValC1 RN 5 +ValCD0 RN 12 +ValCD1 RN 14 +ValCF0 RN 4 +ValCF1 RN 5 + +ValD RN 10 + +ValE RN 7 +ValE0 RN 6 +ValE1 RN 7 +ValEB0 RN 10 +ValEB1 RN 11 +ValED0 RN 6 +ValED1 RN 7 + +ValF RN 10 + +ValG RN 14 +ValG0 RN 12 +ValG1 RN 14 +ValGB0 RN 12 +ValGB1 RN 14 + +Acc0 RN 4 +Acc1 RN 5 +Acc2 RN 6 +Acc3 RN 7 + +Temp RN 7 +Height RN 3 +Step RN 6 + +Counter RN 8 +r0x00ff00ff RN 9 ;// [0 255 0 255] where 255 is offset +r0x0fe00fe0 RN 10 ;// [0 (16*255 - 16) 0 (16*255 - 16)] + + + LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results + MOV Counter, #2 ;// Two iterations, each producing two output rows + +TwoRowsLoop + M_LDR ValC, [pSrc], srcStep ;// Load [c3 c2 c1 c0] + M_LDR ValD, [pSrc], srcStep ;// Load [d3 d2 d1 d0] + M_LDR ValE, [pSrc], srcStep ;// Load [e3 e2 e1 e0] + SUB pSrc, pSrc, srcStep, LSL #2 ;// Back up 4 rows: pSrc now points at row b + LDR ValB, [pSrc] ;// Load [b3 b2 b1 b0] + UXTAB16 ValC0, r0x00ff00ff, ValC ;// [0 c2 0 c0] + [0 255 0 255] + UXTAB16 ValC1, r0x00ff00ff, ValC, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + + UXTAB16 ValE0, r0x00ff00ff, ValE ;// [0 e2 0 e0] + [0 255 0 255] + UXTAB16 ValE1, r0x00ff00ff, ValE, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + UXTAB16 ValCD0, ValC0, ValD ;// [0 c2 0 c0] + [0 255 0 255] + [0 d2 0 d0] + UXTAB16 ValCD1, ValC1, ValD, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 d3 0 d1] + UXTAB16 ValEB0, ValE0, ValB ;// [0 e2 0 e0] + [0 255 0 255] + [0 b2 0 b0] + RSB ValCD0, ValEB0, ValCD0, LSL #2 ;// 4*(Off+C+D) - (Off+B+E) + + LDR ValD, [pSrc, srcStep, LSL #1] ;// Load [d3 d2 d1 d0] + UXTAB16 ValEB1, ValE1, ValB, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 b3 0 b1] + RSB ValCD1, ValEB1, ValCD1, LSL #2 + ;// One cycle stall + UXTAB16 ValED0, ValE0, ValD ;// [0 e2 0 e0] + [0 255 0 255] + [0 d2 0 d0] +
UXTAB16 ValED1, ValE1, ValD, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 d3 0 d1] + + LDR ValF, [pSrc, srcStep, LSL #2] ;// Load [f3 f2 f1 f0] + M_LDR ValB, [pSrc], srcStep ;// Load [b3 b2 b1 b0] + ADD ValCD0, ValCD0, ValCD0, LSL #2 ;// 5 * [4*(Off+C+D) - (Off+B+E)] + ADD ValCD1, ValCD1, ValCD1, LSL #2 + UXTAB16 ValCF1, ValC1, ValF, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 f3 0 f1] + UXTAB16 ValCF0, ValC0, ValF ;// [0 c2 0 c0] + [0 255 0 255] + [0 f2 0 f0] + RSB ValED1, ValCF1, ValED1, LSL #2 + + SUB ValA, pSrc, srcStep, LSL #1 + LDR ValA, [ValA] ;// Load [a3 a2 a1 a0] + RSB ValED0, ValCF0, ValED0, LSL #2 ;// 4*(Off+E+D) - (Off+C+F) + ADD ValED1, ValED1, ValED1, LSL #2 + ADD ValED0, ValED0, ValED0, LSL #2 ;// 5 * [4*(Off+E+D) - (Off+C+F)] + UXTAB16 ValA0, r0x00ff00ff, ValA ;// [0 a2 0 a0] + [0 255 0 255] + UXTAB16 ValA1, r0x00ff00ff, ValA, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255] + UXTAB16 ValAF0, ValA0, ValF ;// [0 a2 0 a0] + [0 255 0 255] + [0 f2 0 f0] + UXTAB16 ValAF1, ValA1, ValF, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255] + [0 f3 0 f1] + + LDR r0x0fe00fe0, =0x0fe00fe0 ;// [0 (16*255 - 16) 0 (16*255 - 16)]: removes the accumulated offset while leaving +16 for rounding + ADD Acc1, ValCD1, ValAF1 + + LDR ValG, [pSrc, srcStep, LSL #2] ;// Load [g3 g2 g1 g0] + ADD Acc0, ValCD0, ValAF0 ;// Acc0 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E) + UQSUB16 Acc1, Acc1, r0x0fe00fe0 ;// Acc1 -= (16*Off - 16) + UQSUB16 Acc0, Acc0, r0x0fe00fe0 + UXTAB16 ValG0, r0x00ff00ff, ValG ;// [0 g2 0 g0] + [0 255 0 255] + UXTAB16 ValG1, r0x00ff00ff, ValG, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255] + UXTAB16 ValGB0, ValG0, ValB ;// [0 g2 0 g0] + [0 255 0 255] + [0 b2 0 b0] + UXTAB16 ValGB1, ValG1, ValB, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255] + [0 b3 0 b1] + ADD Acc2, ValED0, ValGB0 ;// Acc2 = 16*Off + (B+G) + 20*(D+E) - 5*(C+F) + ADD Acc3, ValED1, ValGB1 + UQSUB16 Acc3, Acc3, r0x0fe00fe0 ;// Acc3 -= (16*Off - 16) + UQSUB16 Acc2, Acc2, r0x0fe00fe0 + USAT16 Acc1, #13, Acc1 ;// Saturate to 8+5 = 13 bits + USAT16 Acc0, #13, Acc0 + USAT16 Acc3, #13,
Acc3 + USAT16 Acc2, #13, Acc2 + AND Acc1, r0x00ff00ff, Acc1, LSR #5 ;// [0 a3 0 a1] (here a = first output row) + AND Acc0, r0x00ff00ff, Acc0, LSR #5 ;// [0 a2 0 a0] + ORR Acc0, Acc0, Acc1, LSL #8 ;// [a3 a2 a1 a0] + AND Acc3, r0x00ff00ff, Acc3, LSR #5 ;// [0 b3 0 b1] (here b = second output row) + AND Acc2, r0x00ff00ff, Acc2, LSR #5 ;// [0 b2 0 b0] + + M_STR Acc0, [pDst], dstStep ;// Store result & adjust pointer + ORR Acc2, Acc2, Acc3, LSL #8 ;// [b3 b2 b1 b0] + M_STR Acc2, [pDst], dstStep ;// Store result & adjust pointer + ADD pSrc, pSrc, srcStep, LSL #1 ;// Advance the source by two rows for the next iteration + + SUBS Counter, Counter, #1 + BGT TwoRowsLoop +End + SUB pDst, pDst, dstStep, LSL #2 ;// Undo the four post-incremented stores: restore pDst + SUB pSrc, pSrc, srcStep, LSL #2 ;// Undo the net +2 rows per iteration: restore pSrc + + M_END + + ENDIF + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s new file mode 100644 index 0000000000000000000000000000000000000000..b1ad17c6fd3f170390ccfbf62675eab5c50a29d7 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s @@ -0,0 +1,273 @@ +;// +;// +;// File Name: armVCM4P10_Interpolate_Chroma_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// +;// +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + IF ARM1136JS + +;// input registers + +pSrc RN 0 +iSrcStep RN 1 +pDst RN 2 +iDstStep RN 3 +iWidth RN 4 +iHeight RN 5 +dx RN 6 +dy RN 7 + + +;// local variable registers +temp RN 11 +r0x20 RN 12 +tmp0x20 RN 14 +return RN 0 +dxPlusdy RN 10 +EightMinusdx RN 8 +EightMinusdy RN 9 +dxEightMinusdx RN 8 +BACoeff RN 6 +DCCoeff RN 7 + +iDstStepx2MinusWidth RN 8 +iSrcStepx2MinusWidth RN 9 +iSrcStep1 RN 10 + +pSrc1 RN 1 +pSrc2 RN 8 +pDst1 RN 8 +pDst2 RN 12 + +pix00 RN 8 +pix01 RN 9 +pix10 RN 10 +pix11 RN 11 + +Out0100 RN 8 +Out1110 RN 10 + +x00 RN 8 +x01 RN 10 +x02 RN 12 +x10 RN 9 +x11 RN 11 +x12 RN 14 +x20 RN 10 +x21 RN 12 +x22 RN 14 + +x01x00 RN 8 +x02x01 RN 10 +x11x10 RN 9 +x12x11 RN 11 +x21x20 RN 10 +x22x21 RN 12 + +OutRow00 RN 12 +OutRow01 RN 14 +OutRow10 RN 10 +OutRow11 RN 12 + +OutRow0100 RN 12 +OutRow1110 RN 12 + +;//----------------------------------------------------------------------------------------------- +;// armVCM4P10_Interpolate_Chroma_asm starts +;//----------------------------------------------------------------------------------------------- + + ;// Write function header + M_START armVCM4P10_Interpolate_Chroma, r11 + + ;// Define stack arguments + M_ARG Width, 4 + M_ARG Height, 4 + M_ARG Dx, 4 + M_ARG Dy, 4 + + ;// Load arguments from the stack + ;// M_STALL ARM1136JS=4 + + M_LDR iWidth, Width + M_LDR iHeight, Height + M_LDR dx, Dx + M_LDR dy, Dy + + ;// EightMinusdx = 8 - dx + ;// EightMinusdy = 8 - dy + + ;// ACoeff = EightMinusdx * EightMinusdy + ;// BCoeff = dx * EightMinusdy + ;// CCoeff = EightMinusdx * dy + ;// DCoeff = dx * dy + + ADD pSrc1, pSrc, iSrcStep ;// pSrc1 = second source row; it reuses r1, so iSrcStep is overwritten here + SUB temp, iWidth, #1 + RSB EightMinusdx, dx, #8 + RSB EightMinusdy, dy, #8 + CMN dx,dy ;// Set flags on dx + dy for BEQ MVIsZero below (nothing in between updates the flags) + ADD dxEightMinusdx, EightMinusdx, dx, LSL #16 + ORR iWidth, iWidth, temp, LSL #16 + + ;// Packed Coeffs.
+ + MUL BACoeff, dxEightMinusdx, EightMinusdy + MUL DCCoeff, dxEightMinusdx, dy + + + ;// Check whether dx or dy is non-zero (Z flag comes from CMN dx,dy above) + + BEQ MVIsZero + +;// Pixel layout: +;// +;// x00 x01 x02 +;// x10 x11 x12 +;// x20 x21 x22 + +;// If fractional mv is not (0, 0) + +OuterLoopMVIsNotZero + +InnerLoopMVIsNotZero + + LDRB x00, [pSrc, #+0] + LDRB x10, [pSrc1, #+0] + LDRB x01, [pSrc, #+1] + LDRB x11, [pSrc1, #+1] + LDRB x02, [pSrc, #+2]! + LDRB x12, [pSrc1, #+2]! + + ORR x01x00, x00, x01, LSL #16 + ;// M_STALL ARM1136JS=1 + ORR x02x01, x01, x02, LSL #16 + MOV r0x20, #32 + ORR x11x10, x10, x11, LSL #16 + ORR x12x11, x11, x12, LSL #16 + + SMLAD x01x00, x01x00, BACoeff, r0x20 + SMLAD x02x01, x02x01, BACoeff, r0x20 + + ;// iWidth packed with MSB (top 16 bits) + ;// as inner loop counter value i.e + ;// (iWidth -1) and LSB (lower 16 bits) + ;// as original width + + SUBS iWidth, iWidth, #1<<17 + + SMLAD OutRow00, x11x10, DCCoeff, x01x00 + SMLAD OutRow01, x12x11, DCCoeff, x02x01 + + RSB pSrc2, pSrc, pSrc1, LSL #1 ;// pSrc2 = 2*pSrc1 - pSrc, i.e. one row below pSrc1 + + MOV OutRow00, OutRow00, LSR #6 + MOV OutRow01, OutRow01, LSR #6 + + LDRB x20,[pSrc2, #-2] + + ORR OutRow0100, OutRow00, OutRow01, LSL #8 + STRH OutRow0100, [pDst], #2 + + LDRB x21,[pSrc2, #-1] + LDRB x22,[pSrc2, #+0] + + ADD pDst1, pDst, iDstStep + + ;// M_STALL ARM1136JS=1 + + ORR x21x20, x20, x21, LSL #16 + ORR x22x21, x21, x22, LSL #16 + + MOV tmp0x20, #32 + + ;// Reusing the packed data x11x10 and x12x11 + + SMLAD x11x10, x11x10, BACoeff, tmp0x20 + SMLAD x12x11, x12x11, BACoeff, tmp0x20 + SMLAD OutRow10, x21x20, DCCoeff, x11x10 + SMLAD OutRow11, x22x21, DCCoeff, x12x11 + + MOV OutRow10, OutRow10, LSR #6 + MOV OutRow11, OutRow11, LSR #6 + + ;// M_STALL ARM1136JS=1 + + ORR OutRow1110, OutRow10, OutRow11, LSL #8 + + STRH OutRow1110, [pDst1, #-2] + + BGT InnerLoopMVIsNotZero + + SUBS iHeight, iHeight, #2 + ADD iWidth, iWidth, #1<<16 + RSB iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1 + SUB iSrcStep1, pSrc1, pSrc ;// Recover the source step from the two row pointers + SUB temp, iWidth, #1 + RSB
iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1 + ADD pDst, pDst, iDstStepx2MinusWidth + ADD pSrc1, pSrc1, iSrcStepx2MinusWidth + ADD pSrc, pSrc, iSrcStepx2MinusWidth + ORR iWidth, iWidth, temp, LSL #16 + BGT OuterLoopMVIsNotZero + MOV return, #OMX_Sts_NoErr + M_EXIT + +;// If fractional mv is (0, 0) + +MVIsZero + ;// M_STALL ARM1136JS=4 +OuterLoopMVIsZero + +InnerLoopMVIsZero + + LDRB pix00, [pSrc], #+1 + LDRB pix01, [pSrc], #+1 + LDRB pix10, [pSrc1], #+1 + LDRB pix11, [pSrc1], #+1 + + ADD pDst2, pDst, iDstStep + SUBS iWidth, iWidth, #1<<17 + + ORR Out0100, pix00, pix01, LSL #8 + ORR Out1110, pix10, pix11, LSL #8 + + STRH Out0100, [pDst], #2 + STRH Out1110, [pDst2], #2 + + BGT InnerLoopMVIsZero + + SUBS iHeight, iHeight, #2 + ADD iWidth, iWidth, #1<<16 + RSB iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1 + SUB iSrcStep1, pSrc1, pSrc ;// Recover the source step from the two row pointers + SUB temp, iWidth, #1 + RSB iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1 + ADD pDst, pDst, iDstStepx2MinusWidth + ADD pSrc1, pSrc1, iSrcStepx2MinusWidth + ADD pSrc, pSrc, iSrcStepx2MinusWidth + ORR iWidth, iWidth, temp, LSL #16 + BGT OuterLoopMVIsZero + MOV return, #OMX_Sts_NoErr + M_END + + ENDIF ;// ARM1136JS + + + END + +;//----------------------------------------------------------------------------------------------- +;// armVCM4P10_Interpolate_Chroma_asm ends +;//----------------------------------------------------------------------------------------------- + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s new file mode 100644 index 0000000000000000000000000000000000000000..f962f70cd9ac1f8193f91ce964ebb6c7a440e596 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s @@ -0,0 +1,74 @@ +;// +;// +;// File Name: armVCM4P10_QuantTables_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// Description: +;// This file contains quantization tables +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + + EXPORT armVCM4P10_MFMatrixQPModTable + EXPORT armVCM4P10_QPDivIntraTable + EXPORT armVCM4P10_QPDivPlusOneTable + +;//-------------------------------------------------------------- +;// This table contains armVCM4P10_MFMatrix [iQP % 6][0] entries, +;// for values of iQP from 0 to 51 (inclusive). The table holds 9 rows x 6 = 54 halfword entries. +;//-------------------------------------------------------------- + + M_TABLE armVCM4P10_MFMatrixQPModTable + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + +;//--------------------------------------------------------------- +;// This table contains ARM_M4P10_Q_OFFSET + 1 + (iQP / 6) values, +;// for values of iQP from 0 to 51 (inclusive). One byte per entry, 9 rows x 6. +;//--------------------------------------------------------------- + + M_TABLE armVCM4P10_QPDivPlusOneTable + DCB 16, 16, 16, 16, 16, 16 + DCB 17, 17, 17, 17, 17, 17 + DCB 18, 18, 18, 18, 18, 18 + DCB 19, 19, 19, 19, 19, 19 + DCB 20, 20, 20, 20, 20, 20 + DCB 21, 21, 21, 21, 21, 21 + DCB 22, 22, 22, 22, 22, 22 + DCB 23, 23, 23, 23, 23, 23 + DCB 24, 24, 24, 24, 24, 24 + +;//------------------------------------------------------------------ +;// This table contains (1 << QbitsPlusOne) / 3 values (Intra case), +;// for values of iQP from 0 to 51 (inclusive).
+;//------------------------------------------------------------------ + + M_TABLE armVCM4P10_QPDivIntraTable, 2 + DCD 21845, 21845, 21845, 21845, 21845, 21845 + DCD 43690, 43690, 43690, 43690, 43690, 43690 + DCD 87381, 87381, 87381, 87381, 87381, 87381 + DCD 174762, 174762, 174762, 174762, 174762, 174762 + DCD 349525, 349525, 349525, 349525, 349525, 349525 + DCD 699050, 699050, 699050, 699050, 699050, 699050 + DCD 1398101, 1398101, 1398101, 1398101, 1398101, 1398101 + DCD 2796202, 2796202, 2796202, 2796202, 2796202, 2796202 + DCD 5592405, 5592405, 5592405, 5592405, 5592405, 5592405 + + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s new file mode 100644 index 0000000000000000000000000000000000000000..241d18837a889ec8b3f0158a2cb0209b893aab8b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s @@ -0,0 +1,407 @@ +;// +;// +;// File Name: armVCM4P10_TransformResidual4x4_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// +;// +;// +;// Description: +;// Transform Residual 4x4 Coefficients +;// +;// + + +;// Include standard headers + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + +;// Import symbols required from other files +;// (For example tables) + + + + +;// Set debugging level +;//DEBUG_ON SETL {TRUE} + + + +;// Guarding implementation by the processor name + + IF ARM1136JS + +;//Input Registers +pDst RN 0 +pSrc RN 1 + +;//Output Registers + + +;//Local Scratch Registers + +;// Packed Input pixels +in00 RN 2 ;// Src[0] & Src[1] +in02 RN 3 ;// Src[2] & Src[3] +in10 RN 4 ;// Src[4] & Src[5] +in12 RN 5 ;// Src[6] & Src[7] +in20 RN 6 ;// Src[8] & Src[9] +in22 RN 7 ;// Src[10] & Src[11] +in30 RN 8 ;// Src[12] & Src[13] +in32 RN 9 ;// Src[14] & Src[15] + +;// Transpose for Row operations (Rows to cols) +trRow00 RN 2 +trRow10 RN 10 +trRow02 RN 3 +trRow12 RN 5 +trRow20 RN 11 +trRow30 RN 12 +trRow32 RN 14 +trRow22 RN 7 + +;// Intermediate calculations +e0 RN 4 +e1 RN 6 +e2 RN 8 +e3 RN 9 +constZero RN 1 ;// Shares r1 with pSrc; zeroed only after the source block has been loaded + +;// Row operated pixels +rowOp00 RN 2 +rowOp10 RN 10 +rowOp20 RN 11 +rowOp30 RN 12 +rowOp02 RN 3 +rowOp12 RN 5 +rowOp22 RN 7 +rowOp32 RN 14 + +;// Transpose for column operations +trCol00 RN 2 +trCol02 RN 3 +trCol10 RN 4 +trCol12 RN 5 +trCol20 RN 6 +trCol22 RN 7 +trCol30 RN 8 +trCol32 RN 9 + +;// Intermediate calculations +g0 RN 10 +g1 RN 11 +g2 RN 12 +g3 RN 14 + +;// Column operated pixels +colOp00 RN 2 +colOp02 RN 3 +colOp10 RN 4 +colOp12 RN 5 +colOp20 RN 6 +colOp22 RN 7 +colOp30 RN 8 +colOp32 RN 9 + + +temp1 RN 10 ;// Temporary scratch variables +const1 RN 11 +const2 RN 12 +mask RN 14 + +;// Output pixels +out00 RN 2 +out02 RN 3 +out10 RN 4 +out12 RN 5 +out20 RN 6 +out22 RN 7 +out30 RN 8 +out32 RN 9 + + + + ;// Allocate stack memory required by the function + + + ;// Write function header + M_START armVCM4P10_TransformResidual4x4,r11 + + ;****************************************************************** + ;// The strategy used in implementing the
transform is as follows:* + ;// Load the 4x4 block into 8 registers * + ;// Transpose the 4x4 matrix * + ;// Perform the row operations (on columns) using SIMD * + ;// Transpose the 4x4 result matrix * + ;// Perform the column operations * + ;// Store the 4x4 block at one go * + ;****************************************************************** + + ;// Load all the 4x4 pixels + + LDMIA pSrc,{in00,in02,in10,in12,in20,in22,in30,in32} + + MOV constZero,#0 ;// Zero operand: SHADD16 x,constZero halves each halfword, i.e. shifts right by 1 + ;LDR constZero,=0x00000000 + + ;***************************************************************** + ;// + ;// Transpose the matrix in order to perform row ops as column ops + ;// Input: in[][] = original matrix + ;// Output: trRow[][]= transposed matrix + ;// Step1: Obtain the LL part of the transposed matrix + ;// Step2: Obtain the HL part + ;// Step3: Obtain the LH part + ;// Step4: Obtain the HH part + ;// + ;***************************************************************** + + ;// LL 2x2 transposed matrix + ;// d0 d1 - - + ;// d4 d5 - - + ;// - - - - + ;// - - - - + + PKHTB trRow10,in10,in00,ASR #16 ;// [5 4] = [f5:f1] + PKHBT trRow00,in00,in10,LSL #16 ;// [1 0] = [f4:f0] + + ;// HL 2x2 transposed matrix + ;// - - - - + ;// - - - - + ;// d8 d9 - - + ;// d12 d13 - - + + + PKHTB trRow30,in12,in02,ASR #16 ;// [13 12] = [7 3] + PKHBT trRow20,in02,in12,LSL #16 ;// [9 8] = [6 2] + + ;// LH 2x2 transposed matrix + ;// - - d2 d3 + ;// - - d6 d7 + ;// - - - - + ;// - - - - + + PKHBT trRow02,in20,in30,LSL #16 ;// [3 2] = [f12:f8] + PKHTB trRow12,in30,in20,ASR #16 ;// [7 6] = [f13:f9] + + + + + ;// HH 2x2 transposed matrix + ;// - - - - + ;// - - - - + ;// - - d10 d11 + ;// - - d14 d15 + + PKHTB trRow32,in32,in22,ASR #16 ;// [15 14] = [15 11] + PKHBT trRow22,in22,in32,LSL #16 ;// [11 10] = [14 10] + + + ;**************************************** + ;// Row Operations (Performed on columns) + ;**************************************** + + + ;// SIMD operations on first two columns(two rows of
the original matrix) + + + SADD16 e0, trRow00,trRow20 ;// e0 = d0 + d2 + SSUB16 e1, trRow00,trRow20 ;// e1 = d0 - d2 + SHADD16 e2, trRow10,constZero ;// (f1>>1) constZero is a register holding 0 + SHADD16 e3, trRow30,constZero ;// avoid pipeline stalls for e2 and e3 + SSUB16 e2, e2, trRow30 ;// e2 = (d1>>1) - d3 + SADD16 e3, e3, trRow10 ;// e3 = d1 + (d3>>1) + SADD16 rowOp00, e0, e3 ;// f0 = e0 + e3 + SADD16 rowOp10, e1, e2 ;// f1 = e1 + e2 + SSUB16 rowOp20, e1, e2 ;// f2 = e1 - e2 + SSUB16 rowOp30, e0, e3 ;// f3 = e0 - e3 + + ;// SIMD operations on next two columns(next two rows of the original matrix) + + SADD16 e0, trRow02,trRow22 ;// Same butterfly as above, applied to the other two columns + SSUB16 e1, trRow02,trRow22 + SHADD16 e2, trRow12,constZero ;//(f1>>1) constZero is a register holding 0 + SHADD16 e3, trRow32,constZero + SSUB16 e2, e2, trRow32 + SADD16 e3, e3, trRow12 + SADD16 rowOp02, e0, e3 + SADD16 rowOp12, e1, e2 + SSUB16 rowOp22, e1, e2 + SSUB16 rowOp32, e0, e3 + + + ;***************************************************************** + ;// Transpose the resultant matrix + ;// Input: rowOp[][] + ;// Output: trCol[][] + ;***************************************************************** + + ;// LL 2x2 transposed matrix + ;// d0 d1 - - + ;// d4 d5 - - + ;// - - - - + ;// - - - - + + PKHTB trCol10,rowOp10,rowOp00,ASR #16 ;// [5 4] = [f5:f1] + PKHBT trCol00,rowOp00,rowOp10,LSL #16 ;// [1 0] = [f4:f0] + + ;// HL 2x2 transposed matrix + ;// - - - - + ;// - - - - + ;// d8 d9 - - + ;// d12 d13 - - + + + PKHTB trCol30,rowOp12,rowOp02,ASR #16 ;// [13 12] = [7 3] + PKHBT trCol20,rowOp02,rowOp12,LSL #16 ;// [9 8] = [6 2] + + ;// LH 2x2 transposed matrix + ;// - - d2 d3 + ;// - - d6 d7 + ;// - - - - + ;// - - - - + + PKHBT trCol02,rowOp20,rowOp30,LSL #16 ;// [3 2] = [f12:f8] + PKHTB trCol12,rowOp30,rowOp20,ASR #16 ;// [7 6] = [f13:f9] + + + + + ;// HH 2x2 transposed matrix + ;// - - - - + ;// - - - - + ;// - - d10 d11 + ;// - - d14 d15 + + PKHTB trCol32,rowOp32,rowOp22,ASR #16 ;// [15 14] = [15 11] + PKHBT
trCol22,rowOp22,rowOp32,LSL #16 ;// [11 10] = [14 10] + + + ;******************************* + ;// Column Operations + ;******************************* + + + ;// SIMD operations on first two columns + + + SADD16 g0, trCol00,trCol20 + SSUB16 g1, trCol00,trCol20 + SHADD16 g2, trCol10,constZero ;// (f1>>1) constZero is a register holding 0 + SHADD16 g3, trCol30,constZero + SSUB16 g2, g2, trCol30 + SADD16 g3, g3, trCol10 + SADD16 colOp00, g0, g3 + SADD16 colOp10, g1, g2 + SSUB16 colOp20, g1, g2 + SSUB16 colOp30, g0, g3 + + ;// SIMD operations on next two columns + + SADD16 g0, trCol02,trCol22 + SSUB16 g1, trCol02,trCol22 + SHADD16 g2, trCol12,constZero ;// (f1>>1) constZero is a register holding 0 + SHADD16 g3, trCol32,constZero + SSUB16 g2, g2, trCol32 + SADD16 g3, g3, trCol12 + SADD16 colOp02, g0, g3 + SADD16 colOp12, g1, g2 + SSUB16 colOp22, g1, g2 + SSUB16 colOp32, g0, g3 + + + + + + ;************************************************ + ;// Calculate final value (colOp[i][j] + 32)>>6 + ;************************************************ + + ;// const1: Serves dual purpose + ;// (1) Add #32 to both the lower and higher 16bits of the SIMD result + ;// (2) Convert the lower 16 bit value to an unsigned number (Add 32768) + + LDR const1, =0x00208020 + + LDR mask, =0xffff03ff ;// Used to mask the down shifted 6 bits + + ;// const2(#512): used to convert the lower 16bit number back to signed value + + MOV const2,#0x200 ;// const2 = 2^9 + + ;// First Row + + SADD16 colOp00, colOp00, const1 + SADD16 colOp02, colOp02, const1 + AND colOp00, mask, colOp00, ASR #6 + AND colOp02, mask, colOp02, ASR #6 + SSUB16 out00,colOp00,const2 + SSUB16 out02,colOp02,const2 + + + ;// Second Row + + SADD16 colOp10, colOp10, const1 + SADD16 colOp12, colOp12, const1 + AND colOp10, mask, colOp10, ASR #6 + AND colOp12, mask, colOp12, ASR #6 + SSUB16 out10,colOp10,const2 + SSUB16 out12,colOp12,const2 + + + ;// Third Row + + SADD16 colOp20, colOp20, const1 + SADD16 colOp22, colOp22, const1 + AND
colOp20, mask, colOp20, ASR #6 + AND colOp22, mask, colOp22, ASR #6 + SSUB16 out20,colOp20,const2 + SSUB16 out22,colOp22,const2 + + + ;// Fourth Row + + SADD16 colOp30, colOp30, const1 + SADD16 colOp32, colOp32, const1 + AND colOp30, mask, colOp30, ASR #6 + AND colOp32, mask, colOp32, ASR #6 + SSUB16 out30,colOp30,const2 + SSUB16 out32,colOp32,const2 + + + + + ;*************************** + ;// Store all the 4x4 pixels + ;*************************** + + STMIA pDst,{out00,out02,out10,out12,out20,out22,out30,out32} + + + + ;// No return value is set before the function tail + +End + + + ;// Write function tail + M_END + + ENDIF ;//ARM1136JS + + + + + + + +;// Guarding implementation by the processor name + + + END \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s new file mode 100644 index 0000000000000000000000000000000000000000..ad16d9cb09070f115a145d0c5247cddac9973528 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s @@ -0,0 +1,92 @@ +;// +;// +;// File Name: armVCM4P10_UnpackBlock4x4_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// +;// +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + +;// Define the processor variants supported by this file + + M_VARIANTS ARM1136JS + + + IF ARM1136JS + +;//-------------------------------------- +;// Input Arguments and their scope/usage +;//-------------------------------------- +ppSrc RN 0 ;// Persistent variable +pDst RN 1 ;// Persistent variable + +;//-------------------------------- +;// Variables and their scope/usage +;//-------------------------------- +pSrc RN 2 ;// Persistent variables +Flag RN 3 +Value RN 4 +Value2 RN 5 +strOffset RN 6 +cstOffset RN 7 + + + M_START armVCM4P10_UnpackBlock4x4, r7 + + LDR pSrc, [ppSrc] ;// Load pSrc + MOV cstOffset, #31 ;// To be used in the loop, to compute offset + + ;//----------------------------------------------------------------------- + ;// First, zero-fill the whole coefficient buffer (16 halfwords = 32 bytes) + ;//----------------------------------------------------------------------- + + MOV Value, #0 ;// Initialize the zero value + MOV Value2, #0 ;// Second register of the Value/Value2 (r4/r5) pair written by STRD, also zero + LDRB Flag, [pSrc], #1 ;// Preload the first flag byte before entering the loop + + STRD Value, [pDst, #0] ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0 + STRD Value, [pDst, #8] ;// pDst[4] = pDst[5] = pDst[6] = pDst[7] = 0 + STRD Value, [pDst, #16] ;// pDst[8] = pDst[9] = pDst[10] = pDst[11] = 0 + STRD Value, [pDst, #24] ;// pDst[12] = pDst[13] = pDst[14] = pDst[15] = 0 + + ;//---------------------------------------------------------------------------- + ;// The loop below parses and unpacks the input stream. The C-model has + ;// a somewhat complicated logic for sign extension. But in the v6 version, + ;// that can be easily taken care by loading the data from stream as + ;// SIGNED byte/halfword. So, based on the first TST instruction, 8-bits or + ;// 16-bits are read.
+ ;// + ;// Next, to compute the offset, where the unpacked value needs to be stored, + ;// we modify the computation to perform [(Flag & 15) << 1] as [(Flag << 1) & 31] + ;// This results in a saving of one cycle. + ;//---------------------------------------------------------------------------- + +unpackLoop + TST Flag, #0x10 ;// Computing (Flag & 0x10) + LDRSBNE Value2,[pSrc,#1] ;// Load byte wise to avoid unaligned access (high byte, sign-extended) + LDRBNE Value, [pSrc], #2 ;// Low byte of the 16-bit value; pSrc advances by 2 + AND strOffset, cstOffset, Flag, LSL #1 ;// strOffset = (Flag & 15) << 1; + LDRSBEQ Value, [pSrc], #1 ;// Value = (OMX_S8) *pSrc++ (signed byte load) + ORRNE Value,Value,Value2, LSL #8 ;// Value = low byte | (sign-extended high byte << 8) + + TST Flag, #0x20 ;// Computing (Flag & 0x20) to check, if we're done + LDRBEQ Flag, [pSrc], #1 ;// Flag = (OMX_U8) *pSrc++, for next iteration + STRH Value, [pDst, strOffset] ;// Store at offset + BEQ unpackLoop ;// Branch to the loop beginning + + STR pSrc, [ppSrc] ;// Update the bitstream pointer + M_END + + ENDIF + + + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c new file mode 100644 index 0000000000000000000000000000000000000000..c2e6b6012ad1dd426638d653438ea4c6e20eb9e3 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c @@ -0,0 +1,88 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DeblockChroma_I.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + * + * H.264 intra chroma deblock + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_DeblockChroma_I + * + * Description: + * Performs deblocking filtering on all edges of the chroma macroblock (16x16). + * + * Remarks: + * + * Parameters: + * [in] pSrcDst pointer to the input macroblock. Must be 8-byte aligned. + * [in] srcdstStep Step of the arrays; must be a multiple of 8 + * [in] pAlpha pointer to a 2x2 array of alpha thresholds, organized as follows: { external + * vertical edge, internal vertical edge, external + * horizontal edge, internal horizontal edge } + * [in] pBeta pointer to a 2x2 array of beta thresholds, organized as follows: { external + * vertical edge, internal vertical edge, external horizontal edge, + * internal horizontal edge } + * [in] pThresholds Array of size 8x2 of Thresholds (TC0) (values for the left or + * above edge of each 4x2 or 2x4 block, arranged in vertical block order + * and then in horizontal block order). Must be 4-byte aligned. + * [in] pBS array of size 16x2 of BS parameters (arranged in scan block order for vertical edges and then horizontal edges); + * valid in the range [0,4] with the following restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^1]== 4. Must be 4-byte aligned. + * [out] pSrcDst pointer to filtered output macroblock + * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - Any of the pointers pSrcDst, pAlpha, pBeta, pThresholds, or pBS is NULL. + * - pSrcDst is not 8-byte aligned. + * - either pThresholds or pBS is not 4-byte aligned. + * - pBS is out of range, i.e., one of the following conditions is true: pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && pBS[i^1]!=4) for 0<=i<=3. + * - srcdstStep is not a multiple of 8.
+ * + */ +OMXResult omxVCM4P10_DeblockChroma_I( + OMX_U8* pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8* pAlpha, + const OMX_U8* pBeta, + const OMX_U8* pThresholds, + const OMX_U8 *pBS +) +{ + OMXResult errorCode; + /* Argument checks: NULL pointers, pSrcDst 8-byte alignment, step multiple of 8, pThresholds/pBS 4-byte alignment. */ + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr); + armRetArgErrIf(srcdstStep & 7, OMX_Sts_BadArgErr); + armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr); + armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr); + /* Vertical edges first, using the leading entries of each table. */ + errorCode = omxVCM4P10_FilterDeblockingChroma_VerEdge_I( + pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS); + /* NOTE(review): invoked without a trailing ';' - presumably the macro expands to a complete statement; confirm in armCOMM.h. */ + armRetArgErrIf(errorCode != OMX_Sts_NoErr, errorCode) + /* Horizontal edges use the later table entries: pAlpha+2, pBeta+2, pThresholds+8, pBS+16. */ + errorCode = omxVCM4P10_FilterDeblockingChroma_HorEdge_I( + pSrcDst, srcdstStep, pAlpha+2, pBeta+2, pThresholds+8, pBS+16); + + return errorCode; +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c new file mode 100644 index 0000000000000000000000000000000000000000..6023862268299dddff46d23bdb6b48d2714cfd78 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c @@ -0,0 +1,91 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DeblockLuma_I.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + * + * H.264 luma deblock + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + + +/** + * Function: omxVCM4P10_DeblockLuma_I + * + * Description: + * This function performs deblock filtering on the horizontal and vertical edges of a luma macroblock + * (16x16). + * + * Remarks: + * + * Parameters: + * [in] pSrcDst pointer to the input macroblock. Must be 8-byte aligned. + * [in] srcdstStep image width (step); must be a multiple of 8 + * [in] pAlpha pointer to a 2x2 table of alpha thresholds, organized as follows: { external + * vertical edge, internal vertical edge, external horizontal + * edge, internal horizontal edge } + * [in] pBeta pointer to a 2x2 table of beta thresholds, organized as follows: { external + * vertical edge, internal vertical edge, external horizontal edge, + * internal horizontal edge } + * [in] pThresholds pointer to a 16x2 table of threshold (TC0), organized as follows: { values for + * the left or above edge of each 4x4 block, arranged in vertical block order + * and then in horizontal block order }. Must be 4-byte aligned. + * [in] pBS pointer to a 16x2 table of BS parameters arranged in scan block order for vertical edges and then horizontal edges; + * valid in the range [0,4] with the following restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^1]== 4. Must be 4-byte aligned. + * [out] pSrcDst pointer to filtered output macroblock. + * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - Any of the pointers pSrcDst, pAlpha, pBeta, pThresholds or pBS is NULL. + * - pSrcDst is not 8-byte aligned, or either pThresholds or pBS is not 4-byte aligned. + * - srcdstStep is not a multiple of 8 + * - pBS is out of range, i.e., one of the following conditions is true: pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && pBS[i^1]!=4) for 0<=i<=3.
+ * + * + */ + +OMXResult omxVCM4P10_DeblockLuma_I( + OMX_U8* pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8* pAlpha, + const OMX_U8* pBeta, + const OMX_U8* pThresholds, + const OMX_U8 *pBS +) +{ + OMXResult errorCode; + /* Argument checks: NULL pointers, pSrcDst 8-byte alignment, step multiple of 8, pThresholds/pBS 4-byte alignment. */ + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr); + armRetArgErrIf(srcdstStep & 7, OMX_Sts_BadArgErr); + armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr); + armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr); + /* Vertical edges first, using the leading entries of each table. */ + errorCode = omxVCM4P10_FilterDeblockingLuma_VerEdge_I( + pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS); + /* NOTE(review): invoked without a trailing ';' - presumably the macro expands to a complete statement; confirm in armCOMM.h. */ + armRetArgErrIf(errorCode != OMX_Sts_NoErr, errorCode) + /* Horizontal edges use the later table entries: pAlpha+2, pBeta+2, pThresholds+16, pBS+16. */ + errorCode = omxVCM4P10_FilterDeblockingLuma_HorEdge_I( + pSrcDst, srcdstStep, pAlpha+2, pBeta+2, pThresholds+16, pBS+16); + + return errorCode; +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c new file mode 100644 index 0000000000000000000000000000000000000000..a19f277707f8753143e1d394ef67ba83091eb4d8 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c @@ -0,0 +1,62 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + * + * + * H.264 decode coefficients module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC + * + * Description: + * Performs CAVLC decoding and inverse raster scan for 2x2 block of + * ChromaDCLevel. The decoded coefficients in packed position-coefficient + * buffer are stored in increasing raster scan order, namely position order. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream Double pointer to current byte in bit stream + * buffer + * [in] pOffset Pointer to current bit position in the byte + * pointed to by *ppBitStream + * [out] ppBitStream *ppBitStream is updated after each block is decoded + * [out] pOffset *pOffset is updated after each block is decoded + * [out] pNumCoeff Pointer to the number of nonzero coefficients + * in this block + * [out] ppPosCoefbuf Double pointer to destination residual + * coefficient-position pair buffer + * + * Return Value: + * Standard omxError result. See enumeration for possible result codes. 
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (
+    const OMX_U8** ppBitStream,
+    OMX_S32* pOffset,
+    OMX_U8* pNumCoeff,
+    OMX_U8** ppPosCoefbuf
+ )
+
+{
+    return armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff,    /* thin wrapper: the caller's four arguments are forwarded unchanged */
+        ppPosCoefbuf, 17, 4);    /* the literals 17 and 4 occupy the argument slots that omxVCM4P10_DecodeCoeffsToPairCAVLC fills with sVLCSelect and sMaxNumCoeff */
+
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
new file mode 100644
index 0000000000000000000000000000000000000000..99bb4ce8514001616120a893c9105c82853502c1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
@@ -0,0 +1,68 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 decode coefficients module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse zigzag scan for a 4x4 block of
+ * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel.
+ * Inverse field scan is not supported. The decoded coefficients in the packed
+ * position-coefficient buffer are stored in increasing zigzag order instead
+ * of position order.
+ *
+ * Remarks:
+ * Forwards all six arguments unchanged to armVCM4P10_DecodeCoeffsToPair.
+ * Parameters:
+ * [in] ppBitStream Double pointer to current byte in bit stream buffer
+ * [in] pOffset Pointer to current bit position in the byte pointed
+ * to by *ppBitStream
+ * [in] sMaxNumCoeff Maximum number of non-zero coefficients in current
+ * block
+ * [in] sVLCSelect VLC table selector, obtained from number of non-zero
+ * AC coefficients of above and left 4x4 blocks. It is
+ * equivalent to the variable nC described in H.264 standard
+ * table 9-5, except its value can't be less than zero.
+ * [out] ppBitStream *ppBitStream is updated after each block is decoded
+ * [out] pOffset *pOffset is updated after each block is decoded
+ * [out] pNumCoeff Pointer to the number of nonzero coefficients in
+ * this block
+ * [out] ppPosCoefbuf Double pointer to destination residual
+ * coefficient-position pair buffer
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC(
+    const OMX_U8** ppBitStream,
+    OMX_S32* pOffset,
+    OMX_U8* pNumCoeff,
+    OMX_U8**ppPosCoefbuf,
+    OMX_INT sVLCSelect,
+    OMX_INT sMaxNumCoeff
+ )
+{
+    return armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff,    /* no local validation: the helper's result is returned as is */
+        ppPosCoefbuf, sVLCSelect, sMaxNumCoeff);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
new file mode 100644
index 0000000000000000000000000000000000000000..2b7148686a3e331c8bb4e7c2969dc6d0cd25b54b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
@@ -0,0 +1,480 @@
+;// +;// (c) Copyright 2007 ARM Limited. All Rights Reserved.
+;// +;// Description: +;// H.264 inverse quantize and transform module +;// +;// + + + +;// Include standard headers + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + +;// Import symbols required from other files +;// (For example tables) + + IMPORT armVCM4P10_UnpackBlock4x4 + IMPORT armVCM4P10_TransformResidual4x4 + IMPORT armVCM4P10_QPDivTable + IMPORT armVCM4P10_VMatrixU16 + IMPORT armVCM4P10_QPModuloTable + + M_VARIANTS ARM1136JS, ARM1136JS_U + +;// Set debugging level +;//DEBUG_ON SETL {TRUE} + + +;// Static Function: armVCM4P10_DequantLumaAC4x4 + +;// Guarding implementation by the processor name + + IF ARM1136JS + +;//Input Registers +pSrcDst RN 0 +QP RN 1 + + +;//Output Registers + + +;//Local Scratch Registers +pQPdiv RN 4 +pQPmod RN 5 +pVRow RN 2 +QPmod RN 6 +shift RN 3 +rowLuma01 RN 1 +rowLuma23 RN 4 + +SrcDst00 RN 5 +SrcDst02 RN 6 +SrcDst10 RN 7 +SrcDst12 RN 8 +SrcDst20 RN 9 +SrcDst22 RN 10 +SrcDst30 RN 11 +SrcDst32 RN 12 + +temp1 RN 2 +temp2 RN 3 +temp3 RN 14 + + + ;// Allocate stack memory required by the function + + ;// Write function header + M_START armVCM4P10_DequantLumaAC4x4,r11 + + LDR pQPmod,=armVCM4P10_QPModuloTable + LDR pQPdiv,=armVCM4P10_QPDivTable + LDR pVRow,=armVCM4P10_VMatrixU16 + + LDRSB QPmod,[pQPmod,QP] ;// (QP%6) * 6 + LDRSB shift,[pQPdiv,QP] ;// Shift = QP / 6 + + LDRH rowLuma01,[pVRow,QPmod]! 
;// rowLuma01 = [00|0a] + LDRH temp3,[pVRow,#2] ;// temp3 = [00|0b] + LDRH rowLuma23,[pVRow,#4] ;// rowLuma23 = [00|0c] + ORR rowLuma01,rowLuma01,temp3,LSL #16 ;// rowLuma01 = [0b|0a] + + ;// Load all the 16 'src' values + LDMIA pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32} + + + ;//********************************************************************************************* + ;// + ;// 'Shift' ranges between [0,8] + ;// So we can shift the packed rowLuma values [0b|0a] with a single LSL operation + ;// + ;//********************************************************************************************* + + LSL rowLuma01,rowLuma01,shift + LSL rowLuma23,rowLuma23,shift + + + ;//********************************************************************************************** + ;// + ;// The idea is to unroll the Loop completely + ;// All the 16 src values are loaded at once into 8 registers : SrcDst (above) + ;// 0<= armVCM4P10_PosToVCol4x4[i] <=2 for any 'i<16' + ;// So the only values of pVRow[i] that need to be loaded are for i=0,1,2 + ;// These 3 values are loaded into rowLuma01 and rowLuma23 (above) + ;// We first calculate pVRow[armVCM4P10_PosToVCol4x4[i]]) << Shift which fits into 16 bits (above) + ;// Then the product pSrcDst[i] * (pVRow[armVCM4P10_PosToVCol4x4[i]] << Shift) is calculated + ;// Here we interleave the PKHBT operations for various rows to avoide pipeline stalls + ;// + ;// We then pack the two 16 bit multiplication result into a word and store at one go + ;// + ;//********************************************************************************************** + + + ;// Row 1 + + + SMULTB temp1,SrcDst00,rowLuma23 ;// pSrcDst[1] * (pVRow[2]< (above) + ;// 0<= armVCM4P10_PosToVCol4x4[i] <=2 for any 'i<16' + ;// So the only values of pVRow[i] that need to be loaded are for i=0,1,2 + ;// These 3 values are loaded into rowLuma01 and rowLuma23 (above) + ;// We first calculate pVRow[armVCM4P10_PosToVCol4x4[i]]) << Shift 
which fits into 16 bits (above) + ;// Then the product pSrcDst[i] * (pVRow[armVCM4P10_PosToVCol4x4[i]] << Shift) is calculated + ;// Here we interleave the PKHBT operations for various rows to avoide pipeline stalls + ;// + ;// We then pack the two 16 bit multiplication result into a word and store at one go + ;// + ;//********************************************************************************************** + + + ;// Row 1 + + + SMULTB temp1,SrcDst00,rowLuma23 ;// pSrcDst[1] * (pVRow[2]<>3)) + ;// + ;// Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3 + ;// = (4*q0 - 4*p0 + p1 - q1 + 4)>>3 + ;// = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3 + + USUB8 t1, p_1, p_0 + USUB8 t2, q_1, q_0 + BEQ NoFilterFilt0 + + LDRB tC0, [pThresholds],#1 + SSUB8 t1, t1, t2 + LDRB tC1, [pThresholds],#1 + M_STR pThresholds, ppThresholds + UHSUB8 t4, p_0, q_0 + ORR tC, tC0, tC1, LSL #16 + USUB8 t5, p_0, q_0 + AND t5, t5, m01 + SHSUB8 t1, t1, t5 + ORR tC, tC, LSL #8 + SSUB8 t1, t1, t5 + SHSUB8 t1, t1, t4 + UQADD8 tC, tC, m01 + SADD8 t1, t1, m01 + USUB8 t5, filt, m01 + SHSUB8 t1, t1, t4 + SEL tC, tC, m00 + + ;// Split into positive and negative part and clip + + SSUB8 t1, t1, m00 + SEL pos, t1, m00 + USUB8 neg, pos, t1 + USUB8 t3, pos, tC + SEL pos, tC, pos + USUB8 t3, neg, tC + SEL neg, tC, neg + UQADD8 P_0, p_0, pos + UQSUB8 Q_0, q_0, pos + UQSUB8 P_0, P_0, neg + UQADD8 Q_0, Q_0, neg + + SUB pQ0, pQ0, srcdstStep, LSL #1 + + ;// Choose to store the filtered + ;// value or the original pixel + USUB8 t1, filt, m01 + SEL P_0, P_0, p_0 + SEL Q_0, Q_0, q_0 + +StoreResultAndExit + + ;//---------Store result--------------- + + ;// P_0 = [r0p0 r1p0 r2p0 r3p0] + ;// Q_0 = [r0q0 r1q0 r2q0 r3q0] + + M_STR P_0, [pQ0], srcdstStep + STR Q_0, [pQ0], #4 + + M_LDRD XY, pBS, pXYBS + M_LDRD alpha, beta, pAlphaBeta0 + + SUB pQ0, pQ0, srcdstStep, LSL #1 + + ADDS XY, XY, XY + M_STR XY, pXYBS + BCC LoopX + +;//-------- Common Exit of LoopY ----------------- + ;// Align the pointers + +ExitLoopY + ADD pBS, pBS, #4 + 
M_LDRD alpha, beta, pAlphaBeta1 + SUB pQ0, pQ0, #8 + ADD pQ0, pQ0, srcdstStep, LSL #2 + M_STRD alpha, beta, pAlphaBeta0 + + BNE LoopY + MOV r0, #OMX_Sts_NoErr + +;//-----------------End Filter-------------------- + M_END + + ENDIF + + END + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s new file mode 100644 index 0000000000000000000000000000000000000000..00c8354cfc0dc6c3d81f3ef5a4e1c958a2bc2ed6 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s @@ -0,0 +1,437 @@ +;// +;// +;// File Name: omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + + IF ARM1136JS + + +MASK_0 EQU 0x00000000 +MASK_1 EQU 0x01010101 +MASK_2 EQU 0x0000ff00 +LOOP_COUNT EQU 0x50000000 + +;// Declare input registers + +pSrcDst RN 0 +srcdstStep RN 1 +pAlphaArg RN 2 +pBetaArg RN 3 + +pThresholds RN 6 +pBS RN 9 +pQ0 RN 0 +bS RN 2 +bSTemp RN 10 + +alpha RN 6 +alpha0 RN 6 +alpha1 RN 8 + +beta RN 7 +beta0 RN 7 +beta1 RN 9 + +;// Declare Local/Temporary variables + +;// Pixels +p_0 RN 3 +p_1 RN 5 +q_0 RN 8 +q_1 RN 9 + +;// Unpacking +mask RN 11 + +row0 RN 2 +row1 RN 4 +row2 RN 5 +row3 RN 3 + +row4 RN 8 +row5 RN 9 +row6 RN 10 +row7 RN 12 + +tunpk0 RN 2 +tunpk2 RN 10 +tunpk3 RN 12 + +tunpk4 RN 4 +tunpk5 RN 5 +tunpk6 RN 14 +tunpk7 RN 2 + +;// Filtering + +dp0q0 RN 12 +dp1p0 RN 12 +dq1q0 RN 12 + +ap0q0 RN 4 +filt RN 2 + +m00 RN 14 +m01 RN 11 + +pQ0 RN 0 +Step RN 1 + +;// Output + +P_0 RN 6 +Q_0 RN 7 + +;//Declarations for bSLT4 kernel + +tC RN 12 +tC0 RN 5 +tC1 RN 12 +pos RN 5 +neg RN 9 + +;//Declarations 
for bSGE4 kernel + + +;// Miscellanous +XY RN 8 + +a RN 10 +t1 RN 10 +t2 RN 12 +t3 RN 14 +t4 RN 6 +t5 RN 5 + + + ;// Allocate stack memory + M_ALLOC4 ppThresholds,4 + M_ALLOC8 pAlphaBeta0,8 + M_ALLOC8 pAlphaBeta1,8 + M_ALLOC8 pXYBS,4 + M_ALLOC4 ppBS,4 + + ;// Function header + M_START omxVCM4P10_FilterDeblockingChroma_VerEdge_I, r11 + + ;//Input arguments on the stack + M_ARG ppThresholdsArg, 4 + M_ARG ppBSArg, 4 + + LDRB alpha1, [pAlphaArg,#1] + LDRB beta1, [pBetaArg,#1] + M_LDR pThresholds, ppThresholdsArg + LDR a,=MASK_1 + LDRB beta0, [pBetaArg] + M_STR pThresholds, ppThresholds + LDRB alpha0, [pAlphaArg] + + MUL alpha1, alpha1, a + MUL beta1, beta1, a + MUL alpha0, alpha0, a + MUL beta0, beta0, a + + M_STRD alpha1, beta1, pAlphaBeta1 + M_LDR pBS, ppBSArg + M_STRD alpha0, beta0, pAlphaBeta0 + + LDR XY,=LOOP_COUNT + M_STRD XY, pBS, pXYBS + + +LoopY +LoopX +;//---------------Load Pixels------------------- + +;//----------------Pack q0-q1----------------------- + LDRH bS, [pBS], #8 + LDR mask, =MASK_2 + + M_LDRH row4, [pQ0], srcdstStep + CMP bS, #0 + M_STR pBS, ppBS + M_LDRH row5, [pQ0], srcdstStep + BEQ.W NoFilterBS0 + LDRH row6, [pQ0] + LDRH row7, [pQ0, srcdstStep] + + ;// row4 = [0 0 r0q0 r0q1] + ;// row5 = [0 0 r1q0 r1q1] + ;// row6 = [0 0 r2q0 r2q1] + ;// row7 = [0 0 r3q0 r3q1] + + AND tunpk4, mask, row4 + AND tunpk5, mask, row4, LSL#8 + UXTAB tunpk4, tunpk4, row5, ROR#8 + UXTAB tunpk5, tunpk5, row5 + AND tunpk6, mask, row6 + AND tunpk7, mask, row6, LSL#8 + UXTAB tunpk6, tunpk6, row7, ROR#8 + UXTAB tunpk7, tunpk7, row7 + + ;// tunpk4 = [0 0 r0q0 r1q0] + ;// tunpk5 = [0 0 r0q1 r1q1] + ;// tunpk6 = [0 0 r2q0 r3q0] + ;// tunpk7 = [0 0 r2q1 r3q1] + + SUB pQ0, pQ0, srcdstStep, LSL #1 + SUB pQ0, pQ0, #2 + + PKHBT q_1, tunpk6, tunpk4, LSL#16 + PKHBT q_0, tunpk7, tunpk5, LSL#16 + + ;// q_0 = [r0q0 r1q0 r2q0 r3q0] + ;// q_1 = [r0q1 r1q1 r2q1 r3q1] + + +;//----------------Pack p0-p1----------------------- + + M_LDRH row0, [pQ0], srcdstStep + M_LDRH row1, [pQ0], 
srcdstStep + LDRH row2, [pQ0] + LDRH row3, [pQ0, srcdstStep] + + ;// row0 = [0 0 r0p0 r0p1] + ;// row1 = [0 0 r1p0 r1p1] + ;// row2 = [0 0 r2p0 r2p1] + ;// row3 = [0 0 r3p0 r3p1] + + AND tunpk2, mask, row0 + AND tunpk6, mask, row0, LSL#8 + UXTAB tunpk2, tunpk2, row1, ROR#8 + UXTAB tunpk6, tunpk6, row1 + + AND tunpk0, mask, row2 + AND tunpk3, mask, row2, LSL#8 + UXTAB tunpk0, tunpk0, row3, ROR#8 + UXTAB tunpk3, tunpk3, row3 + + ;// tunpk2 = [0 0 r0p0 r1p0] + ;// tunpk6 = [0 0 r0p1 r1p1] + ;// tunpk0 = [0 0 r2p0 r3p0] + ;// tunpk3 = [0 0 r2p1 r3p1] + + PKHBT p_0, tunpk0, tunpk2, LSL#16 + M_LDR bSTemp, ppBS + PKHBT p_1, tunpk3, tunpk6, LSL#16 + + ;// p_0 = [r0p0 r1p0 r2p0 r3p0] + ;// p_1 = [r0p1 r1p1 r2p1 r3p1] + +;//--------------Filtering Decision ------------------- + USUB8 dp0q0, p_0, q_0 + LDR m01, =MASK_1 + LDRH bSTemp, [bSTemp ,#-8] + MOV m00, #MASK_0 ;// 00000000 mask + + MOV filt, m01 + TST bSTemp, #0xff00 + MOVEQ filt, filt, LSL #16 + TST bSTemp, #0xff + MOVEQ filt, filt, LSR #16 + TST bSTemp, #4 + + ;// Check |p0-q0|>3)) + ;// + ;// Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3 + ;// = (4*q0 - 4*p0 + p1 - q1 + 4)>>3 + ;// = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3 + + USUB8 t1, p_1, p_0 + USUB8 t2, q_1, q_0 + BEQ NoFilterFilt0 + + LDRB tC0, [pThresholds], #1 + SSUB8 t1, t1, t2 + LDRB tC1, [pThresholds], #3 + M_STR pThresholds, ppThresholds + UHSUB8 t4, p_0, q_0 + ORR tC, tC1, tC0, LSL #16 + USUB8 t5, p_0, q_0 + AND t5, t5, m01 + SHSUB8 t1, t1, t5 + ORR tC, tC, LSL #8 + SSUB8 t1, t1, t5 + SHSUB8 t1, t1, t4 + UQADD8 tC, tC, m01 + SADD8 t1, t1, m01 + USUB8 t5, filt, m01 + SHSUB8 t1, t1, t4 + SEL tC, tC, m00 + + ;// Split into positive and negative part and clip + + SSUB8 t1, t1, m00 + SEL pos, t1, m00 + USUB8 neg, pos, t1 + USUB8 t3, pos, tC + SEL pos, tC, pos + USUB8 t3, neg, tC + SEL neg, tC, neg + UQADD8 P_0, p_0, pos + UQSUB8 Q_0, q_0, pos + UQSUB8 P_0, P_0, neg + UQADD8 Q_0, Q_0, neg + + ;// Choose to store the filtered + ;// value or the original pixel + 
USUB8 t1, filt, m01 + SEL P_0, P_0, p_0 + SEL Q_0, Q_0, q_0 + +StoreResultAndExit + + ;//---------Store result--------------- + + ;// P_0 = [r0p0 r1p0 r2p0 r3p0] + ;// Q_0 = [r0q0 r1q0 r2q0 r3q0] + + SUB pQ0, pQ0, srcdstStep, LSL #1 + ADD pQ0, pQ0, #1 + + MOV t1, Q_0, LSR #24 + STRB t1, [pQ0, #1] + MOV t1, P_0, LSR #24 + M_STRB t1, [pQ0], srcdstStep + + MOV t1, Q_0, LSR #16 + STRB t1, [pQ0, #1] + MOV t1, P_0, LSR #16 + M_STRB t1, [pQ0], srcdstStep + + MOV t1, P_0, LSR #8 + STRB t1, [pQ0] + STRB P_0, [pQ0, srcdstStep] + MOV t1, Q_0, LSR #8 + STRB t1, [pQ0, #1]! + STRB Q_0, [pQ0, srcdstStep] + + M_LDRD XY, pBS, pXYBS + M_LDRD alpha, beta, pAlphaBeta1 + + SUB pQ0, pQ0, srcdstStep, LSL #1 + ADD pQ0, pQ0, #4 + + ADDS XY, XY, XY + M_STR XY, pXYBS + BCC LoopX + +;//-------- Common Exit of LoopY ----------------- + ;// Align the pointers + +ExitLoopY + + M_LDR pThresholds, ppThresholds + SUB pQ0, pQ0, #8 + ADD pQ0, pQ0, srcdstStep, LSL #2 + SUB pBS, pBS, #14 + SUB pThresholds, pThresholds, #6 + M_STR pThresholds, ppThresholds + + M_LDRD alpha, beta, pAlphaBeta0 + + BNE LoopY + MOV r0, #OMX_Sts_NoErr +;//-----------------End Filter-------------------- + + M_END + + ENDIF + + END + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s new file mode 100644 index 0000000000000000000000000000000000000000..1b840805c7b136712584e84fb13b82d2be3e38be --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s @@ -0,0 +1,331 @@ +;// +;// +;// File Name: omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + IMPORT armVCM4P10_DeblockingLumabSLT4_unsafe + IMPORT armVCM4P10_DeblockingLumabSGE4_unsafe + + + + IF ARM1136JS + + +MASK_0 EQU 0x00000000 +MASK_1 EQU 0x01010101 +MASK_2 EQU 0xff00ff00 +LOOP_COUNT EQU 0x11110000 + +;// Declare input registers + +pSrcDst RN 0 +srcdstStep RN 1 +pAlphaArg RN 2 +pBetaArg RN 3 + +pThresholds RN 14 +pBS RN 9 +pQ0 RN 0 +bS RN 2 + +alpha RN 6 +alpha0 RN 6 +alpha1 RN 8 + +beta RN 7 +beta0 RN 7 +beta1 RN 9 + +;// Declare Local/Temporary variables + +;// Pixels +p_0 RN 3 +p_1 RN 5 +p_2 RN 4 +p_3 RN 2 +q_0 RN 8 +q_1 RN 9 +q_2 RN 10 +q_3 RN 12 + +;// Filtering + +dp0q0 RN 12 +dp1p0 RN 12 +dq1q0 RN 12 +dp2p0 RN 12 +dq2q0 RN 12 + +ap0q0 RN 1 +filt RN 2 + +m00 RN 14 +m01 RN 11 + +apflg RN 0 +aqflg RN 6 +apqflg RN 0 + + +;//Declarations for bSLT4 kernel + +tC0 RN 7 +ptC0 RN 1 + +pQ0a RN 0 +Stepa RN 1 +maska RN 14 + +P0a RN 1 +P1a RN 8 +Q0a RN 7 +Q1a RN 11 + +;//Declarations for bSGE4 kernel + +pQ0b RN 0 +Stepb RN 1 +maskb RN 14 + +P0b RN 6 +P1b RN 7 +P2b RN 1 +P3b RN 3 + +Q0b RN 9 +Q1b RN 0 +Q2b RN 2 +Q3b RN 3 + +;// Miscellanous +XY RN 8 +t0 RN 3 +t1 RN 12 +t2 RN 14 +t7 RN 7 +t4 RN 4 +t5 RN 1 +t8 RN 6 +a RN 0 + + + + + ;// Allocate stack memory + M_ALLOC4 ppThresholds,4 + M_ALLOC4 pQ_3,4 + M_ALLOC4 pP_3,4 + M_ALLOC8 pAlphaBeta0,8 + M_ALLOC8 pAlphaBeta1,8 + M_ALLOC8 pXYBS,4 + M_ALLOC4 ppBS,4 + M_ALLOC8 ppQ0Step,4 + M_ALLOC4 pStep,4 + + ;// Function header + M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11 + + ;//Input arguments on the stack + M_ARG ppThresholdsArg, 4 + M_ARG ppBSArg, 4 + + LDR t4,=MASK_1 + + LDRB alpha0, [pAlphaArg] + LDRB beta0, [pBetaArg] + LDRB alpha1, [pAlphaArg,#1] + LDRB beta1, [pBetaArg,#1] + + MUL alpha0, alpha0, t4 + MUL beta0, beta0, t4 + MUL alpha1, alpha1, t4 + MUL beta1, beta1, t4 + + M_STRD alpha0, beta0, pAlphaBeta0 + M_STRD alpha1, beta1, pAlphaBeta1 + + LDR XY,=LOOP_COUNT + M_LDR pBS, ppBSArg + M_LDR pThresholds, 
ppThresholdsArg + M_STR srcdstStep, pStep + M_STRD XY, pBS, pXYBS + SUB pQ0, pQ0, srcdstStep, LSL #2 + M_STR pThresholds, ppThresholds +LoopY +LoopX +;//---------------Load Pixels------------------- + M_STR pQ0, ppQ0Step + M_LDR p_3, [pQ0], srcdstStep + M_LDR p_2, [pQ0], srcdstStep + M_STR p_3, pP_3 + LDRB bS, [pBS], #1 + M_STR pBS, ppBS + M_LDR p_1, [pQ0], srcdstStep + CMP bS, #0 + M_LDR p_0, [pQ0], srcdstStep + M_LDR q_0, [pQ0], srcdstStep + M_LDR q_1, [pQ0], srcdstStep + M_LDR q_2, [pQ0], srcdstStep + M_LDR q_3, [pQ0], srcdstStep + BEQ NoFilterBS0 + CMP bS, #4 + M_STR q_3, pQ_3 + +;//--------------Filtering Decision ------------------- + LDR m01, =MASK_1 ;// 01010101 mask + MOV m00, #MASK_0 ;// 00000000 mask + + ;// Check |p0-q0|= roi.width. +;// dx or dy is in the range [0-3]. +;// roi.width or roi.height is not out of range {4, 8, 16}. +;// If roi.width is equal to 4, Dst is 4 byte aligned. +;// If roi.width is equal to 8, pDst is 8 byte aligned. +;// If roi.width is equal to 16, pDst is 16 byte aligned. +;// srcStep and dstStep is multiple of 8. 
+;//
+;//
+
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+        EXPORT omxVCM4P10_InterpolateLuma
+
+    IF ARM1136JS
+        IMPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        IMPORT armVCM4P10_Average_4x4_Align0_unsafe
+        IMPORT armVCM4P10_Average_4x4_Align2_unsafe
+        IMPORT armVCM4P10_Average_4x4_Align3_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+    ENDIF
+
+    IF ARM1136JS
+        IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+    ENDIF
+
+
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2
+dstStep RN 3
+iHeight RN 4
+iWidth RN 5
+
+;// Declare other intermediate registers
+idx RN 6
+idy RN 7
+index RN 6    ;// same register as idx
+Temp RN 12
+pArgs RN 11
+
+
+        ;// End of CortexA8
+
+;//-------------------------------------------------------------------------------------------------------------------------
+;//-------------------------------------------------------------------------------------------------------------------------
+    IF ARM1136JS
+
+
+        M_ALLOC4 ppDst, 8
+        M_ALLOC4 ppSrc, 8
+        M_ALLOC4 ppArgs, 16    ;// save area for {pSrc, srcStep, pDst, dstStep}, written by the STM at the loop head
+        M_ALLOC4 pBuffer, 120 ;// 120 = 12x10
+        M_ALLOC8 pInterBuf, 120 ;// 120 = 12*5*2
+        M_ALLOC8 pTempBuf, 32 ;// 32 = 8*4
+
+        ;// Function header
+        ;// Interpolation of luma is implemented by processing block of pixels, size 4x4 at a time.
+        ;// Depending on the values of motion vector fractional parts (dx,dy), one out of 16 cases will be processed.
+        ;// Registers r4, r5, r6 to be preserved by internal unsafe functions
+        ;// r4 - iHeight
+        ;// r5 - iWidth
+        ;// r6 - index
+        M_START omxVCM4P10_InterpolateLuma, r11
+
+;// Declare other intermediate registers
+idx RN 6
+idy RN 7
+index RN 6    ;// same register as idx
+Temp RN 12
+pArgs RN 11
+
+pBuf RN 8
+Height RN 9
+bufStep RN 9    ;// same register as Height
+
+        ;// Define stack arguments
+        M_ARG ptridx, 4
+        M_ARG ptridy, 4
+        M_ARG ptrWidth, 4
+        M_ARG ptrHeight, 4
+
+        ;// Load structure elements of roi
+        M_LDR idx, ptridx
+        M_LDR idy, ptridy
+        M_LDR iWidth, ptrWidth
+        M_LDR iHeight, ptrHeight
+
+        M_PRINTF "roi.width %d\n", iWidth
+        M_PRINTF "roi.height %d\n", iHeight
+
+        ADD index, idx, idy, LSL #2 ;// [index] = [idy][idx], i.e. index = idx + 4*idy; overwrites idx (shared register)
+        M_ADR pArgs, ppArgs
+
+InterpolateLuma
+Block4x4WidthLoop
+Block4x4HeightLoop
+
+        STM pArgs, {pSrc,srcStep,pDst,dstStep}    ;// save this block's arguments; reloaded at Block4x4LoopEnd
+        M_ADR pBuf, pBuffer
+
+        ;// switch table using motion vector as index (presumably M_SWITCH branches to the index-th M_CASE; macro defined outside this file - TODO confirm)
+        M_SWITCH index, L
+        M_CASE Case_0
+        M_CASE Case_1
+        M_CASE Case_2
+        M_CASE Case_3
+        M_CASE Case_4
+        M_CASE Case_5
+        M_CASE Case_6
+        M_CASE Case_7
+        M_CASE Case_8
+        M_CASE Case_9
+        M_CASE Case_a
+        M_CASE Case_b
+        M_CASE Case_c
+        M_CASE Case_d
+        M_CASE Case_e
+        M_CASE Case_f
+        M_ENDSWITCH
+
+Case_0
+        ;// Case G  (index 0: idx=0, idy=0)
+        M_PRINTF "Case 0 \n"
+
+        BL armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+        B Block4x4LoopEnd
+
+Case_1
+        ;// Case a  (index 1: idx=1, idy=0)
+        M_PRINTF "Case 1 \n"
+
+        SUB pSrc, pSrc, #2
+        MOV Height, #4
+        BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        BL armVCM4P10_Average_4x4_Align2_unsafe
+        B Block4x4LoopEnd
+Case_2
+        ;// Case b  (index 2: idx=2, idy=0)
+        M_PRINTF "Case 2 \n"
+
+        SUB pSrc, pSrc, #2
+        MOV Height, #4
+        BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        B Block4x4LoopEnd
+Case_3
+        ;// Case c  (index 3: idx=3, idy=0)
+        M_PRINTF "Case 3 \n"
+
+        SUB pSrc, pSrc, #2
+        MOV Height, #4
+        BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        BL armVCM4P10_Average_4x4_Align3_unsafe
+        B Block4x4LoopEnd
+Case_4
+        ;// Case d  (index 4: idx=0, idy=1)
+        M_PRINTF "Case 4 \n"
+
+        SUB pSrc, pSrc, srcStep, LSL #1
+        MOV Height, #9
+        BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        BL armVCM4P10_Average_4x4_Align0_unsafe
+
+        B Block4x4LoopEnd
+Case_5
+        ;// Case e  (index 5: idx=1, idy=1)
+        M_PRINTF "Case 5 \n"
+
+        SUB pSrc, pSrc, #2
+        MOV Height, #4
+        M_ADR pDst, pTempBuf    ;// first pass writes to pTempBuf with a 4-byte step instead of the caller's pDst
+        MOV dstStep, #4
+        BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        M_ADR pArgs, ppArgs
+        LDM pArgs, {pSrc, srcStep, pDst, dstStep}    ;// restore the saved arguments for the second pass
+        SUB pSrc, pSrc, srcStep, LSL #1
+        M_ADR pBuf, pBuffer
+        MOV Height, #9
+        BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        M_ADR pSrc, pTempBuf    ;// pTempBuf (step 4) becomes the source passed to the averaging helper
+        MOV srcStep, #4
+        BL armVCM4P10_Average_4x4_Align0_unsafe
+
+
+        B Block4x4LoopEnd
+Case_6
+        ;// Case f  (index 6: idx=2, idy=1)
+        M_PRINTF "Case 6 \n"
+
+        SUB pSrc, pSrc, #2
+        SUB pSrc, pSrc, srcStep, LSL #1
+        MOV Height, #9
+        BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        M_ADR pBuf, pInterBuf
+        BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+        M_ADR idy, pTempBuf    ;// r7 (idy) is reused here to carry the address of pTempBuf
+        BL armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+        BL armVCM4P10_Average_4x4_Align0_unsafe
+        B Block4x4LoopEnd
+Case_7
+        ;// Case g  (index 7: idx=3, idy=1)
+        M_PRINTF "Case 7 \n"
+
+        SUB pSrc, pSrc, #2
+        MOV Height, #4
+        M_ADR pDst, pTempBuf    ;// first pass writes to pTempBuf with a 4-byte step instead of the caller's pDst
+        MOV dstStep, #4
+        BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        M_ADR pArgs, ppArgs
+        LDM pArgs, {pSrc, srcStep, pDst, dstStep}    ;// restore the saved arguments for the second pass
+        SUB pSrc, pSrc, srcStep, LSL #1
+        ADD pSrc, pSrc, #1
+        M_ADR pBuf, pBuffer
+        MOV Height, #9
+        BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        M_ADR pSrc, pTempBuf    ;// pTempBuf (step 4) becomes the source passed to the averaging helper
+        MOV srcStep, #4
+        BL armVCM4P10_Average_4x4_Align0_unsafe
+
+        B Block4x4LoopEnd
+Case_8
+        ;// Case h  (index 8: idx=0, idy=2)
+        M_PRINTF "Case 8 \n"
+
+        SUB pSrc, pSrc, srcStep, LSL #1
+        MOV Height, #9
+        BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        B Block4x4LoopEnd
+Case_9
+        ;// Case i  (index 9: idx=1, idy=2)
+        M_PRINTF "Case 9 \n"
+
+        SUB pSrc, pSrc, #2
+        SUB pSrc, pSrc, srcStep, LSL #1
+        MOV Height, #9
+        BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        ADD pSrc, pSrc, srcStep, LSL #1
+        M_ADR pBuf, pInterBuf
+        BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+        M_ADR idy, pTempBuf    ;// r7 (idy) is reused here to carry the address of pTempBuf
+        BL armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+        BL armVCM4P10_Average_4x4_Align2_unsafe
+        B Block4x4LoopEnd
+Case_a
+        ;// Case j  (index 10: idx=2, idy=2)
+        M_PRINTF "Case a \n"
+
+        SUB pSrc, pSrc, #2
+        SUB pSrc, pSrc, srcStep, LSL #1
+        MOV Height, #9
+        BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        ADD pSrc, pSrc, srcStep, LSL #1
+        M_ADR pBuf, pInterBuf
+        BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+        B Block4x4LoopEnd
+Case_b
+        ;// Case k  (index 11: idx=3, idy=2)
+        M_PRINTF "Case b \n"
+        SUB pSrc, pSrc, #2
+        SUB pSrc, pSrc, srcStep, LSL #1
+        MOV Height, #9
+        BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        ADD pSrc, pSrc, srcStep, LSL #1
+        M_ADR pBuf, pInterBuf
+        BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+        M_ADR idy, pTempBuf    ;// r7 (idy) is reused here to carry the address of pTempBuf
+        BL armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+        BL armVCM4P10_Average_4x4_Align3_unsafe
+        B Block4x4LoopEnd
+Case_c
+        ;// Case n  (index 12: idx=0, idy=3)
+        M_PRINTF "Case c \n"
+
+        SUB pSrc, pSrc, srcStep, LSL #1
+        MOV Height, #9
+        BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        ADD pSrc, pSrc, srcStep ;// Update pSrc to one row down
+        BL armVCM4P10_Average_4x4_Align0_unsafe
+        B Block4x4LoopEnd
+Case_d
+        ;// Case p  (index 13: idx=1, idy=3)
+        M_PRINTF "Case d \n"
+        SUB pSrc, pSrc, #2
+        ADD pSrc, pSrc, srcStep
+        MOV Height, #4
+        M_ADR pDst, pTempBuf    ;// first pass writes to pTempBuf with a 4-byte step instead of the caller's pDst
+        MOV dstStep, #4
+        BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        M_ADR pArgs, ppArgs
+        LDM pArgs, {pSrc, srcStep, pDst, dstStep}    ;// restore the saved arguments for the second pass
+        SUB pSrc, pSrc, srcStep, LSL #1
+        M_ADR pBuf, pBuffer
+        MOV Height, #9
+        BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        M_ADR pSrc, pTempBuf    ;// pTempBuf (step 4) becomes the source passed to the averaging helper
+        MOV srcStep, #4
+        BL armVCM4P10_Average_4x4_Align0_unsafe
+        B Block4x4LoopEnd
+Case_e
+        ;// Case q  (index 14: idx=2, idy=3)
+        M_PRINTF "Case e \n"
+
+        SUB pSrc, pSrc, #2
+        SUB pSrc, pSrc, srcStep, LSL #1
+        MOV Height, #9
+        BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        M_ADR pBuf, pInterBuf
+        BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+        M_ADR idy, pTempBuf    ;// r7 (idy) is reused here to carry the address of pTempBuf
+        BL armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+        ADD pSrc, pSrc, #4
+        BL armVCM4P10_Average_4x4_Align0_unsafe
+
+        B Block4x4LoopEnd
+Case_f
+        ;// Case r  (index 15: idx=3, idy=3); the only case without a closing branch, it falls through to Block4x4LoopEnd
+        M_PRINTF "Case f \n"
+        SUB pSrc, pSrc, #2
+        ADD pSrc, pSrc, srcStep
+        MOV Height, #4
+        M_ADR pDst, pTempBuf    ;// first pass writes to pTempBuf with a 4-byte step instead of the caller's pDst
+        MOV dstStep, #4
+        BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        M_ADR pArgs, ppArgs
+        LDM pArgs, {pSrc, srcStep, pDst, dstStep}    ;// restore the saved arguments for the second pass
+        SUB pSrc, pSrc, srcStep, LSL #1
+        ADD pSrc, pSrc, #1
+        M_ADR pBuf, pBuffer
+        MOV Height, #9
+        BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        M_ADR pSrc, pTempBuf    ;// pTempBuf (step 4) becomes the source passed to the averaging helper
+        MOV srcStep, #4
+        BL armVCM4P10_Average_4x4_Align0_unsafe
+
+Block4x4LoopEnd
+
+        ;// Width Loop
+        SUBS iWidth, iWidth, #4    ;// 4 columns consumed; the flags set here drive the BGT below
+        M_ADR pArgs, ppArgs
+        LDM pArgs, {pSrc,srcStep,pDst,dstStep} ;// Load arguments
+        ADD pSrc, pSrc, #4    ;// step both pointers 4 bytes to the right
+        ADD pDst, pDst, #4
+        BGT Block4x4WidthLoop
+
+        ;// Height Loop
+        SUBS iHeight, iHeight, #4    ;// 4 rows consumed; the flags set here drive the BGT below
+        M_LDR iWidth, ptrWidth    ;// reload the full width from the stack argument
+        M_ADR pArgs, ppArgs
+        ADD pSrc, pSrc, srcStep, LSL #2    ;// move both pointers down by 4 rows ...
+        ADD pDst, pDst, dstStep, LSL #2
+        SUB pSrc, pSrc, iWidth    ;// ... and back by the full width
+        SUB pDst, pDst, iWidth
+        BGT Block4x4HeightLoop
+
+EndOfInterpolation
+        MOV r0, #0    ;// r0 = 0 on exit
+        M_END
+
+    ENDIF
+
+
+        END
+    
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
new file mode 100644
index 0000000000000000000000000000000000000000..34fedd85b6c63e6fed24c6dad5bceb5c9fb62d6c
--- /dev/null
+++
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
@@ -0,0 +1,494 @@
+;//
+;//
+;// File Name: omxVCM4P10_PredictIntraChroma_8x8_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;// Description: intra prediction of one 8x8 chroma block (DC, horizontal, vertical and
+;// plane modes) for the ARM1136J-S variant; writes 8 rows to pDst and returns OMX_Sts_NoErr in r0.
+
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        EXPORT armVCM4P10_pIndexTable8x8
+
+;// Define the processor variants supported by this file
+
+        M_VARIANTS ARM1136JS
+
+        AREA table, DATA
+;//-------------------------------------------------------
+;// Jump table indexed by predMode: each entry is the address of the
+;// code label handling that mode (switch/case via LDR pc, [table, mode*4]).
+;//-------------------------------------------------------
+
+        M_TABLE armVCM4P10_pIndexTable8x8
+        DCD     OMX_VC_CHROMA_DC, OMX_VC_CHROMA_HOR
+        DCD     OMX_VC_CHROMA_VERT, OMX_VC_CHROMA_PLANE
+
+        M_TABLE armVCM4P10_MultiplierTableChroma8x8,1
+        DCW     3, 2, 1,4
+        DCW     -3,-2,-1,0
+        DCW     1, 2, 3,4
+
+        IF ARM1136JS
+
+;//--------------------------------------------
+;// Constants
+;//--------------------------------------------
+
+BLK_SIZE        EQU 0x8
+MUL_CONST0      EQU 0x01010101
+MASK_CONST      EQU 0x00FF00FF
+MUL_CONST1      EQU 0x80808080
+
+;//--------------------------------------------
+;// Scratch variable (several names alias the same register; see RN numbers)
+;//--------------------------------------------
+y               RN 12
+pc              RN 15
+return          RN 0
+pSrcLeft2       RN 1
+pDst2           RN 2
+sum1            RN 6
+sum2            RN 7
+pTable          RN 9
+dstStepx2       RN 11
+leftStepx2      RN 14
+outerCount      RN 14
+r0x01010101     RN 10
+r0x00FF00FF     RN 11
+
+tVal0           RN 0
+tVal1           RN 1
+tVal2           RN 2
+tVal3           RN 3
+tVal4           RN 4
+tVal5           RN 5
+tVal6           RN 6
+tVal7           RN 7
+tVal8           RN 8
+tVal9           RN 9
+tVal10          RN 10
+tVal11          RN 11
+tVal12          RN 12
+tVal14          RN 14
+
+b               RN 14
+c               RN 12
+
+p2p0            RN 0
+p3p1            RN 1
+p6p4            RN 2
+p7p5            RN 4
+
+pp2pp0          RN 6
+pp3pp1          RN 7
+pp6pp4          RN 8
+pp7pp5          RN 9
+
+p3210           RN 10
+p7654           RN 10
+
+;//--------------------------------------------
+;// Input Arguments
+;//--------------------------------------------
+pSrcLeft        RN 0    ;// in: left neighbour column, one byte every leftStep bytes
+pSrcAbove       RN 1    ;// in: 8 neighbour bytes above the block (loaded with LDM - presumably word aligned, TODO confirm)
+pSrcAboveLeft   RN 2    ;// in: above-left neighbour byte (read in plane mode only)
+pDst            RN 3    ;// out: 8x8 destination block, rows dstStep bytes apart
+leftStep        RN 4    ;// in: stride of pSrcLeft (stack argument)
+dstStep         RN 5    ;// in: stride of pDst (stack argument)
+predMode        RN 6    ;// in: index into armVCM4P10_pIndexTable8x8 (stack argument)
+availability    RN 7    ;// in: OMX_VC_UPPER / OMX_VC_LEFT flags, used by DC mode (stack argument)
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntraChroma_8x8 starts
+;//-----------------------------------------------------------------------------------------------
+
+        ;// Write function header
+        M_START omxVCM4P10_PredictIntraChroma_8x8, r11
+
+        ;// Define stack arguments
+        M_ARG   LeftStep, 4
+        M_ARG   DstStep, 4
+        M_ARG   PredMode, 4
+        M_ARG   Availability, 4
+
+        ;// M_STALL ARM1136JS=4
+
+        LDR     pTable,=armVCM4P10_pIndexTable8x8       ;// Load index table for switch case
+
+
+        ;// Load argument from the stack
+        M_LDR   predMode, PredMode                      ;// Arg predMode loaded from stack to reg
+        M_LDR   leftStep, LeftStep                      ;// Arg leftStep loaded from stack to reg
+        M_LDR   dstStep, DstStep                        ;// Arg dstStep loaded from stack to reg
+        M_LDR   availability, Availability              ;// Arg availability loaded from stack to reg
+
+        MOV     y, #BLK_SIZE                            ;// Outer Loop Count (not read by any mode below)
+        LDR     pc, [pTable, predMode, LSL #2]          ;// Branch to the case selected by predMode
+
+OMX_VC_CHROMA_DC
+        AND     availability, availability,#(OMX_VC_UPPER + OMX_VC_LEFT)
+        CMP     availability, #(OMX_VC_UPPER + OMX_VC_LEFT) ;// are both UPPER and LEFT available?
+        LDR     r0x01010101, =MUL_CONST0
+        BNE     TST_UPPER                               ;// Jump to Upper if not both
+        LDM     pSrcAbove,{tVal8,tVal9}                 ;// tVal 8 to 9 = pSrcAbove[0 to 7]
+
+        ADD     leftStepx2, leftStep,leftStep           ;// leftStepx2 = 2 * leftStep
+        ADD     pSrcLeft2, pSrcLeft, leftStep           ;// pSrcLeft2 = pSrcLeft + leftStep
+
+        ;// M_STALL ARM1136JS=1
+
+        UXTB16  tVal7, tVal8                            ;// pSrcAbove[0, 2]
+        UXTB16  tVal8, tVal8, ROR #8                    ;// pSrcAbove[1, 3]
+        UADD16  sum1, tVal7, tVal8                      ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
+
+        UXTB16  tVal7, tVal9                            ;// pSrcAbove[4, 6]
+        UXTB16  tVal9, tVal9, ROR #8                    ;// pSrcAbove[5, 7]
+        UADD16  sum2, tVal7, tVal9                      ;// pSrcAbove[4, 6] + pSrcAbove[5, 7]
+        ADD     sum1, sum1, sum1, LSR #16               ;// sum(pSrcAbove[0] to pSrcAbove[3])
+        ADD     sum2, sum2, sum2, LSR #16               ;// sum(pSrcAbove[4] to pSrcAbove[7])
+        UXTH    sum1, sum1                              ;// upsum1 (Clear the top junk bits)
+        UXTH    sum2, sum2                              ;// upsum2 (Clear the top junk bits)
+
+        M_LDRB  tVal8, [pSrcLeft], +leftStepx2          ;// tVal8 = pSrcLeft[0]
+        M_LDRB  tVal9, [pSrcLeft2], +leftStepx2         ;// tVal9 = pSrcLeft[1]
+        M_LDRB  tVal4, [pSrcLeft], +leftStepx2          ;// tVal4 = pSrcLeft[2] (leftStep itself no longer needed)
+        M_LDRB  tVal12,[pSrcLeft2], +leftStepx2         ;// tVal12= pSrcLeft[3]
+        ADD     tVal2, tVal8, tVal9                     ;// tVal2 = tVal8 + tVal9
+
+        M_LDRB  tVal8, [pSrcLeft], +leftStepx2          ;// tVal8 = pSrcLeft[4]
+        M_LDRB  tVal9, [pSrcLeft2], +leftStepx2         ;// tVal9 = pSrcLeft[5]
+        ADD     tVal14, tVal4, tVal12                   ;// tVal14 = tVal4 + tVal12 (overwrites leftStepx2, done with it)
+
+        LDRB    tVal4, [pSrcLeft]                       ;// tVal4 = pSrcLeft[6]
+        LDRB    tVal12,[pSrcLeft2]                      ;// tVal12= pSrcLeft[7]
+        ADD     tVal8, tVal8, tVal9                     ;// tVal8 = tVal8 + tVal9
+        ADD     tVal2, tVal2, tVal14                    ;// leftsum1 = sum(pSrcLeft[0] to pSrcLeft[3])
+        ADD     tVal4, tVal4, tVal12                    ;// tVal4 = tVal4 + tVal12
+        ADD     tVal14, tVal8, tVal4                    ;// leftsum2 = sum(pSrcLeft[4] to pSrcLeft[7])
+        ADD     tVal8, tVal14, #2                       ;// tVal8 = leftsum2 + 2
+        ADD     tVal9, sum2, #2                         ;// tVal9 = upsum2 + 2
+        ADD     sum1, sum1, tVal2                       ;// sum1 = upsum1 + leftsum1
+        ADD     sum2, sum2, tVal14                      ;// sum2 = upsum2 + leftsum2
+        ADD     sum1, sum1, #4                          ;// (sum1 + 4)
+        ADD     sum2, sum2, #4                          ;// (sum2 + 4)
+        MOV     sum1, sum1, LSR #3                      ;// (sum1 + 4)>>3
+        MOV     tVal9, tVal9, LSR #2                    ;// (tVal9 + 2)>>2
+        MOV     tVal8, tVal8, LSR #2                    ;// (tVal8 + 2)>>2
+        MOV     sum2, sum2, LSR #3                      ;// (sum2 + 4)>>3
+
+        MUL     tVal0, sum1, r0x01010101                ;// top-left 4x4 DC replicated in all the bytes
+        MUL     tVal1, tVal9,r0x01010101                ;// top-right 4x4 DC replicated in all the bytes
+        MUL     tVal8, tVal8,r0x01010101                ;// bottom-left 4x4 DC replicated in all the bytes
+        MUL     tVal9, sum2, r0x01010101                ;// bottom-right 4x4 DC replicated in all the bytes
+
+        M_STRD  tVal0, tVal1, [pDst], dstStep           ;// pDst[0 to 7] = tVal 0 to 1
+        M_STRD  tVal0, tVal1, [pDst], dstStep           ;// pDst[8 to 15] = tVal 0 to 1
+        M_STRD  tVal0, tVal1, [pDst], dstStep           ;// pDst[16 to 23] = tVal 0 to 1
+        M_STRD  tVal0, tVal1, [pDst], dstStep           ;// pDst[24 to 31] = tVal 0 to 1
+
+        M_STRD  tVal8, tVal9, [pDst], dstStep           ;// pDst[32 to 39] = tVal 8 to 9
+        M_STRD  tVal8, tVal9, [pDst], dstStep           ;// pDst[40 to 47] = tVal 8 to 9
+        M_STRD  tVal8, tVal9, [pDst], dstStep           ;// pDst[48 to 55] = tVal 8 to 9
+        M_STRD  tVal8, tVal9, [pDst], dstStep           ;// pDst[56 to 63] = tVal 8 to 9
+        MOV     return, #OMX_Sts_NoErr
+        M_EXIT
+
+TST_UPPER
+
+        ;// M_STALL ARM1136JS=3
+
+        CMP     availability, #OMX_VC_UPPER             ;// only UPPER available?
+
+        BNE     TST_LEFT                                ;// Jump to Left if not upper
+        LDM     pSrcAbove,{tVal8,tVal9}                 ;// tVal 8 to 9 = pSrcAbove[0 to 7]
+
+        ;// M_STALL ARM1136JS=3
+
+        UXTB16  tVal7, tVal8                            ;// pSrcAbove[0, 2]
+        UXTB16  tVal8, tVal8, ROR #8                    ;// pSrcAbove[1, 3]
+        UADD16  sum1, tVal7, tVal8                      ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
+
+        UXTB16  tVal7, tVal9                            ;// pSrcAbove[4, 6]
+        UXTB16  tVal9, tVal9, ROR #8                    ;// pSrcAbove[5, 7]
+        UADD16  sum2, tVal7, tVal9                      ;// pSrcAbove[4, 6] + pSrcAbove[5, 7]
+
+        ADD     sum1, sum1, sum1, LSR #16               ;// sum(pSrcAbove[0] to pSrcAbove[3])
+        ADD     sum2, sum2, sum2, LSR #16               ;// sum(pSrcAbove[4] to pSrcAbove[7])
+
+        UXTH    sum1, sum1                              ;// upsum1 (Clear the top junk bits)
+        UXTH    sum2, sum2                              ;// upsum2 (Clear the top junk bits)
+
+        ADD     sum1, sum1, #2                          ;// sum1 + 2
+        ADD     sum2, sum2, #2                          ;// sum2 + 2
+
+        MOV     sum1, sum1, LSR #2                      ;// (sum1 + 2)>>2
+        MOV     sum2, sum2, LSR #2                      ;// (sum2 + 2)>>2
+
+        MUL     sum1, sum1,r0x01010101                  ;// replicate the val in all the bytes (sum1 is tVal6)
+        MUL     sum2, sum2,r0x01010101                  ;// replicate the val in all the bytes (sum2 is tVal7)
+
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[0 to 7] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[8 to 15] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[16 to 23] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[24 to 31] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[32 to 39] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[40 to 47] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[48 to 55] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[56 to 63] = tVal 6 to 7
+        MOV     return, #OMX_Sts_NoErr
+        M_EXIT
+
+TST_LEFT
+        ;// M_STALL ARM1136JS=3
+
+        CMP     availability, #OMX_VC_LEFT              ;// only LEFT available?
+        BNE     TST_COUNT0                              ;// neither neighbour available
+        ADD     leftStepx2, leftStep,leftStep           ;// leftStepx2 = 2 * leftStep
+        ADD     pSrcLeft2, pSrcLeft, leftStep           ;// pSrcLeft2 = pSrcLeft + leftStep
+
+        M_LDRB  tVal8, [pSrcLeft], +leftStepx2          ;// tVal8 = pSrcLeft[0]
+        M_LDRB  tVal9, [pSrcLeft2], +leftStepx2         ;// tVal9 = pSrcLeft[1]
+        M_LDRB  tVal4, [pSrcLeft], +leftStepx2          ;// tVal4 = pSrcLeft[2]
+        M_LDRB  tVal12,[pSrcLeft2], +leftStepx2         ;// tVal12= pSrcLeft[3]
+
+        ADD     tVal6, tVal8, tVal9                     ;// tVal6 = tVal8 + tVal9
+
+        M_LDRB  tVal8, [pSrcLeft], +leftStepx2          ;// tVal8 = pSrcLeft[4]
+        ADD     tVal7, tVal4, tVal12                    ;// tVal7 = tVal4 + tVal12
+        M_LDRB  tVal9, [pSrcLeft2], +leftStepx2         ;// tVal9 = pSrcLeft[5]
+        M_LDRB  tVal4, [pSrcLeft], +leftStepx2          ;// tVal4 = pSrcLeft[6]
+        M_LDRB  tVal12,[pSrcLeft2], +leftStepx2         ;// tVal12= pSrcLeft[7]
+
+        ADD     tVal8, tVal8, tVal9                     ;// tVal8 = tVal8 + tVal9
+        ADD     sum1, tVal6, tVal7                      ;// sum1 = sum(pSrcLeft[0] to pSrcLeft[3])
+        ADD     tVal4, tVal4, tVal12                    ;// tVal4 = tVal4 + tVal12
+        ADD     sum2, tVal8, tVal4                      ;// sum2 = sum(pSrcLeft[4] to pSrcLeft[7])
+
+        ADD     sum1, sum1, #2                          ;// sum1 + 2
+        ADD     sum2, sum2, #2                          ;// sum2 + 2
+
+        MOV     sum1, sum1, LSR #2                      ;// (sum1 + 2)>>2
+        MOV     sum2, sum2, LSR #2                      ;// (sum2 + 2)>>2
+
+        MUL     tVal6, sum1,r0x01010101                 ;// replicate the val in all the bytes
+        MUL     tVal8, sum2,r0x01010101                 ;// replicate the val in all the bytes
+
+        ;// M_STALL ARM1136JS=1
+        MOV     tVal7,tVal6                             ;// tVal7 = replicated upper-half DC
+        MOV     tVal9,tVal8                             ;// tVal9 = replicated lower-half DC
+
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[0 to 7] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[8 to 15] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[16 to 23] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[24 to 31] = tVal 6 to 7
+
+        M_STRD  tVal8, tVal9, [pDst], dstStep           ;// pDst[32 to 39] = tVal 8 to 9
+        M_STRD  tVal8, tVal9, [pDst], dstStep           ;// pDst[40 to 47] = tVal 8 to 9
+        M_STRD  tVal8, tVal9, [pDst], dstStep           ;// pDst[48 to 55] = tVal 8 to 9
+        M_STRD  tVal8, tVal9, [pDst], dstStep           ;// pDst[56 to 63] = tVal 8 to 9
+
+        MOV     return, #OMX_Sts_NoErr
+        M_EXIT                                          ;// Macro to exit midway-break frm case
+
+TST_COUNT0
+        LDR     sum1, =MUL_CONST1                       ;// sum1 (tVal6) = 0x80808080: value 128 in every byte
+
+        ;// M_STALL ARM1136JS=2
+
+        MOV     tVal7, sum1                             ;// tVal7 = sum1
+
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[0 to 7] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[8 to 15] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[16 to 23] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[24 to 31] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[32 to 39] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[40 to 47] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[48 to 55] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[56 to 63] = tVal 6 to 7
+
+        MOV     return, #OMX_Sts_NoErr
+        M_EXIT                                          ;// Macro to exit midway-break frm case
+
+OMX_VC_CHROMA_HOR
+
+        ;// M_STALL ARM1136JS=2
+
+        ADD     pSrcLeft2, pSrcLeft, leftStep           ;// pSrcLeft2 = pSrcLeft + leftStep
+        ADD     leftStepx2, leftStep, leftStep          ;// leftStepx2 = leftStep * 2
+        ADD     pDst2, pDst, dstStep                    ;// pDst2 = pDst + dstStep
+        ADD     dstStepx2, dstStep, dstStep             ;// double dstStep
+        SUB     dstStepx2, dstStepx2, #4                ;// double dstStep minus 4
+        LDR     r0x01010101, =MUL_CONST0                ;// Const to repeat the byte in reg 4 times
+        M_LDRB  tVal6, [pSrcLeft], +leftStepx2          ;// tVal6 = pSrcLeft[0]
+        M_LDRB  tVal7, [pSrcLeft2],+leftStepx2          ;// tVal7 = pSrcLeft[1]
+        M_LDRB  tVal8, [pSrcLeft], +leftStepx2          ;// tVal8 = pSrcLeft[2]
+        M_LDRB  tVal9, [pSrcLeft2],+leftStepx2          ;// tVal9 = pSrcLeft[3]
+        MUL     tVal6, tVal6, r0x01010101               ;// replicate the val in all the bytes
+        MUL     tVal7, tVal7, r0x01010101               ;// replicate the val in all the bytes
+        MUL     tVal8, tVal8, r0x01010101               ;// replicate the val in all the bytes
+        MUL     tVal9, tVal9, r0x01010101               ;// replicate the val in all the bytes
+        STR     tVal6, [pDst], #+4                      ;// row 0, bytes 0 to 3
+        STR     tVal7, [pDst2], #+4                     ;// row 1, bytes 0 to 3
+        M_STR   tVal6, [pDst], dstStepx2                ;// row 0, bytes 4 to 7; pDst -> row 2
+        M_STR   tVal7, [pDst2], dstStepx2               ;// row 1, bytes 4 to 7; pDst2 -> row 3
+        STR     tVal8, [pDst], #+4                      ;// row 2, bytes 0 to 3
+        STR     tVal9, [pDst2], #+4                     ;// row 3, bytes 0 to 3
+        M_STR   tVal8, [pDst], dstStepx2                ;// row 2, bytes 4 to 7; pDst -> row 4
+        M_STR   tVal9, [pDst2], dstStepx2               ;// row 3, bytes 4 to 7; pDst2 -> row 5
+        M_LDRB  tVal6, [pSrcLeft], +leftStepx2          ;// tVal6 = pSrcLeft[4]
+        M_LDRB  tVal7, [pSrcLeft2],+leftStepx2          ;// tVal7 = pSrcLeft[5]
+        M_LDRB  tVal8, [pSrcLeft], +leftStepx2          ;// tVal8 = pSrcLeft[6]
+        M_LDRB  tVal9, [pSrcLeft2],+leftStepx2          ;// tVal9 = pSrcLeft[7]
+        MUL     tVal6, tVal6, r0x01010101               ;// replicate the val in all the bytes
+        MUL     tVal7, tVal7, r0x01010101               ;// replicate the val in all the bytes
+        MUL     tVal8, tVal8, r0x01010101               ;// replicate the val in all the bytes
+        MUL     tVal9, tVal9, r0x01010101               ;// replicate the val in all the bytes
+        STR     tVal6, [pDst], #+4                      ;// row 4, bytes 0 to 3
+        STR     tVal7, [pDst2], #+4                     ;// row 5, bytes 0 to 3
+        M_STR   tVal6, [pDst], dstStepx2                ;// row 4, bytes 4 to 7; pDst -> row 6
+        M_STR   tVal7, [pDst2], dstStepx2               ;// row 5, bytes 4 to 7; pDst2 -> row 7
+        STR     tVal8, [pDst], #+4                      ;// row 6, bytes 0 to 3
+        STR     tVal9, [pDst2], #+4                     ;// row 7, bytes 0 to 3
+        M_STR   tVal8, [pDst], dstStepx2                ;// row 6, bytes 4 to 7
+        M_STR   tVal9, [pDst2], dstStepx2               ;// row 7, bytes 4 to 7
+        MOV     return, #OMX_Sts_NoErr
+        M_EXIT
+
+OMX_VC_CHROMA_VERT
+
+        ;// M_STALL ARM1136JS=4
+
+        LDMIA   pSrcAbove, {tVal6,tVal7}                ;// tVal 6 to 7 = pSrcAbove[0 to 7]
+        MOV     return, #OMX_Sts_NoErr
+
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[0 to 7] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[8 to 15] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[16 to 23] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[24 to 31] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[32 to 39] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[40 to 47] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[48 to 55] = tVal 6 to 7
+        M_STRD  tVal6, tVal7, [pDst], dstStep           ;// pDst[56 to 63] = tVal 6 to 7
+
+        M_EXIT                                          ;// Macro to exit midway-break frm case
+
+OMX_VC_CHROMA_PLANE
+
+        ;// M_STALL ARM1136JS=3
+
+        RSB     tVal14, leftStep, leftStep, LSL #3      ;// 7*leftStep
+        LDRB    tVal7, [pSrcAbove, #+7]                 ;// pSrcAbove[7]
+        LDRB    tVal6, [pSrcLeft, +tVal14]              ;// pSrcLeft[7*leftStep]
+        LDRB    tVal8, [pSrcAboveLeft]                  ;// pSrcAboveLeft[0]
+        LDRB    tVal9, [pSrcAbove, #+6 ]                ;// pSrcAbove[6]
+        LDRB    tVal10,[pSrcAbove]                      ;// pSrcAbove[0]
+        ADD     tVal2, tVal7, tVal6                     ;// pSrcAbove[7] + pSrcLeft[7*leftStep]
+        SUB     tVal6, tVal6, tVal8                     ;// V0 = pSrcLeft[7*leftStep] - pSrcAboveLeft[0]
+        SUB     tVal7, tVal7, tVal8                     ;// H0 = pSrcAbove[7] - pSrcAboveLeft[0]
+        LSL     tVal2, tVal2, #4                        ;// a = 16 * (pSrcAbove[7] + pSrcLeft[7*leftStep])
+        ADD     tVal2, tVal2, #16                       ;// a + 16
+        SUB     tVal9, tVal9,tVal10                     ;// pSrcAbove[6] - pSrcAbove[0]
+        LDRB    tVal8, [pSrcAbove,#+5]                  ;// pSrcAbove[5]
+        LDRB    tVal10,[pSrcAbove,#+1]                  ;// pSrcAbove[1]
+        ADD     tVal9, tVal9, tVal9, LSL #1             ;// H1 = 3 * (pSrcAbove[6] - pSrcAbove[0])
+        ADD     tVal7, tVal9, tVal7, LSL #2             ;// H = H1 + 4*H0
+        SUB     tVal8, tVal8, tVal10                    ;// pSrcAbove[5] - pSrcAbove[1]
+        LDRB    tVal9, [pSrcAbove,#+4]                  ;// pSrcAbove[4]
+        LDRB    tVal10,[pSrcAbove,#+2]                  ;// pSrcAbove[2]
+        ADD     tVal7, tVal7, tVal8, LSL #1             ;// H = H + 2*(pSrcAbove[5] - pSrcAbove[1])
+        SUB     tVal11, tVal14,leftStep                 ;// 6*leftStep
+        ADD     tVal11, pSrcLeft, tVal11                ;// pSrcLeft + 6*leftStep (walks upwards)
+        MOV     tVal12, pSrcLeft                        ;// pSrcLeft (walks downwards)
+        SUB     tVal9, tVal9, tVal10                    ;// pSrcAbove[4] - pSrcAbove[2]
+        ADD     tVal7, tVal7, tVal9                     ;// H = H + (pSrcAbove[4] - pSrcAbove[2])
+        M_LDRB  tVal8, [tVal11],-leftStep               ;// pSrcLeft[6*leftStep]
+        M_LDRB  tVal10,[tVal12],+leftStep               ;// pSrcLeft[0]
+        ADD     tVal7, tVal7, tVal7, LSL #4             ;// 17 * H
+        ADD     tVal7, tVal7, #16                       ;// 17 * H + 16
+        SUB     tVal8, tVal8, tVal10                    ;// pSrcLeft[6*leftStep] - pSrcLeft[0]
+        ASR     b, tVal7, #5                            ;// b = (17 * H + 16) >> 5
+        ADD     tVal8, tVal8, tVal8, LSL #1             ;// V1 = 3 * (pSrcLeft[6*leftStep] - pSrcLeft[0])
+        ADD     tVal6, tVal8, tVal6, LSL #2             ;// V = V1 + 4*V0
+        M_LDRB  tVal8, [tVal11],-leftStep               ;// pSrcLeft[5*leftStep]
+        M_LDRB  tVal10,[tVal12],+leftStep               ;// pSrcLeft[leftStep]
+        ADD     tVal7, b, b, LSL #1                     ;// 3*b
+        SUB     tVal2, tVal2, tVal7                     ;// a + 16 - 3*b
+        SUB     tVal7, tVal8, tVal10                    ;// pSrcLeft[5*leftStep] - pSrcLeft[leftStep]
+        M_LDRB  tVal8, [tVal11],-leftStep               ;// pSrcLeft[4*leftStep]
+        M_LDRB  tVal10,[tVal12],+leftStep               ;// pSrcLeft[2*leftStep]
+        ADD     tVal6, tVal6, tVal7, LSL #1             ;// V = V + 2*(pSrcLeft[5*leftStep] - pSrcLeft[leftStep])
+        LDR     r0x00FF00FF, =MASK_CONST                ;// r0x00FF00FF = 0x00FF00FF
+        SUB     tVal7, tVal8, tVal10                    ;// pSrcLeft[4*leftStep] - pSrcLeft[2*leftStep]
+        ADD     tVal6, tVal6, tVal7                     ;// V = V + (pSrcLeft[4*leftStep] - pSrcLeft[2*leftStep])
+        SUB     dstStep, dstStep, #4                    ;// dstStep - 4 (first store of each row post-increments by 4)
+        ADD     tVal6, tVal6, tVal6, LSL #4             ;// 17*V
+        ADD     tVal6, tVal6, #16                       ;// 17*V + 16
+
+        ;// M_STALL ARM1136JS=1
+
+        ASR     c, tVal6, #5                            ;// c = (17*V + 16)>>5
+
+        ;// M_STALL ARM1136JS=1
+
+        ADD     tVal6, c, c, LSL #1                     ;// 3*c
+        UXTH    c, c                                    ;// only in half word
+        SUB     tVal6, tVal2, tVal6                     ;// d = a - 3*b - 3*c + 16 (may be negative)
+        ORR     c, c, c, LSL #16                        ;// {c, c}
+        ADD     tVal7, b, b                             ;// 2b
+        ADD     tVal2, tVal6, tVal7                     ;// p2 = d + 2*b
+        ADD     tVal7, tVal7, b                         ;// 3b
+        PKHBT   p2p0, tVal6, tVal2, LSL #16             ;// p2p0 = pack {p2, p0}; takes only d[15:0] so a negative d cannot set p2's bits (plain ORR did)
+        UXTH    b, b
+        UXTH    tVal7, tVal7
+        ORR     b, b, b, LSL #16                        ;// {b,b}
+        ORR     tVal7, tVal7, tVal7, LSL #16            ;// {3b,3b}
+        SADD16  p3p1, p2p0, b                           ;// p3p1 = p2p0 + {b,b}
+        SADD16  p6p4, p3p1, tVal7                       ;// p6p4 = p3p1 + {3b,3b}
+        SADD16  p7p5, p6p4, b                           ;// p7p5 = p6p4 + {b,b}
+        MOV     outerCount, #BLK_SIZE                   ;// Outer Loop Count (reuses b's register; b is packed and consumed above)
+
+LOOP_PLANE
+
+        USAT16  pp7pp5, #13, p7p5                       ;// clip13(p7) clip13(p5) into scratch; accumulator stays unclipped
+        USAT16  pp6pp4, #13, p6p4                       ;// clip13(p6) clip13(p4) into scratch; accumulator stays unclipped
+        USAT16  pp3pp1, #13, p3p1                       ;// clip13(p3) clip13(p1) into scratch; accumulator stays unclipped
+        USAT16  pp2pp0, #13, p2p0                       ;// clip13(p2) clip13(p0) into scratch; accumulator stays unclipped
+
+        AND     pp7pp5, r0x00FF00FF, pp7pp5, ASR #5     ;// clip8(p7) clip8(p5)
+        AND     pp6pp4, r0x00FF00FF, pp6pp4, ASR #5     ;// clip8(p6) clip8(p4)
+        AND     pp3pp1, r0x00FF00FF, pp3pp1, ASR #5     ;// clip8(p3) clip8(p1)
+        AND     pp2pp0, r0x00FF00FF, pp2pp0, ASR #5     ;// clip8(p2) clip8(p0)
+
+        SUBS    outerCount, outerCount, #1              ;// outerCount--
+
+        ORR     p3210, pp2pp0, pp3pp1, LSL #8           ;// pack {p3,p2, p1, p0}
+        STR     p3210, [pDst], #4                       ;// store {pDst[0] to pDst[3]}
+
+        ORR     p7654, pp6pp4, pp7pp5, LSL #8           ;// pack {p7,p6, p5, p4}
+        M_STR   p7654, [pDst], dstStep                  ;// store {pDst[4] to pDst[7]}
+
+        SADD16  p7p5, p7p5, c                           ;// {p7 + c}, {p5 + c}
+        SADD16  p6p4, p6p4, c                           ;// {p6 + c}, {p4 + c}
+        SADD16  p3p1, p3p1, c                           ;// {p3 + c}, {p1 + c}
+        SADD16  p2p0, p2p0, c                           ;// {p2 + c}, {p0 + c}
+
+        BNE     LOOP_PLANE                              ;// Loop for 8 times
+        MOV     return, #OMX_Sts_NoErr
+        M_END
+
+        ENDIF ;// ARM1136JS
+
+
+
+        END
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntraChroma_8x8 ends
+;//-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
new file mode 100644
index 0000000000000000000000000000000000000000..1557208158041988ad756d2f3a892f4c14df68b0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
@@ -0,0 +1,501 @@
+;//
+;//
+;// File Name: omxVCM4P10_PredictIntra_16x16_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;// Description: intra prediction of one 16x16 block (vertical, horizontal, DC and plane
+;// modes) for the ARM1136J-S variant; writes 16 rows to pDst and returns OMX_Sts_NoErr in r0.
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+;//-------------------------------------------------------
+;// Jump table indexed by predMode: each entry is the address of the
+;// code label handling that mode (switch/case via LDR pc, [table, mode*4]).
+;//-------------------------------------------------------
+
+        M_TABLE armVCM4P10_pIndexTable16x16
+        DCD     OMX_VC_16X16_VERT, OMX_VC_16X16_HOR
+        DCD     OMX_VC_16X16_DC, OMX_VC_16X16_PLANE
+
+        IF ARM1136JS
+
+;//--------------------------------------------
+;// Constants
+;//--------------------------------------------
+BLK_SIZE        EQU 0x10
+MUL_CONST0      EQU 0x01010101
+MUL_CONST1      EQU 0x00060004
+MUL_CONST2      EQU 0x00070005
+MUL_CONST3      EQU 0x00030001
+MASK_CONST      EQU 0x00FF00FF
+
+;//--------------------------------------------
+;// Scratch variable (several names alias the same register; see RN numbers)
+;//--------------------------------------------
+y               RN 12
+pc              RN 15
+
+return          RN 0
+innerCount      RN 0
+outerCount      RN 1
+pSrcLeft2       RN 1
+pDst2           RN 2
+sum             RN 6
+pTable          RN 9
+temp1           RN 10
+temp2           RN 12
+cMul1           RN 11
+cMul2           RN 12
+count           RN 12
+dstStepx2       RN 11
+leftStepx2      RN 14
+r0x01010101     RN 10
+r0x00FF00FF     RN 11
+
+tVal0           RN 0
+tVal1           RN 1
+tVal2           RN 2
+tVal3           RN 3
+tVal4           RN 4
+tVal5           RN 5
+tVal6           RN 6
+tVal7           RN 7
+tVal8           RN 8
+tVal9           RN 9
+tVal10          RN 10
+tVal11          RN 11
+tVal12          RN 12
+tVal14          RN 14
+
+b               RN 12
+c               RN 14
+
+p2p0            RN 0
+p3p1            RN 1
+p6p4            RN 2
+p7p5            RN 4
+p10p8           RN 6
+p11p9           RN 7
+p14p12          RN 8
+p15p13          RN 9
+
+p3210           RN 10
+p7654           RN 10
+p111098         RN 10
+p15141312       RN 10
+
+;//--------------------------------------------
+;// Declare input registers
+;//--------------------------------------------
+pSrcLeft        RN 0    ;// in: left neighbour column, one byte every leftStep bytes
+pSrcAbove       RN 1    ;// in: 16 neighbour bytes above the block (loaded with LDM - presumably word aligned, TODO confirm)
+pSrcAboveLeft   RN 2    ;// in: above-left neighbour byte (read in plane mode only)
+pDst            RN 3    ;// out: 16x16 destination block, rows dstStep bytes apart
+leftStep        RN 4    ;// in: stride of pSrcLeft (stack argument)
+dstStep         RN 5    ;// in: stride of pDst (stack argument)
+predMode        RN 6    ;// in: index into armVCM4P10_pIndexTable16x16 (stack argument)
+availability    RN 7    ;// in: OMX_VC_UPPER / OMX_VC_LEFT flags, used by DC mode (stack argument)
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_16x16 starts
+;//-----------------------------------------------------------------------------------------------
+
+        ;// Write function header
+        M_START omxVCM4P10_PredictIntra_16x16, r11
+
+        ;// Define stack arguments
+        M_ARG   LeftStep, 4
+        M_ARG   DstStep, 4
+        M_ARG   PredMode, 4
+        M_ARG   Availability, 4
+
+        ;// M_STALL ARM1136JS=4
+
+        LDR     pTable,=armVCM4P10_pIndexTable16x16     ;// Load index table for switch case
+
+        ;// Load argument from the stack
+        M_LDR   predMode, PredMode                      ;// Arg predMode loaded from stack to reg
+        M_LDR   leftStep, LeftStep                      ;// Arg leftStep loaded from stack to reg
+        M_LDR   dstStep, DstStep                        ;// Arg dstStep loaded from stack to reg
+        M_LDR   availability, Availability              ;// Arg availability loaded from stack to reg
+
+        MOV     y, #BLK_SIZE                            ;// Outer Loop Count
+        LDR     pc, [pTable, predMode, LSL #2]          ;// Branch to the case selected by predMode
+
+OMX_VC_16X16_VERT
+        LDM     pSrcAbove, {tVal6,tVal7,tVal8,tVal9}    ;// tVal 6 to 9 = pSrcAbove[0 to 15]
+        ADD     dstStepx2, dstStep, dstStep             ;// double dstStep
+        ADD     pDst2, pDst, dstStep                    ;// pDst2- pDst advanced by dstStep
+
+        ;// M_STALL ARM1136JS=2 ;// Stall outside the loop
+
+LOOP_VERT
+        STM     pDst, {tVal6,tVal7,tVal8,tVal9}         ;// even row = tVal 6 to 9
+        SUBS    y, y, #2                                ;// y -= 2 (two rows per pass)
+        ADD     pDst, pDst, dstStepx2                   ;// pDst advanced by 2*dstStep
+        STM     pDst2, {tVal6,tVal7,tVal8,tVal9}        ;// odd row = tVal 6 to 9
+        ADD     pDst2, pDst2, dstStepx2                 ;// pDst2 advanced by 2*dstStep
+        BNE     LOOP_VERT                               ;// Loop for 8 times
+        MOV     return, #OMX_Sts_NoErr
+        M_EXIT
+
+
+OMX_VC_16X16_HOR
+
+        ;// M_STALL ARM1136JS=6
+
+        LDR     r0x01010101, =MUL_CONST0                ;// Const to repeat the byte in reg 4 times
+        MOV     y, #4                                   ;// Outer Loop Count (4 rows per pass)
+        M_LDRB  tVal6, [pSrcLeft], +leftStep            ;// tVal6 = left sample of row 0
+        ADD     pDst2, pDst, dstStep                    ;// pDst2- pDst advanced by dstStep
+        M_LDRB  tVal7, [pSrcLeft], +leftStep            ;// tVal7 = left sample of row 1
+        ADD     dstStepx2, dstStep, dstStep             ;// double dstStep
+        SUB     dstStepx2, dstStepx2, #12               ;// double dstStep minus 12
+
+LOOP_HOR
+        M_LDRB  tVal8, [pSrcLeft], +leftStep            ;// tVal8 = left sample of third row in this group
+        MUL     tVal6, tVal6, r0x01010101               ;// replicate the val in all the bytes
+        M_LDRB  tVal9, [pSrcLeft], +leftStep            ;// tVal9 = left sample of fourth row in this group
+        MUL     tVal7, tVal7, r0x01010101               ;// replicate the val in all the bytes
+        SUBS    y, y, #1                                ;// y--
+        STR     tVal6, [pDst], #+4                      ;// store {tVal6} at pDst[0 to 3]
+        STR     tVal7, [pDst2], #+4                     ;// store {tVal7} at pDst2[0 to 3]
+        STR     tVal6, [pDst], #+4                      ;// store {tVal6} at pDst[4 to 7]
+        STR     tVal7, [pDst2], #+4                     ;// store {tVal7} at pDst2[4 to 7]
+        MUL     tVal8, tVal8, r0x01010101               ;// replicate the val in all the bytes
+        STR     tVal6, [pDst], #+4                      ;// store {tVal6} at pDst[8 to 11]
+        STR     tVal7, [pDst2], #+4                     ;// store {tVal7} at pDst2[8 to 11]
+        MUL     tVal9, tVal9, r0x01010101               ;// replicate the val in all the bytes
+        M_STR   tVal6, [pDst], dstStepx2                ;// store {tVal6} at pDst[12 to 15]; pDst -> two rows down
+        M_STR   tVal7, [pDst2], dstStepx2               ;// store {tVal7} at pDst2[12 to 15]; pDst2 -> two rows down
+        STR     tVal8, [pDst], #+4                      ;// store {tVal8} at pDst[0 to 3]
+        STR     tVal9, [pDst2], #+4                     ;// store {tVal9} at pDst2[0 to 3]
+        STR     tVal8, [pDst], #+4                      ;// store {tVal8} at pDst[4 to 7]
+        STR     tVal9, [pDst2], #+4                     ;// store {tVal9} at pDst2[4 to 7]
+        STR     tVal8, [pDst], #+4                      ;// store {tVal8} at pDst[8 to 11]
+        STR     tVal9, [pDst2], #+4                     ;// store {tVal9} at pDst2[8 to 11]
+        M_STR   tVal8, [pDst], dstStepx2                ;// store {tVal8} at pDst[12 to 15]; pDst -> two rows down
+        M_LDRB  tVal6, [pSrcLeft], +leftStep            ;// prefetch first left sample of next group; NOTE(review): on the last pass this reads row 16 - confirm it is readable
+        M_STR   tVal9, [pDst2], dstStepx2               ;// store {tVal9} at pDst2[12 to 15]; pDst2 -> two rows down
+        M_LDRB  tVal7, [pSrcLeft], +leftStep            ;// prefetch second left sample of next group; NOTE(review): on the last pass this reads row 17 - confirm it is readable
+        BNE     LOOP_HOR                                ;// Loop for 4 times
+        MOV     return, #OMX_Sts_NoErr
+        M_EXIT
+
+OMX_VC_16X16_DC
+
+        ;// M_STALL ARM1136JS=2
+
+        MOV     count, #0                               ;// count = number of available neighbours (0..2)
+        TST     availability, #OMX_VC_UPPER             ;// if(availability & #OMX_VC_UPPER)
+        BEQ     TST_LEFT                                ;// Jump to Left if not upper
+        LDM     pSrcAbove,{tVal8,tVal9,tVal10,tVal11}   ;// tVal 8 to 11 = pSrcAbove[0 to 15]
+        ADD     count, count, #1                        ;// if upper inc count by 1
+
+        ;// M_STALL ARM1136JS=2
+
+        UXTB16  tVal2, tVal8                            ;// pSrcAbove[0, 2]
+        UXTB16  tVal6, tVal9                            ;// pSrcAbove[4, 6]
+        UADD16  tVal2, tVal2, tVal6                     ;// pSrcAbove[0, 2] + pSrcAbove[4, 6]
+        UXTB16  tVal8, tVal8, ROR #8                    ;// pSrcAbove[1, 3]
+        UXTB16  tVal9, tVal9, ROR #8                    ;// pSrcAbove[5, 7]
+        UADD16  tVal8, tVal8, tVal9                     ;// pSrcAbove[1, 3] + pSrcAbove[5, 7]
+        UADD16  tVal2, tVal2, tVal8                     ;// sum(pSrcAbove[0] to pSrcAbove[7]), split over two halfwords
+
+        UXTB16  tVal8, tVal10                           ;// pSrcAbove[8, 10]
+        UXTB16  tVal9, tVal11                           ;// pSrcAbove[12, 14]
+        UADD16  tVal8, tVal8, tVal9                     ;// pSrcAbove[8, 10] + pSrcAbove[12, 14]
+        UXTB16  tVal10, tVal10, ROR #8                  ;// pSrcAbove[9, 11]
+        UXTB16  tVal11, tVal11, ROR #8                  ;// pSrcAbove[13, 15]
+        UADD16  tVal10, tVal10, tVal11                  ;// pSrcAbove[9, 11] + pSrcAbove[13, 15]
+        UADD16  tVal8, tVal8, tVal10                    ;// sum(pSrcAbove[8] to pSrcAbove[15]), split over two halfwords
+
+        UADD16  tVal2, tVal2, tVal8                     ;// sum(pSrcAbove[0] to pSrcAbove[15]), split over two halfwords
+
+        ;// M_STALL ARM1136JS=1
+
+        ADD     tVal2, tVal2, tVal2, LSR #16            ;// low halfword = sum(pSrcAbove[0] to pSrcAbove[15])
+
+        ;// M_STALL ARM1136JS=1
+
+        UXTH    sum, tVal2                              ;// Extract the lower half for result
+
+TST_LEFT
+        TST     availability, #OMX_VC_LEFT
+        BEQ     TST_COUNT
+        ADD     leftStepx2, leftStep,leftStep           ;// leftStepx2 = 2 * leftStep
+        ADD     pSrcLeft2, pSrcLeft, leftStep           ;// pSrcLeft2 = pSrcLeft + leftStep
+
+        M_LDRB  tVal8, [pSrcLeft], +leftStepx2          ;// tVal8 = pSrcLeft[0]
+        M_LDRB  tVal9, [pSrcLeft2], +leftStepx2         ;// tVal9 = pSrcLeft[1]
+        M_LDRB  tVal10, [pSrcLeft], +leftStepx2         ;// tVal10= pSrcLeft[2]
+        M_LDRB  tVal11, [pSrcLeft2],+leftStepx2         ;// tVal11= pSrcLeft[3]
+        ADD     tVal7, tVal8, tVal9                     ;// tVal7 = tVal8 + tVal9 (availability no longer needed)
+        ADD     count, count, #1                        ;// Inc Counter if Left is available
+        ADD     tVal6, tVal10, tVal11                   ;// tVal6 = tVal10 + tVal11 (upper sum is kept in tVal2)
+
+        M_LDRB  tVal8, [pSrcLeft], +leftStepx2          ;// tVal8 = pSrcLeft[4]
+        M_LDRB  tVal9, [pSrcLeft2], +leftStepx2         ;// tVal9 = pSrcLeft[5]
+        M_LDRB  tVal10, [pSrcLeft], +leftStepx2         ;// tVal10= pSrcLeft[6]
+        M_LDRB  tVal11, [pSrcLeft2],+leftStepx2         ;// tVal11= pSrcLeft[7]
+        ADD     sum, tVal7, tVal6                       ;// sum = tVal7 + tVal6 = sum(pSrcLeft[0] to pSrcLeft[3])
+        ADD     tVal8, tVal8, tVal9                     ;// tVal8 = tVal8 + tVal9
+        ADD     tVal10, tVal10, tVal11                  ;// tVal10= tVal10 + tVal11
+        ADD     tVal7, tVal8, tVal10                    ;// tVal7 = tVal8 + tVal10
+
+
+        M_LDRB  tVal8, [pSrcLeft], +leftStepx2          ;// tVal8 = pSrcLeft[8]
+        M_LDRB  tVal9, [pSrcLeft2], +leftStepx2         ;// tVal9 = pSrcLeft[9]
+        M_LDRB  tVal10, [pSrcLeft], +leftStepx2         ;// tVal10= pSrcLeft[10]
+        M_LDRB  tVal11, [pSrcLeft2],+leftStepx2         ;// tVal11= pSrcLeft[11]
+        ADD     sum, sum, tVal7                         ;// sum = sum + tVal7
+        ADD     tVal8, tVal8, tVal9                     ;// tVal8 = tVal8 + tVal9
+        ADD     tVal10, tVal10, tVal11                  ;// tVal10= tVal10 + tVal11
+        ADD     tVal7, tVal8, tVal10                    ;// tVal7 = tVal8 + tVal10
+
+
+        M_LDRB  tVal8, [pSrcLeft], +leftStepx2          ;// tVal8 = pSrcLeft[12]
+        M_LDRB  tVal9, [pSrcLeft2], +leftStepx2         ;// tVal9 = pSrcLeft[13]
+        M_LDRB  tVal10, [pSrcLeft], +leftStepx2         ;// tVal10= pSrcLeft[14]
+        M_LDRB  tVal11, [pSrcLeft2],+leftStepx2         ;// tVal11= pSrcLeft[15]
+        ADD     sum, sum, tVal7                         ;// sum = sum + tVal7
+        ADD     tVal8, tVal8, tVal9                     ;// tVal8 = tVal8 + tVal9
+        ADD     tVal10, tVal10, tVal11                  ;// tVal10= tVal10 + tVal11
+        ADD     tVal7, tVal8, tVal10                    ;// tVal7 = tVal8 + tVal10
+        ADD     sum, sum, tVal7                         ;// sum = sum(pSrcLeft[0] to pSrcLeft[15])
+
+TST_COUNT
+        CMP     count, #0                               ;// if(count == 0)
+        MOVEQ   sum, #128                               ;// sum = 128 if(count == 0)
+        BEQ     TST_COUNT0                              ;// if(count == 0)
+        CMP     count, #1                               ;// if(count == 1); flags stay live until LSRNE below
+        ADDEQ   sum, sum, #8                            ;// sum += 8 if(count == 1)
+        ADDNE   sum, sum, tVal2                         ;// sum = sumleft + sumupper
+        ADDNE   sum, sum, #16                           ;// sum += 16 if(count == 2)
+
+        ;// M_STALL ARM1136JS=1
+
+        UXTH    sum, sum                                ;// keep low halfword (drops junk carried in tVal2's upper half)
+
+        ;// M_STALL ARM1136JS=1
+
+        LSREQ   sum, sum, #4                            ;// sum >> 4 if(count == 1)
+
+        ;// M_STALL ARM1136JS=1
+
+        LSRNE   sum, sum, #5                            ;// sum >> 5 if(count == 2)
+
+TST_COUNT0
+
+        ;// M_STALL ARM1136JS=1
+
+        ORR     sum, sum, sum, LSL #8                   ;// sum replicated in two bytes
+
+        ;// M_STALL ARM1136JS=1
+
+        ORR     tVal6, sum, sum, LSL #16                ;// sum replicated in all bytes
+        CPY     tVal7, tVal6                            ;// tVal7 = tVal6
+        CPY     tVal8, tVal6                            ;// tVal8 = tVal6
+        CPY     tVal9, tVal6                            ;// tVal9 = tVal6
+        ADD     dstStepx2, dstStep, dstStep             ;// double dstStep
+        ADD     pDst2, pDst, dstStep                    ;// pDst2- pDst advanced by dstStep
+        MOV     y, #BLK_SIZE                            ;// Outer Loop Count
+
+LOOP_DC
+        STM     pDst, {tVal6,tVal7,tVal8,tVal9}         ;// even row = tVal 6 to 9
+        SUBS    y, y, #2                                ;// y -= 2 (two rows per pass)
+        ADD     pDst, pDst, dstStepx2                   ;// pDst advanced by 2*dstStep
+        STM     pDst2, {tVal6,tVal7,tVal8,tVal9}        ;// odd row = tVal 6 to 9
+        ADD     pDst2, pDst2, dstStepx2                 ;// pDst2 advanced by 2*dstStep
+        BNE     LOOP_DC                                 ;// Loop for 8 times
+
+        MOV     return, #OMX_Sts_NoErr
+        M_EXIT
+
+OMX_VC_16X16_PLANE
+
+        ;// M_STALL ARM1136JS=3
+        RSB     tVal14, leftStep, leftStep, LSL #4      ;// tVal14 = 15*leftStep
+
+        ;// M_STALL ARM1136JS=2
+        LDRB    tVal10, [pSrcLeft, tVal14]              ;// tVal10 = pSrcLeft[15*leftStep]
+        LDRB    tVal11, [pSrcAboveLeft]                 ;// tVal11 = pSrcAboveLeft[0]
+        LDRB    tVal12, [pSrcAbove, #15]                ;// tVal12 = pSrcAbove[15]
+
+        ADD     tVal2, tVal12, tVal10                   ;// tVal2 = pSrcAbove[15] + pSrcLeft[15*leftStep]
+        SUB     tVal10, tVal10, tVal11                  ;// tVal10 = V0 = pSrcLeft[15*leftStep] - pSrcAboveLeft[0]
+        SUB     tVal11, tVal12, tVal11                  ;// tVal11 = H0 = pSrcAbove[15] - pSrcAboveLeft[0]
+        MOV     tVal2, tVal2, LSL #4                    ;// tVal2 = a = 16 * (pSrcAbove[15] + pSrcLeft[15*leftStep])
+
+        MOV     tVal11, tVal11, LSL #3                  ;// H = 8*([15]-[-1])
+        LDRB    tVal6, [pSrcAbove, #0]
+        LDRB    tVal7, [pSrcAbove, #14]
+        SUB     tVal8, tVal7, tVal6
+        RSB     tVal8, tVal8, tVal8, LSL #3             ;// 7*([14]-[0])
+        ADD     tVal11, tVal11, tVal8
+        LDRB    tVal6, [pSrcAbove, #1]
+        LDRB    tVal7, [pSrcAbove, #13]
+        SUB     tVal8, tVal7, tVal6
+        ADD     tVal8, tVal8, tVal8
+        ADD     tVal8, tVal8, tVal8, LSL #1             ;// 6*([13]-[1])
+        ADD     tVal11, tVal11, tVal8
+        LDRB    tVal6, [pSrcAbove, #2]
+        LDRB    tVal7, [pSrcAbove, #12]
+        SUB     tVal8, tVal7, tVal6
+        ADD     tVal8, tVal8, tVal8, LSL #2             ;// 5*([12]-[2])
+        ADD     tVal11, tVal11, tVal8
+        LDRB    tVal6, [pSrcAbove, #3]
+        LDRB    tVal7, [pSrcAbove, #11]
+        SUB     tVal8, tVal7, tVal6
+        ADD     tVal11, tVal11, tVal8, LSL #2           ;// + 4*([11]-[3])
+        LDRB    tVal6, [pSrcAbove, #4]
+        LDRB    tVal7, [pSrcAbove, #10]
+        SUB     tVal8, tVal7, tVal6
+        ADD     tVal8, tVal8, tVal8, LSL #1             ;// 3*([10]-[4])
+        ADD     tVal11, tVal11, tVal8
+        LDRB    tVal6, [pSrcAbove, #5]
+        LDRB    tVal7, [pSrcAbove, #9]
+        SUB     tVal8, tVal7, tVal6
+        ADD     tVal11, tVal11, tVal8, LSL #1           ;// + 2*([9]-[5])
+        LDRB    tVal6, [pSrcAbove, #6]
+        LDRB    tVal7, [pSrcAbove, #8]
+        SUB     tVal8, tVal7, tVal6                     ;// 1*([8]-[6])
+        ADD     tVal7, tVal11, tVal8                    ;// tVal7 = H
+
+        ADD     tVal2, tVal2, #16                       ;// tVal2 = a + 16
+        MOV     tVal1, pSrcLeft                         ;// tVal1 = pSrcLeft (walks downwards; pSrcAbove no longer needed)
+        SUB     tVal9, tVal14, leftStep                 ;// tVal9 = 14*leftStep
+        ADD     tVal9, pSrcLeft, tVal9                  ;// tVal9 = pSrcLeft + 14*leftStep (walks upwards)
+
+        M_LDRB  tVal8, [tVal9], -leftStep               ;// tVal8 = pSrcLeft[14*leftStep]
+        M_LDRB  tVal11, [tVal1], +leftStep              ;// tVal11 = pSrcLeft[0]
+        ADD     tVal7, tVal7, tVal7, LSL #2             ;// tVal7 = 5 * H
+        ADD     tVal7, tVal7, #32                       ;// tVal7 = 5 * H + 32
+        SUB     tVal8, tVal8, tVal11                    ;// tVal8 = pSrcLeft[14*leftStep] - pSrcLeft[0]
+        ASR     tVal12, tVal7, #6                       ;// tVal12 = b = (5 * H + 32) >> 6
+
+        RSB     tVal8, tVal8, tVal8, LSL #3             ;// tVal8 = V1 = 7* (pSrcLeft[14*leftStep]-pSrcLeft[0])
+        ADD     tVal6, tVal8, tVal10, LSL #3            ;// tVal6 = V = 8*V0 + V1
+        M_LDRB  tVal8, [tVal9], -leftStep               ;// tVal8 = pSrcLeft[13*leftStep]
+        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[leftStep]
+        RSB     tVal7, tVal12, tVal12, LSL #3           ;// tVal7 = 7*b
+        SUB     tVal2, tVal2, tVal7                     ;// tVal2 = a + 16 - 7*b
+        SUB     tVal7, tVal8, tVal10                    ;// tVal7 = pSrcLeft[13*leftStep] - pSrcLeft[leftStep]
+        M_LDRB  tVal8, [tVal9], -leftStep               ;// tVal8 = pSrcLeft[12*leftStep]
+        ADD     tVal7, tVal7, tVal7                     ;// tVal7 = 2 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
+        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[2*leftStep]
+        ADD     tVal7, tVal7, tVal7, LSL #1             ;// tVal7 = 6 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
+        ADD     tVal6, tVal6, tVal7                     ;// tVal6 = V = V + V2
+        SUB     tVal7, tVal8, tVal10                    ;// tVal7 = pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep]
+        M_LDRB  tVal8, [tVal9], -leftStep               ;// tVal8 = pSrcLeft[11*leftStep]
+        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[3*leftStep]
+        ADD     tVal7, tVal7, tVal7, LSL #2             ;// tVal7 = 5 * (pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep])
+        ADD     tVal6, tVal6, tVal7                     ;// tVal6 = V = V + V3
+        SUB     tVal7, tVal8, tVal10                    ;// tVal7 = pSrcLeft[11*leftStep] - pSrcLeft[3*leftStep]
+        M_LDRB  tVal8, [tVal9], -leftStep               ;// tVal8 = pSrcLeft[10*leftStep]
+        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[4*leftStep]
+        ADD     tVal6, tVal6, tVal7, LSL #2             ;// tVal6 = V = V + V4
+        SUB     dstStep, dstStep, #16                   ;// dstStep - 16 (each row's four stores post-increment by 16)
+        SUB     tVal7, tVal8, tVal10                    ;// tVal7 = pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep]
+        M_LDRB  tVal8, [tVal9], -leftStep               ;// tVal8 = pSrcLeft[9*leftStep]
+        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[5*leftStep]
+        ADD     tVal7, tVal7, tVal7, LSL #1             ;// tVal7 = 3 * (pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep])
+        ADD     tVal6, tVal6, tVal7                     ;// tVal6 = V = V + V5
+        SUB     tVal7, tVal8, tVal10                    ;// tVal7 = pSrcLeft[9*leftStep] - pSrcLeft[5*leftStep]
+        M_LDRB  tVal8, [tVal9], -leftStep               ;// tVal8 = pSrcLeft[8*leftStep]
+        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[6*leftStep]
+        ADD     tVal6, tVal6, tVal7, LSL #1             ;// tVal6 = V = V + V6
+
+        ;// M_STALL ARM1136JS=1
+        SUB     tVal7, tVal8, tVal10                    ;// tVal7 = pSrcLeft[8*leftStep] - pSrcLeft[6*leftStep]
+        ADD     tVal6, tVal6, tVal7                     ;// tVal6 = V = V + V7
+
+        ;// M_STALL ARM1136JS=1
+        ADD     tVal6, tVal6, tVal6, LSL #2             ;// tVal6 = 5*V
+        ADD     tVal6, tVal6, #32                       ;// tVal6 = 5*V + 32
+
+        ;// M_STALL ARM1136JS=1
+        ASR     tVal14, tVal6, #6                       ;// tVal14 = c = (5*V + 32)>>6
+
+        ;// M_STALL ARM1136JS=1
+        RSB     tVal6, tVal14, tVal14, LSL #3           ;// tVal6 = 7*c
+        UXTH    tVal14, tVal14                          ;// tVal14 = Cleared the upper half word
+        ADD     tVal10, tVal12, tVal12                  ;// tVal10 = 2*b
+        ORR     tVal14, tVal14, tVal14, LSL #16         ;// tVal14 = {c , c}
+        SUB     tVal6, tVal2, tVal6                     ;// tVal6 = d = a - 7*b - 7*c + 16 (may be negative)
+        ADD     tVal1, tVal6, tVal10                    ;// tVal1 = p2 = d + 2*b
+        ADD     tVal10, tVal10, tVal12                  ;// tVal10 =3*b
+        PKHBT   tVal0, tVal6, tVal1, LSL #16            ;// tVal0 = p2p0 = pack {p2, p0}; takes only d[15:0] so a negative d cannot set p2's bits (plain ORR did)
+        UXTH    tVal12, tVal12                          ;// tVal12 = Cleared the upper half word
+        UXTH    tVal10, tVal10                          ;// tVal10 = Cleared the upper half word
+        ORR     tVal12, tVal12, tVal12, LSL #16         ;// tVal12 = {b , b}
+        ORR     tVal10, tVal10, tVal10, LSL #16         ;// tVal10 = {3b , 3b}
+        SADD16  tVal1, tVal0, tVal12                    ;// tVal1 = p3p1 = p2p0 + {b,b}
+        SADD16  tVal2, tVal1, tVal10                    ;// tVal2 = p6p4 = p3p1 + {3b,3b}
+        SADD16  tVal4, tVal2, tVal12                    ;// tVal4 = p7p5 = p6p4 + {b,b}
+        SADD16  tVal6, tVal4, tVal10                    ;// tVal6 = p10p8 = p7p5 + {3b,3b}
+        SADD16  tVal7, tVal6, tVal12                    ;// tVal7 = p11p9 = p10p8 + {b,b}
+        SADD16  tVal8, tVal7, tVal10                    ;// tVal8 = p14p12 = p11p9 + {3b,3b}
+        SADD16  tVal9, tVal8, tVal12                    ;// tVal9 = p15p13 = p14p12 + {b,b}
+        LDR     r0x00FF00FF, =MASK_CONST                ;// r0x00FF00FF = 0x00FF00FF
+
+LOOP_PLANE
+
+        USAT16  temp2, #13, p3p1                        ;// clip13(p3) clip13(p1) into scratch
+        USAT16  temp1, #13, p2p0                        ;// clip13(p2) clip13(p0) into scratch
+        SADD16  p3p1, p3p1, c                           ;// advance unclipped accumulators to next row
+        SADD16  p2p0, p2p0, c
+        AND     temp2, r0x00FF00FF, temp2, ASR #5       ;// clip8(p3) clip8(p1)
+        AND     temp1, r0x00FF00FF, temp1, ASR #5       ;// clip8(p2) clip8(p0)
+        ORR     temp1, temp1, temp2, LSL #8             ;// pack {p3, p2, p1, p0}
+        STR     temp1, [pDst], #4                       ;// store pDst[0 to 3]
+
+        USAT16  temp2, #13, p7p5
+        USAT16  temp1, #13, p6p4
+        SADD16  p7p5, p7p5, c
+        SADD16  p6p4, p6p4, c
+        AND     temp2, r0x00FF00FF, temp2, ASR #5
+        AND     temp1, r0x00FF00FF, temp1, ASR #5
+        ORR     temp1, temp1, temp2, LSL #8             ;// pack {p7, p6, p5, p4}
+        STR     temp1, [pDst], #4                       ;// store pDst[4 to 7]
+
+        USAT16  temp2, #13, p11p9
+        USAT16  temp1, #13, p10p8
+        SADD16  p11p9, p11p9, c
+        SADD16  p10p8, p10p8, c
+        AND     temp2, r0x00FF00FF, temp2, ASR #5
+        AND     temp1, r0x00FF00FF, temp1, ASR #5
+        ORR     temp1, temp1, temp2, LSL #8             ;// pack {p11, p10, p9, p8}
+        STR     temp1, [pDst], #4                       ;// store pDst[8 to 11]
+
+        USAT16  temp2, #13, p15p13
+        USAT16  temp1, #13, p14p12
+        SADD16  p15p13, p15p13, c
+        SADD16  p14p12, p14p12, c
+        AND     temp2, r0x00FF00FF, temp2, ASR #5
+        AND     temp1, r0x00FF00FF, temp1, ASR #5
+        ORR     temp1, temp1, temp2, LSL #8             ;// pack {p15, p14, p13, p12}
+        STR     temp1, [pDst], #4                       ;// store pDst[12 to 15]
+
+        ADDS    r0x00FF00FF, r0x00FF00FF, #1<<28        ;// Loop counter value in top 4 bits; carry sets on the 16th add
+
+        ADD     pDst, pDst, dstStep                     ;// next row (dstStep was reduced by 16 above)
+
+        BCC     LOOP_PLANE                              ;// Loop for 16 times
+        MOV     return, #OMX_Sts_NoErr
+        M_END
+
+        ENDIF ;// ARM1136JS
+
+
+        END
+;-----------------------------------------------------------------------------------------------
+;
omxVCM4P10_PredictIntra_16x16 ends +;----------------------------------------------------------------------------------------------- diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s new file mode 100644 index 0000000000000000000000000000000000000000..a90f4604967f1e528f48c145abb47672a619cb93 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s @@ -0,0 +1,567 @@ +;// +;// +;// File Name: omxVCM4P10_PredictIntra_4x4_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// Description: +;// 4x4 intra prediction. Dispatches on predMode through a jump table to one of +;// nine mode handlers; each handler writes four 4-byte rows to pDst. + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + +;// Define the processor variants supported by this file + + M_VARIANTS ARM1136JS + +;//------------------------------------------------------- +;// This table implements a C-style switch/case in asm by +;// the method of two levels of indexing.
+;//------------------------------------------------------- + + M_TABLE armVCM4P10_pSwitchTable4x4 + DCD OMX_VC_4x4_VERT, OMX_VC_4x4_HOR + DCD OMX_VC_4x4_DC, OMX_VC_4x4_DIAG_DL + DCD OMX_VC_4x4_DIAG_DR, OMX_VC_4x4_VR + DCD OMX_VC_4x4_HD, OMX_VC_4x4_VL + DCD OMX_VC_4x4_HU + + IF ARM1136JS + +;//-------------------------------------------- +;// Constants +;//-------------------------------------------- +BLK_SIZE EQU 0x8 +MUL_CONST0 EQU 0x01010101 ;// multiplier that replicates a byte into all four lanes +ADD_CONST1 EQU 0x80808080 ;// +128 in every byte lane (applied with UADD8) + +;//-------------------------------------------- +;// Scratch variables +;//-------------------------------------------- +return RN 0 +pTable RN 9 +pc RN 15 +r0x01010101 RN 1 ;// shares r1 with pSrcAbove +r0x80808080 RN 0 ;// shares r0 with pSrcLeft and return + +tVal0 RN 0 +tVal1 RN 1 +tVal2 RN 2 +tVal4 RN 4 +tVal6 RN 6 +tVal7 RN 7 +tVal8 RN 8 +tVal9 RN 9 +tVal10 RN 10 +tVal11 RN 11 +tVal12 RN 12 +tVal14 RN 14 + +Out0 RN 6 +Out1 RN 7 +Out2 RN 8 +Out3 RN 9 + +Left0 RN 6 +Left1 RN 7 +Left2 RN 8 +Left3 RN 9 + +Above0123 RN 12 +Above4567 RN 14 + +AboveLeft RN 10 + +;//-------------------------------------------- +;// Declare input registers +;//-------------------------------------------- +pSrcLeft RN 0 ;// input pointer +pSrcAbove RN 1 ;// input pointer +pSrcAboveLeft RN 2 ;// input pointer +pDst RN 3 ;// output pointer +leftStep RN 4 ;// input variable +dstStep RN 5 ;// input variable +predMode RN 6 ;// input variable +availability RN 7 ;// input variable + +;//----------------------------------------------------------------------------------------------- +;// omxVCM4P10_PredictIntra_4x4 starts +;//----------------------------------------------------------------------------------------------- + + ;// Write function header + M_START omxVCM4P10_PredictIntra_4x4, r11 + + ;// Define stack arguments + M_ARG LeftStep, 4 + M_ARG DstStep, 4 + M_ARG PredMode, 4 + M_ARG Availability, 4 + + ;// M_STALL ARM1136JS=4 + + LDR pTable,=armVCM4P10_pSwitchTable4x4 ;// Load index table for switch case + + ;// Load argument from the stack + M_LDR predMode, PredMode ;// Arg predMode 
loaded from stack to reg + M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg + M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg + M_LDR availability, Availability ;// Arg availability loaded from stack to reg + + LDR pc, [pTable, predMode, LSL #2] ;// Branch to the case based on predMode + +OMX_VC_4x4_VERT + + LDR Above0123, [pSrcAbove] ;// Above0123 = pSrcAbove[0 to 3] + M_STR Above0123, [pDst], dstStep ;// pDst[0 to 3] = Above0123 + M_STR Above0123, [pDst], dstStep ;// pDst[4 to 7] = Above0123 + M_STR Above0123, [pDst], dstStep ;// pDst[8 to 11] = Above0123 + STR Above0123, [pDst] ;// pDst[12 to 15] = Above0123 + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break frm case + +OMX_VC_4x4_HOR + + ;// M_STALL ARM1136JS=6 + + LDR r0x01010101, =MUL_CONST0 ;// Const to repeat the byte in reg 4 times + M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0] + M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1] + M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2] + LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3] + MUL Out0, Left0, r0x01010101 ;// replicate the val in all the bytes + MUL Out1, Left1, r0x01010101 ;// replicate the val in all the bytes + MUL Out2, Left2, r0x01010101 ;// replicate the val in all the bytes + MUL Out3, Left3, r0x01010101 ;// replicate the val in all the bytes + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [0 to 3 ] + M_STR Out1, [pDst], dstStep ;// store {Out1} at pDst [4 to 7 ] + M_STR Out2, [pDst], dstStep ;// store {Out2} at pDst [8 to 11] + STR Out3, [pDst] ;// store {Out3} at pDst [12 to 15] + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break frm case + +OMX_VC_4x4_DC + + ;// M_STALL ARM1136JS=6 + + AND availability, availability, #(OMX_VC_UPPER + OMX_VC_LEFT) ;// keep only the UPPER and LEFT availability bits + CMP availability, #(OMX_VC_UPPER + OMX_VC_LEFT) ;// are both neighbours available? + BNE UpperOrLeftOrNoneAvailable ;// Jump to Upper if not both + LDR Above0123, [pSrcAbove] ;// Above0123 = pSrcAbove[0 to 3] + + ;// M_STALL 
ARM1136JS=1 + + UXTB16 tVal7, Above0123 ;// pSrcAbove[0, 2] + UXTB16 tVal6, Above0123, ROR #8 ;// pSrcAbove[1, 3] + UADD16 tVal11, tVal6, tVal7 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3] + M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0] + M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1] + ADD tVal11, tVal11, LSR #16 ;// sum(pSrcAbove[0] to pSrcAbove[3]) + M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2] + LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3] + UXTH tVal11, tVal11 ;// upsum1 (Clear the top junk bits) + ADD tVal6, Left0, Left1 ;// tVal6 = Left0 + Left1 + ADD tVal7, Left2, Left3 ;// tVal7 = Left2 + Left3 + ADD tVal6, tVal6, tVal7 ;// tVal6 = tVal6 + tVal7 + ADD Out0, tVal6, tVal11 ;// Out0 = tVal6 + tVal11 + ADD Out0, Out0, #4 ;// Out0 = Out0 + 4 + LDR r0x01010101, =MUL_CONST0 ;// 0x01010101 + MOV Out0, Out0, LSR #3 ;// Out0 = (Out0 + 4)>>3 + + ;// M_STALL ARM1136JS=1 + + MUL Out0, Out0, r0x01010101 ;// replicate the val in all the bytes + + ;// M_STALL ARM1136JS=1 + + MOV return, #OMX_Sts_NoErr + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [0 to 3 ] + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [4 to 7 ] + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [8 to 11] + STR Out0, [pDst] ;// store {Out0} at pDst [12 to 15] + M_EXIT ;// Macro to exit midway-break frm case + +UpperOrLeftOrNoneAvailable + ;// M_STALL ARM1136JS=3 + + CMP availability, #OMX_VC_UPPER ;// if(availability & #OMX_VC_UPPER) + BNE LeftOrNoneAvailable ;// Jump to Left if not upper + LDR Above0123, [pSrcAbove] ;// Above0123 = pSrcAbove[0 to 3] + + ;// M_STALL ARM1136JS=3 + + UXTB16 tVal7, Above0123 ;// pSrcAbove[0, 2] + UXTB16 tVal6, Above0123, ROR #8 ;// pSrcAbove[1, 3] + UADD16 Out0, tVal6, tVal7 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3] + LDR r0x01010101, =MUL_CONST0 ;// 0x01010101 + ADD Out0, Out0, LSR #16 ;// sum(pSrcAbove[0] to pSrcAbove[3]) + + ;// M_STALL ARM1136JS=1 + + UXTH Out0, Out0 ;// upsum1 (Clear the top junk bits) + ADD Out0, Out0, 
#2 ;// Out0 = Out0 + 2 + + ;// M_STALL ARM1136JS=1 + + MOV Out0, Out0, LSR #2 ;// Out0 = (Out0 + 2)>>2 + + ;// M_STALL ARM1136JS=1 + + MUL Out0, Out0, r0x01010101 ;// replicate the val in all the bytes + + ;// M_STALL ARM1136JS=1 + + MOV return, #OMX_Sts_NoErr + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [0 to 3 ] + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [4 to 7 ] + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [8 to 11] + STR Out0, [pDst] ;// store {Out0} at pDst [12 to 15] + + M_EXIT ;// Macro to exit midway-break frm case + +LeftOrNoneAvailable + ;// M_STALL ARM1136JS=3 + + LDR r0x01010101, =MUL_CONST0 ;// 0x01010101 + CMP availability, #OMX_VC_LEFT + BNE NoneAvailable + M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0] + M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1] + M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2] + LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3] + ADD Out0, Left0, Left1 ;// Out0 = Left0 + Left1 + + ;// M_STALL ARM1136JS=1 + + ADD Out1, Left2, Left3 ;// Out1 = Left2 + Left3 + ADD Out0, Out0, Out1 ;// Out0 = Out0 + Out1 + ADD Out0, Out0, #2 ;// Out0 = Out0 + 2 + + ;// M_STALL ARM1136JS=1 + + MOV Out0, Out0, LSR #2 ;// Out0 = (Out0 + 2)>>2 + + ;// M_STALL ARM1136JS=1 + + MUL Out0, Out0, r0x01010101 ;// replicate the val in all the bytes + + ;// M_STALL ARM1136JS=1 + + MOV return, #OMX_Sts_NoErr + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [0 to 3 ] + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [4 to 7 ] + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [8 to 11] + STR Out0, [pDst] ;// store {Out0} at pDst [12 to 15] + M_EXIT ;// Macro to exit midway-break frm case + +NoneAvailable + MOV Out0, #128 ;// Out0 = 128 if(count == 0) + + ;// M_STALL ARM1136JS=5 + + MUL Out0, Out0, r0x01010101 ;// replicate the val in all the bytes + + ;// M_STALL ARM1136JS=1 + + MOV return, #OMX_Sts_NoErr + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [0 to 3 ] + M_STR 
Out0, [pDst], dstStep ;// store {Out0} at pDst [4 to 7 ] + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [8 to 11] + STR Out0, [pDst] ;// store {Out0} at pDst [12 to 15] + M_EXIT ;// Macro to exit midway-break frm case + +OMX_VC_4x4_DIAG_DL + + ;//------------------------------------------------------------------ + ;// f = (a+2*b+c+2)>>2 + ;// Calculate as: + ;// d = (a + c )>>1 + ;// e = (d - b')>>1 + ;// f = e + 128 + ;//------------------------------------------------------------------ + + ;// M_STALL ARM1136JS=3 + + TST availability, #OMX_VC_UPPER_RIGHT + LDMIA pSrcAbove, {Above0123, Above4567} ;// Above0123, Above4567 = pSrcAbove[0 to 7] + LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 + BNE DLUpperRightAvailable + LDR r0x01010101, =MUL_CONST0 ;// 0x01010101 + MOV tVal7, Above0123, LSR #24 ;// {00, 00, 00, U3 } + MOV tVal11, tVal7, LSL #24 ;// {U3, 00, 00, 00 } + MUL Out3, tVal7, r0x01010101 ;// {U3, U3, U3, U3 } + MOV tVal8, Above0123, LSR #16 ;// {00, 00, U3, U2 } + MOV tVal10, Above0123, LSR #8 ;// {00, U3, U2, U1 } + MVN tVal10, tVal10 ;// {00', U3', U2', U1'} + UHADD8 tVal8, tVal8, Above0123 ;// {xx, xx, d1, d0 } + UHADD8 tVal6, Above0123, tVal9 ;// {xx, d2, xx, xx } (tVal9 is r9 = Out3 = {U3,U3,U3,U3}) + UHSUB8 tVal8, tVal8, tVal10 ;// {xx, xx, e1, e0 } + UHSUB8 tVal6, tVal6, tVal10 ;// {xx, e2, xx, xx } + UADD8 tVal8, tVal8, r0x80808080 ;// {xx, xx, f1, f0 } + UADD8 tVal6, tVal6, r0x80808080 ;// {xx, f2, xx, xx } + + ;// M_STALL ARM1136JS=1 + + PKHBT tVal6, tVal8, tVal6 ;// {xx, f2, f1, f0 } + BIC tVal6, tVal6, #0xFF000000 ;// {00, f2, f1, f0 } + ORR Out0, tVal6, tVal11 ;// {U3, f2, f1, f0 } + + ;// M_STALL ARM1136JS=1 + + PKHTB Out1, Out3, Out0, ASR #8 ;// {U3, U3, f2, f1 } + MOV return, #OMX_Sts_NoErr + PKHTB Out2, Out3, Out1, ASR #8 ;// {U3, U3, U3, f2 } + + M_STR Out0, [pDst], dstStep ;// store {f3 to f0} at pDst[3 to 0 ] + M_STR Out1, [pDst], dstStep ;// store {f4 to f1} at pDst[7 to 4 ] + M_STR Out2, [pDst], dstStep ;// store {f5 to f2} at pDst[11 to 8 ] + STR Out3, [pDst] ;// 
store {f6 to f3} at pDSt[15 to 12] + M_EXIT ;// Macro to exit midway-break frm case + +DLUpperRightAvailable + + MOV tVal8, Above0123, LSR #24 ;// {00, 00, 00, U3 } + MOV tVal9, Above0123, LSR #16 ;// {00, 00, U3, U2 } + MOV tVal10, Above0123, LSR #8 ;// {00, U3, U2, U1 } + ORR tVal8, tVal8, Above4567, LSL #8 ;// {U6, U5, U4, U3 } + ORR tVal10, tVal10, Above4567, LSL #24 ;// {U4, U3, U2, U1 } + PKHBT tVal9, tVal9, Above4567, LSL #16 ;// {U5, U4, U3, U2 } + MVN tVal1, tVal8 ;// {U6', U5', U4', U3'} + MVN tVal10, tVal10 ;// {U4', U3', U2', U1'} + MVN tVal2, Above4567 ;// {U7', U6', U5', U4'} + UHADD8 tVal6, Above0123, tVal9 ;// {d3, d2, d1, d0 } + UHADD8 tVal9, tVal9, Above4567 ;// {d5, d4, d3, d2 } + UHADD8 tVal8, Above4567, tVal8 ;// {d6, xx, xx, xx } + UHSUB8 tVal6, tVal6, tVal10 ;// {e3, e2, e1, e0 } + UHSUB8 tVal12, tVal9, tVal1 ;// {e5, e4, e3, e2 } + UHSUB8 tVal8, tVal8, tVal2 ;// {e6, xx, xx, xx } + UADD8 Out0, tVal6, r0x80808080 ;// {f3, f2, f1, f0 } + UADD8 tVal9, tVal8, r0x80808080 ;// {f6, xx, xx, xx } + UADD8 Out2, tVal12, r0x80808080 ;// {f5, f4, f3, f2 } + MOV tVal7, Out0, LSR #8 ;// {00, f3, f2, f1 } + AND tVal9, tVal9, #0xFF000000 ;// {f6, 00, 00, 00 } + PKHBT Out1, tVal7, Out2, LSL #8 ;// {f4, f3, f2, f1 } + ORR Out3, tVal9, Out2, LSR #8 ;// {f6, f5, f4, f3 } + M_STR Out0, [pDst], dstStep ;// store {f3 to f0} at pDst[3 to 0 ] + M_STR Out1, [pDst], dstStep ;// store {f4 to f1} at pDst[7 to 4 ] + M_STR Out2, [pDst], dstStep ;// store {f5 to f2} at pDst[11 to 8 ] + STR Out3, [pDst] ;// store {f6 to f3} at pDst[15 to 12] + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break frm case + + +OMX_VC_4x4_DIAG_DR + + ;// M_STALL ARM1136JS=4 + + M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0] + M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1] + M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2] + LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3] + LDRB AboveLeft, [pSrcAboveLeft] ;// AboveLeft = pSrcAboveLeft[0] + ORR 
tVal7, Left1, Left0, LSL #8 ;// tVal7 = 00 00 L0 L1 + LDR Above0123, [pSrcAbove] ;// Above0123 = U3 U2 U1 U0 + LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 + ORR tVal8, Left3, Left2, LSL #8 ;// tVal8 = 00 00 L2 L3 + PKHBT tVal7, tVal8, tVal7, LSL #16 ;// tVal7 = L0 L1 L2 L3 + MOV tVal8, Above0123, LSL #8 ;// tVal8 = U2 U1 U0 00 + MOV tVal9, tVal7, LSR #8 ;// tVal9 = 00 L0 L1 L2 + ORR tVal8, tVal8, AboveLeft ;// tVal8 = U2 U1 U0 UL + ORR tVal9, tVal9, AboveLeft, LSL #24 ;// tVal9 = UL L0 L1 L2 + MOV tVal10, Above0123, LSL #24 ;// tVal10= U0 00 00 00 + UXTB tVal11, tVal7, ROR #24 ;// tVal11= 00 00 00 L0 + ORR tVal10, tVal10, tVal9, LSR #8 ;// tVal10= U0 UL L0 L1 + ORR tVal11, tVal11, tVal8, LSL #8 ;// tVal11= U1 U0 UL L0 + UHADD8 tVal11, Above0123, tVal11 ;// tVal11= d1 d0 dL g0 + UHADD8 tVal10, tVal7, tVal10 ;// tVal10= g0 g1 g2 g3 + MVN tVal8, tVal8 ;// tVal8 = U2'U1'U0'UL' + MVN tVal9, tVal9 ;// tVal9 = UL'L0'L1'L2' + UHSUB8 tVal11, tVal11, tVal8 ;// tVal11= e1 e0 eL h0 + UHSUB8 tVal10, tVal10, tVal9 ;// tVal10= h0 h1 h2 h3 + UADD8 Out3, tVal10, r0x80808080 ;// Out3 = i0 i1 i2 i3 + UADD8 Out0, tVal11, r0x80808080 ;// Out0 = f1 f0 fL i0 + UXTH tVal11, Out3, ROR #8 ;// tVal11= 00 00 i1 i2 + MOV tVal7, Out0, LSL #8 ;// tVal7 = f0 fL i0 00 + ORR Out1, tVal7, tVal11, LSR #8 ;// Out1 = f0 fL i0 i1 + PKHBT Out2, tVal11, Out0, LSL #16 ;// Out2 = fL i0 i1 i2 + M_STR Out0, [pDst], dstStep ;// store {f1 to i0} at pDst[3 to 0 ] + M_STR Out1, [pDst], dstStep ;// store {f0 to i1} at pDst[7 to 4 ] + M_STR Out2, [pDst], dstStep ;// store {fL to i2} at pDst[11 to 8 ] + STR Out3, [pDst] ;// store {i0 to i3} at pDst[15 to 12] + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break frm case + +OMX_VC_4x4_VR + + ;// M_STALL ARM1136JS=4 + + LDR Above0123, [pSrcAbove] ;// Above0123 = U3 U2 U1 U0 + LDRB AboveLeft, [pSrcAboveLeft] ;// AboveLeft = 00 00 00 UL + M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = 00 00 00 L0 + M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = 00 00 00 
L1 + LDRB Left2, [pSrcLeft] ;// Left2 = 00 00 00 L2 + MOV tVal0, Above0123, LSL #8 ;// tVal0 = U2 U1 U0 00 + MOV tVal9, Above0123 ;// tVal9 = U3 U2 U1 U0 + ORR tVal14, tVal0, AboveLeft ;// tVal14 = U2 U1 U0 UL + MVN tVal11, tVal14 ;// tVal11 = U2'U1'U0'UL' + MOV tVal2, tVal14, LSL #8 ;// tVal2 = U1 U0 UL 00 + UHSUB8 tVal1, Above0123, tVal11 ;// tVal1 = d2 d1 d0 dL + UHADD8 tVal10, AboveLeft, Left1 ;// tVal10 = 00 00 00 j1 + MVN tVal4, Left0 ;// tVal4 = 00 00 00 L0' + UHSUB8 tVal4, tVal10, tVal4 ;// tVal4 = 00 00 00 k1 + ORR tVal12, tVal0, Left0 ;// tVal12 = U2 U1 U0 L0 + ORR tVal14, tVal2, Left0 ;// tVal14 = U1 U0 UL L0 + LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 + UHADD8 tVal10, tVal9, tVal14 ;// tVal10 = g3 g2 g1 g0 + UADD8 Out0, tVal1, r0x80808080 ;// Out0 = e2 e1 e0 eL + UHSUB8 tVal10, tVal10, tVal11 ;// tVal10 = h3 h2 h1 h0 + M_STR Out0, [pDst], dstStep ;// store {e2 to eL} at pDst[3 to 0 ] + MOV tVal1, tVal14, LSL #8 ;// tVal1 = U0 UL L0 00 + MOV tVal6, Out0, LSL #8 ;// tVal6 = e1 e0 eL 00 + ORR tVal2, tVal2, Left1 ;// tVal2 = U1 U0 UL L1 + UADD8 tVal4, tVal4, r0x80808080 ;// tVal4 = 00 00 00 l1 + UADD8 Out1, tVal10, r0x80808080 ;// Out1 = i3 i2 i1 i0 + MVN tVal2, tVal2 ;// tVal2 = U1'U0'UL'L1' + ORR tVal1, tVal1, Left2 ;// tVal1 = U0 UL L0 L2 + ORR Out2, tVal6, tVal4 ;// Out2 = e1 e0 eL l1 + UHADD8 tVal1, tVal1, tVal12 ;// tVal1 = g2 g1 g0 j2 + M_STR Out1, [pDst], dstStep ;// store {i3 to i0} at pDst[7 to 4 ] + M_STR Out2, [pDst], dstStep ;// store {e1 to l1} at pDst[11 to 8 ] + UHSUB8 tVal9, tVal1, tVal2 ;// tVal9 = h2 h1 h0 k2 + UADD8 Out3, tVal9, r0x80808080 ;// Out3 = i2 i1 i0 l2 + STR Out3, [pDst] ;// store {i2 to l2} at pDst[15 to 12] + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break frm case + +OMX_VC_4x4_HD + + ;// M_STALL ARM1136JS=4 + + LDR Above0123, [pSrcAbove] ;// Above0123 = U3 U2 U1 U0 + LDRB AboveLeft, [pSrcAboveLeft] ;// AboveLeft = 00 00 00 UL + M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = 00 00 00 L0 + M_LDRB Left1, 
[pSrcLeft], leftStep ;// Left1 = 00 00 00 L1 + M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = 00 00 00 L2 + LDRB Left3, [pSrcLeft] ;// Left3 = 00 00 00 L3 + LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 + ORR tVal2, AboveLeft, Above0123, LSL #8;// tVal2 = U2 U1 U0 UL + MVN tVal1, Left0 ;// tVal1 = 00 00 00 L0' + ORR tVal4, Left0, tVal2, LSL #8 ;// tVal4 = U1 U0 UL L0 + MVN tVal2, tVal2 ;// tVal2 = U2'U1'U0'UL' + UHADD8 tVal4, tVal4, Above0123 ;// tVal4 = g3 g2 g1 g0 + UHSUB8 tVal1, AboveLeft, tVal1 ;// tVal1 = 00 00 00 dL + UHSUB8 tVal4, tVal4, tVal2 ;// tVal4 = h3 h2 h1 h0 + UADD8 tVal1, tVal1, r0x80808080 ;// tVal1 = 00 00 00 eL + UADD8 tVal4, tVal4, r0x80808080 ;// tVal4 = i3 i2 i1 i0 + ORR tVal2, Left0, AboveLeft, LSL #16 ;// tVal2 = 00 UL 00 L0 + MOV tVal4, tVal4, LSL #8 ;// tVal4 = i2 i1 i0 00 + ORR tVal11, Left1, Left0, LSL #16 ;// tVal11= 00 L0 00 L1 + ORR tVal7, Left2, Left1, LSL #16 ;// tVal7 = 00 L1 00 L2 + ORR tVal10, Left3, Left2, LSL #16 ;// tVal10= 00 L2 00 L3 + ORR Out0, tVal4, tVal1 ;// Out0 = i2 i1 i0 eL + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [0 to 3 ] + MOV tVal4, Out0, LSL #16 ;// tVal4 = i1 i0 00 00 + UHADD8 tVal2, tVal2, tVal7 ;// tVal2 = 00 j1 00 j2 + UHADD8 tVal6, tVal11, tVal10 ;// tVal6 = 00 j2 00 j3 + MVN tVal12, tVal11 ;// tVal12= 00 L0'00 L1' + MVN tVal14, tVal7 ;// tVal14= 00 L1'00 L2' + UHSUB8 tVal2, tVal2, tVal12 ;// tVal2 = 00 k1 00 k2 + UHSUB8 tVal8, tVal7, tVal12 ;// tVal8 = 00 d1 00 d2 + UHSUB8 tVal11, tVal6, tVal14 ;// tVal11= 00 k2 00 k3 + UHSUB8 tVal9, tVal10, tVal14 ;// tVal9 = 00 d2 00 d3 + UADD8 tVal2, tVal2, r0x80808080 ;// tVal2 = 00 l1 00 l2 + UADD8 tVal8, tVal8, r0x80808080 ;// tVal8 = 00 e1 00 e2 + UADD8 tVal11, tVal11, r0x80808080 ;// tVal11= 00 l2 00 l3 + UADD8 tVal9, tVal9, r0x80808080 ;// tVal9 = 00 e2 00 e3 + ORR Out2, tVal8, tVal2, LSL #8 ;// Out2 = l1 e1 l2 e2 + ORR Out3, tVal9, tVal11, LSL #8 ;// Out3 = l2 e2 l3 e3 + PKHTB Out1, tVal4, Out2, ASR #16 ;// Out1 = i1 i0 l1 e1 + M_STR Out1, [pDst], 
dstStep ;// store {Out1} at pDst [4 to 7 ] + M_STR Out2, [pDst], dstStep ;// store {Out2} at pDst [8 to 11] + STR Out3, [pDst] ;// store {Out3} at pDst [12 to 15] + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break frm case + +OMX_VC_4x4_VL + + ;// M_STALL ARM1136JS=3 + + LDMIA pSrcAbove, {Above0123, Above4567} ;// Above0123, Above4567 = pSrcAbove[0 to 7] + TST availability, #OMX_VC_UPPER_RIGHT + LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 + LDR r0x01010101, =MUL_CONST0 ;// 0x01010101 + MOV tVal11, Above0123, LSR #24 ;// tVal11= 00 00 00 U3 + MULEQ Above4567, tVal11, r0x01010101 ;// Above4567 = U3 U3 U3 U3 (only when the UPPER_RIGHT bit tested above is clear) + MOV tVal9, Above0123, LSR #8 ;// tVal9 = 00 U3 U2 U1 + MVN tVal10, Above0123 ;// tVal10= U3'U2'U1'U0' + ORR tVal2, tVal9, Above4567, LSL #24 ;// tVal2 = U4 U3 U2 U1 + UHSUB8 tVal8, tVal2, tVal10 ;// tVal8 = d4 d3 d2 d1 + UADD8 Out0, tVal8, r0x80808080 ;// Out0 = e4 e3 e2 e1 + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [0 to 3 ] + MOV tVal9, tVal9, LSR #8 ;// tVal9 = 00 00 U3 U2 + MOV tVal10, Above4567, LSL #8 ;// tVal10= U6 U5 U4 00 + PKHBT tVal9, tVal9, Above4567, LSL #16 ;// tVal9 = U5 U4 U3 U2 + ORR tVal10, tVal10, tVal11 ;// tVal10= U6 U5 U4 U3 + UHADD8 tVal11, tVal9, Above0123 ;// tVal11= g5 g4 g3 g2 + UHADD8 tVal14, tVal2, tVal10 ;// tVal14= g6 g5 g4 g3 + MVN tVal8, tVal2 ;// tVal8 = U4'U3'U2'U1' + MVN tVal7, tVal9 ;// tVal7 = U5'U4'U3'U2' + UHSUB8 tVal12, tVal9, tVal8 ;// tVal12= d5 d4 d3 d2 + UHSUB8 tVal11, tVal11, tVal8 ;// tVal11= h5 h4 h3 h2 + UHSUB8 tVal2, tVal14, tVal7 ;// tVal2 = h6 h5 h4 h3 + UADD8 Out1, tVal11, r0x80808080 ;// Out1 = i5 i4 i3 i2 + UADD8 Out2, tVal12, r0x80808080 ;// Out2 = e5 e4 e3 e2 + UADD8 Out3, tVal2, r0x80808080 ;// Out3 = i6 i5 i4 i3 + M_STR Out1, [pDst], dstStep ;// store {Out1} at pDst [4 to 7 ] + M_STR Out2, [pDst], dstStep ;// store {Out2} at pDst [8 to 11] + M_STR Out3, [pDst], dstStep ;// store {Out3} at pDst [12 to 15] + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break 
frm case + +OMX_VC_4x4_HU + + ;// M_STALL ARM1136JS=2 + + LDR r0x01010101, =MUL_CONST0 ;// 0x01010101 + M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0] + M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1] + M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2] + LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3] + MOV r0x80808080, r0x01010101, LSL #7 ;// 0x80808080 + ORR tVal6, Left0, Left1, LSL #16 ;// tVal6 = 00 L1 00 L0 + ORR tVal7, Left1, Left2, LSL #16 ;// tVal7 = 00 L2 00 L1 + ORR tVal11, Left2, Left3, LSL #16 ;// tVal11= 00 L3 00 L2 + MUL Out3, Left3, r0x01010101 ;// Out3 = L3 L3 L3 L3 + MVN tVal8, tVal7 ;// tVal8 = 00 L2'00 L1' + MVN tVal10, tVal11 ;// tVal10= 00 L3'00 L2' + UHADD8 tVal4, tVal6, tVal11 ;// tVal4 = 00 g3 00 g2 + UXTB16 tVal12, Out3 ;// tVal12= 00 L3 00 L3 + UHSUB8 tVal4, tVal4, tVal8 ;// tVal4 = 00 h3 00 h2 + UHSUB8 tVal6, tVal6, tVal8 ;// tVal6 = 00 d2 00 d1 + UHSUB8 tVal11, tVal11, tVal8 ;// tVal11= 00 d3 00 d2 + UHADD8 tVal12, tVal12, tVal7 ;// tVal12= 00 g4 00 g3 + UADD8 tVal4, tVal4, r0x80808080 ;// tVal4 = 00 i3 00 i2 + UHSUB8 tVal12, tVal12, tVal10 ;// tVal12= 00 h4 00 h3 + UADD8 tVal8, tVal6, r0x80808080 ;// tVal8 = 00 e2 00 e1 + UADD8 tVal11, tVal11, r0x80808080 ;// tVal11= 00 e3 00 e2 + UADD8 tVal12, tVal12, r0x80808080 ;// tVal12= 00 i4 00 i3 + ORR Out0, tVal8, tVal4, LSL #8 ;// Out0 = i3 e2 i2 e1 + ORR Out1, tVal11, tVal12, LSL #8 ;// Out1 = i4 e3 i3 e2 + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [0 to 3 ] + PKHTB Out2, Out3, Out1, ASR #16 ;// Out2 = L3 L3 i4 e3 + M_STR Out1, [pDst], dstStep ;// store {Out1} at pDst [4 to 7 ] + M_STR Out2, [pDst], dstStep ;// store {Out2} at pDst [8 to 11] + STR Out3, [pDst] ;// store {Out3} at pDst [12 to 15] + MOV return, #OMX_Sts_NoErr + M_END + + ENDIF ;// ARM1136JS + + + END +;//----------------------------------------------------------------------------------------------- +;// omxVCM4P10_PredictIntra_4x4 ends
+;//----------------------------------------------------------------------------------------------- diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s new file mode 100644 index 0000000000000000000000000000000000000000..53597a85be9a410081dd8754ffc3dccae092c14b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s @@ -0,0 +1,128 @@ +;// +;// +;// File Name: omxVCM4P10_TransformDequantChromaDCFromPair_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// Description: +;// Unpack a 2x2 chroma DC block from the stream at *ppSrc, apply a 2x2 butterfly +;// and scale by (pQPModTable[QP] << pQPDivTable[QP]) >> 1; results are stored to pDst + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + IMPORT armVCM4P10_QPDivTable + IMPORT armVCM4P10_VMatrixQPModTable + + M_VARIANTS ARM1136JS + + + IF ARM1136JS + +;//-------------------------------------- +;// Declare input registers +;//-------------------------------------- +ppSrc RN 0 +pDst RN 1 +QP RN 2 + +;//-------------------------------- +;// Scratch variable for Unpack2x2 +;//-------------------------------- +pSrc RN 9 +Value RN 4 +Value2 RN 5 +Flag RN 6 +strOffset RN 7 +cstOffset RN 8 + +;//-------------------------------- +;// Scratch variable +;//-------------------------------- +r0w0 RN 3 +r0w1 RN 4 + +c0w0 RN 5 +c1w0 RN 6 + +return RN 0 +pQPDivTable RN 5 +pQPModTable RN 6 +Shift RN 9 +Scale RN 2 + +Temp1 RN 3 +Temp2 RN 4 +Temp3 RN 7 +Temp4 RN 8 + + ;// Write function header + M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9 + + + LDR pSrc, [ppSrc] ;// Load pSrc + MOV cstOffset, #31 ;// To be used in the loop, to compute offset + + ;//----------------------------------------------------------------------- + ;// First, zero all the coefficient values in the buffer +
;//----------------------------------------------------------------------- + + MOV Value, #0 ;// Initialize the zero value + MOV Value2, #0 ;// Initialize the zero value + LDRB Flag, [pSrc], #1 ;// Preload the first flag byte before the loop + STRD Value, [pDst, #0] ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0 + + +unpackLoop + TST Flag, #0x10 ;// Computing (Flag & 0x10) + LDRSBNE Value2,[pSrc,#1] ;// Value2 = sign-extended second byte (high half of the 16-bit case) + LDRBNE Value, [pSrc], #2 ;// Load byte wise to avoid unaligned access + AND strOffset, cstOffset, Flag, LSL #1 ;// strOffset = (Flag & 15) << 1; + LDRSBEQ Value, [pSrc], #1 ;// Value = (OMX_S8) *pSrc++ (LDRSB sign-extends) + ORRNE Value,Value,Value2, LSL #8 ;// Value = low byte | (Value2 << 8) + + TST Flag, #0x20 ;// Computing (Flag & 0x20) to check, if we're done + LDRBEQ Flag, [pSrc], #1 ;// Flag = (OMX_U8) *pSrc++, for next iteration + STRH Value, [pDst, strOffset] ;// Store at offset + BEQ unpackLoop ;// Branch to the loop beginning + + LDMIA pDst, {r0w0, r0w1} ;// r0w0 = |c1|c0| & r0w1 = |c3|c2| + + + STR pSrc, [ppSrc] ;// Update the bitstream pointer + + LDR pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer + LDR pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer + + SADDSUBX r0w0, r0w0, r0w0 ;// [ c00+c01, c00-c01 ] + SADDSUBX r0w1, r0w1, r0w1 ;// [ c10+c11, c10-c11 ] + + LDRSB Shift, [pQPDivTable, QP] ;// Shift = pQPDivTable[QP] + LDRSB Scale, [pQPModTable, QP] ;// Scale = pQPModTable[QP] + + SADD16 c0w0, r0w0, r0w1 ;// [ d00+d10, d01+d11 ] + SSUB16 c1w0, r0w0, r0w1 ;// [ d00-d10, d01-d11 ] + + LSL Scale, Scale, Shift ;// Scale = Scale << Shift + + SMULTB Temp2, c0w0, Scale ;// Temp2 = T(c0w0) * Scale + SMULTB Temp4, c1w0, Scale ;// Temp4 = T(c1w0) * Scale + SMULBB Temp1, c0w0, Scale ;// Temp1 = B(c0w0) * Scale + SMULBB Temp3, c1w0, Scale ;// Temp3 = B(c1w0) * Scale + MOV Temp2, Temp2, ASR #1 ;// Temp2 = Temp2 >> 1 (Temp1's >> 1 is done by the PKHBT LSL #15 below) + MOV Temp4, Temp4, ASR #1 ;// Temp4 = Temp4 >> 1 (Temp3's >> 1 is done by the PKHBT LSL #15 below) + PKHBT c0w0, Temp2, Temp1, LSL #15
;// c0w0 = | Temp1 | Temp2 | + PKHBT c1w0, Temp4, Temp3, LSL #15 ;// c1w0 = | Temp3 | Temp4 | + STMIA pDst, {c0w0, c1w0} ;// Storing all the coefficients at once + MOV return, #OMX_Sts_NoErr + M_END + + ENDIF ;// ARM1136JS + + + + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s new file mode 100644 index 0000000000000000000000000000000000000000..73caec2ccac6ad4dbdd004322747705119cc9237 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s @@ -0,0 +1,469 @@ +;// +;// +;// File Name: omxVCM4P10_TransformDequantLumaDCFromPair_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// Description: +;// H.264 inverse quantize and transform module +;// +;// + +;// Include standard headers + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + +;// Import/Export symbols required from/to other files +;// (For example tables) + + IMPORT armVCM4P10_UnpackBlock4x4 + IMPORT armVCM4P10_QPDivTable + IMPORT armVCM4P10_VMatrixQPModTable + + M_VARIANTS ARM1136JS + +;// Set debugging level +;//DEBUG_ON SETL {TRUE} + + +;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4 + + +;// Guarding implementation by the processor name + + IF ARM1136JS + + +;//Input Registers +pData RN 0 +QP RN 1 + +;//Output Registers + + +;//Local Scratch Registers + +;// Packed Input pixels +in00 RN 2 ;// Src[0] & Src[1] +in02 RN 3 ;// Src[2] & Src[3] +in10 RN 4 ;// Src[4] & Src[5] +in12 RN 5 ;// Src[6] & Src[7] +in20 RN 6 ;// Src[8] & Src[9] +in22 RN 7 ;// Src[10] & Src[11] +in30 RN 8 ;// Src[12] & Src[13] +in32 RN 9 ;// Src[14] & Src[15] + +;// Transpose for Row operations (Rows to cols) +trRow00 RN 2 +trRow10 RN 10
+trRow02 RN 3 +trRow12 RN 5 +trRow20 RN 11 +trRow30 RN 12 +trRow32 RN 14 +trRow22 RN 7 + +;// Intermediate calculations +rowSum1 RN 4 +rowSum2 RN 6 +rowDiff1 RN 8 +rowDiff2 RN 9 + + +;// Row operated pixels +rowOp00 RN 2 +rowOp10 RN 10 +rowOp20 RN 11 +rowOp30 RN 12 +rowOp02 RN 3 +rowOp12 RN 5 +rowOp22 RN 7 +rowOp32 RN 14 + +;// Transpose for colulmn operations +trCol00 RN 2 +trCol02 RN 3 +trCol10 RN 4 +trCol12 RN 5 +trCol20 RN 6 +trCol22 RN 7 +trCol30 RN 8 +trCol32 RN 9 + +;// Intermediate calculations +colSum1 RN 10 +colSum2 RN 11 +colDiff1 RN 12 +colDiff2 RN 14 + + +;// Coloumn operated pixels +colOp00 RN 2 +colOp02 RN 3 +colOp10 RN 4 +colOp12 RN 5 +colOp20 RN 6 +colOp22 RN 7 +colOp30 RN 8 +colOp32 RN 9 + +;// Temporary scratch varaibles +pQPDivTable RN 0 +pQPModTable RN 11 +Shift RN 10 +Scale RN 14 +Round RN 0 + +temp1 RN 10 +temp2 RN 11 +temp3 RN 12 +temp4 RN 1 + + + +;// InvTransformed and Dequantized pixels +out00 RN 2 +out02 RN 3 +out10 RN 4 +out12 RN 5 +out20 RN 6 +out22 RN 7 +out30 RN 8 +out32 RN 9 + + + + + ;// Allocate stack memory required by the function + M_ALLOC4 pDataOnStack, 4 + + ;// Write function header + M_START armVCM4P10_InvTransformDequantLumaDC4x4,r11 + + ;****************************************************************** + ;// The strategy used in implementing the transform is as follows:* + ;// Load the 4x4 block into 8 registers * + ;// Transpose the 4x4 matrix * + ;// Perform the row operations (on columns) using SIMD * + ;// Transpose the 4x4 result matrix * + ;// Perform the coloumn operations * + ;// Store the 4x4 block at one go * + ;****************************************************************** + + ;// Load all the 4x4 pixels + + LDMIA pData,{in00,in02,in10,in12,in20,in22,in30,in32} + + ;//***************************************************************** + ;// + ;// Transpose the matrix inorder to perform row ops as coloumn ops + ;// Input: in[][] = original matrix + ;// Output: trRow[][]= transposed matrix + ;// Step1: Obtain 
the LL part of the transposed matrix + ;// Step2: Obtain the HL part + ;// step3: Obtain the LH part + ;// Step4: Obtain the HH part + ;// + ;//***************************************************************** + + ;// LL 2x2 transposed matrix + ;// d0 d1 - - + ;// d4 d5 - - + ;// - - - - + ;// - - - - + + PKHTB trRow10,in10,in00,ASR #16 ;// [5 4] = [f5:f1] + PKHBT trRow00,in00,in10,LSL #16 ;// [1 0] = [f4:f0] + + ;// HL 2x2 transposed matrix + ;// - - - - + ;// - - - - + ;// d8 d9 - - + ;// d12 d13 - - + + + PKHTB trRow30,in12,in02,ASR #16 ;// [13 12] = [7 3] + PKHBT trRow20,in02,in12,LSL #16 ;// [9 8] = [6 2] + + ;// LH 2x2 transposed matrix + ;// - - d2 d3 + ;// - - d6 d7 + ;// - - - - + ;// - - - - + + PKHBT trRow02,in20,in30,LSL #16 ;// [3 2] = [f12:f8] + PKHTB trRow12,in30,in20,ASR #16 ;// [7 6] = [f13:f9] + + + + + ;// HH 2x2 transposed matrix + ;// - - - - + ;// - - - - + ;// - - d10 d11 + ;// - - d14 d15 + + PKHTB trRow32,in32,in22,ASR #16 ;// [15 14] = [15 11] + PKHBT trRow22,in22,in32,LSL #16 ;// [11 10] = [14 10] + + + ;**************************************** + ;// Row Operations (Performed on columns) + ;**************************************** + + + ;// SIMD operations on first two columns(two rows of the original matrix) + + SADD16 rowSum1,trRow00,trRow10 ;// (c0+c1) + SADD16 rowSum2,trRow20,trRow30 ;// (c2+c3) + SSUB16 rowDiff1,trRow00,trRow10 ;// (c0-c1) + SSUB16 rowDiff2,trRow20,trRow30 ;// (c2-c3) + SADD16 rowOp00,rowSum1,rowSum2 ;// (c0+c1+c2+c3) + SSUB16 rowOp10,rowSum1,rowSum2 ;// (c0+c1-c2-c3) + SSUB16 rowOp20,rowDiff1,rowDiff2 ;// (c0-c1-c2+c3) + SADD16 rowOp30,rowDiff1,rowDiff2 ;// (c0-c1+c2-c3) + + + ;// SIMD operations on next two columns(next two rows of the original matrix) + + SADD16 rowSum1,trRow02,trRow12 ;// (c0+c1) + SADD16 rowSum2,trRow22,trRow32 ;// (c2+c3) + SSUB16 rowDiff1,trRow02,trRow12 ;// (c0-c1) + SSUB16 rowDiff2,trRow22,trRow32 ;// (c2-c3) + SADD16 rowOp02,rowSum1,rowSum2 ;// (c0+c1+c2+c3) + SSUB16 rowOp12,rowSum1,rowSum2 
;// (c0+c1-c2-c3) + SSUB16 rowOp22,rowDiff1,rowDiff2 ;// (c0-c1-c2+c3) + SADD16 rowOp32,rowDiff1,rowDiff2 ;// (c0-c1+c2-c3) + + + + ;***************************************************************** + ;// Transpose the resultant matrix + ;// Input: rowOp[][] + ;// Output: trCol[][] + ;***************************************************************** + + ;// LL 2x2 transposed matrix + ;// d0 d1 - - + ;// d4 d5 - - + ;// - - - - + ;// - - - - + + PKHTB trCol10,rowOp10,rowOp00,ASR #16 ;// [5 4] = [f5:f1] + PKHBT trCol00,rowOp00,rowOp10,LSL #16 ;// [1 0] = [f4:f0] + + ;// HL 2x2 transposed matrix + ;// - - - - + ;// - - - - + ;// d8 d9 - - + ;// d12 d13 - - + + + PKHTB trCol30,rowOp12,rowOp02,ASR #16 ;// [13 12] = [7 3] + PKHBT trCol20,rowOp02,rowOp12,LSL #16 ;// [9 8] = [6 2] + + ;// LH 2x2 transposed matrix + ;// - - d2 d3 + ;// - - d6 d7 + ;// - - - - + ;// - - - - + + PKHBT trCol02,rowOp20,rowOp30,LSL #16 ;// [3 2] = [f12:f8] + PKHTB trCol12,rowOp30,rowOp20,ASR #16 ;// [7 6] = [f13:f9] + + + + + ;// HH 2x2 transposed matrix + ;// - - - - + ;// - - - - + ;// - - d10 d11 + ;// - - d14 d15 + + PKHTB trCol32,rowOp32,rowOp22,ASR #16 ;// [15 14] = [15 11] + PKHBT trCol22,rowOp22,rowOp32,LSL #16 ;// [11 10] = [14 10] + + + ;******************************* + ;// Coloumn Operations + ;******************************* + + ;//-------------------------------------------------------------------------------------- + ;// Store pData(RN0) on stack and restore it only at the final store back + ;// This frees up a register (RN0) which is used to reduce number of intermediate stalls + ;//-------------------------------------------------------------------------------------- + M_STR pData,pDataOnStack + + + ;// SIMD operations on first two columns(two rows of the original matrix) + + SADD16 colSum1,trCol00,trCol10 ;// (c0+c1) + SADD16 colSum2,trCol20,trCol30 ;// (c2+c3) + SSUB16 colDiff1,trCol00,trCol10 ;// (c0-c1) + SSUB16 colDiff2,trCol20,trCol30 ;// (c2-c3) + SADD16 
colOp00,colSum1,colSum2 ;// (c0+c1+c2+c3) + SSUB16 colOp10,colSum1,colSum2 ;// (c0+c1-c2-c3) + SSUB16 colOp20,colDiff1,colDiff2 ;// (c0-c1-c2+c3) + SADD16 colOp30,colDiff1,colDiff2 ;// (c0-c1+c2-c3) + + + ;// SIMD operations on next two columns(next two rows of the original matrix) + + LDR pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer + SADD16 colSum1,trCol02,trCol12 ;// (c0+c1) + SADD16 colSum2,trCol22,trCol32 ;// (c2+c3) + SSUB16 colDiff1,trCol02,trCol12 ;// (c0-c1) + SSUB16 colDiff2,trCol22,trCol32 ;// (c2-c3) + SADD16 colOp02,colSum1,colSum2 ;// (c0+c1+c2+c3) + SSUB16 colOp12,colSum1,colSum2 ;// (c0+c1-c2-c3) + LDR pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer + LDRSB Shift, [pQPDivTable, QP] ;// Shift = pQPDivTable[QP] + SSUB16 colOp22,colDiff1,colDiff2 ;// (c0-c1-c2+c3) + SADD16 colOp32,colDiff1,colDiff2 ;// (c0-c1+c2-c3) + + + LDRSB Scale, [pQPModTable, QP] ;// Scale = pQPModTable[QP] + + ;//---------------------------------------------------------------------- + ;// + ;// improves on the c-reference code + ;// Both the cases i.e., Shift>=0 and Shift<0 cases are covered together + ;// We do not subtract 2 from Shift as in C reference, instead perform a + ;// Scale << Shift once in the beginning and do a right shift by a + ;// constant 2 after the Multiplication. 
The value of Round would be 2 + ;// + ;// By doing this we avoid the Branches required and also + ;// reduce the code size substantially + ;// + ;//---------------------------------------------------------------------- + + MOV Round, #2 ;// Round = 2 + LSL Scale, Scale, Shift ;// Scale = Scale << Shift + + + ;// Row 1 + SMLABB temp1, colOp00, Scale, Round ;// Temp1 = B(c0w0) * Scale + Round + SMLABB temp3, colOp02, Scale, Round ;// Temp3 = B(c1w0) * Scale + Round + SMLATB temp2, colOp00, Scale, Round ;// Temp2 = T(c0w0) * Scale + Round + SMLATB temp4, colOp02, Scale, Round ;// Temp4 = T(c1w0) * Scale + Round + + ASR temp1, temp1, #2 ;// Temp1 = Temp1 >> 2 + ASR temp3, temp3, #2 ;// Temp3 = Temp3 >> 2 + PKHBT out00, temp1, temp2, LSL #14 ;// c0w0 = | Temp2 >> 2 | Temp1 | + PKHBT out02, temp3, temp4, LSL #14 ;// c1w0 = | Temp4 >> 2 | Temp3 | + + + ;// Row 2 + SMLABB temp1, colOp10, Scale, Round ;// Temp1 = B(c0w0) * Scale + Round + SMLABB temp3, colOp12, Scale, Round ;// Temp3 = B(c1w0) * Scale + Round + SMLATB temp2, colOp10, Scale, Round ;// Temp2 = T(c0w0) * Scale + Round + SMLATB temp4, colOp12, Scale, Round ;// Temp4 = T(c1w0) * Scale + Round + + ASR temp1, temp1, #2 ;// Temp1 = Temp1 >> 2 + ASR temp3, temp3, #2 ;// Temp3 = Temp3 >> 2 + PKHBT out10, temp1, temp2, LSL #14 ;// c0w0 = | Temp2 >> 2 | Temp1 | + PKHBT out12, temp3, temp4, LSL #14 ;// c1w0 = | Temp4 >> 2 | Temp3 | + + ;// Row 3 + SMLABB temp1, colOp20, Scale, Round ;// Temp1 = B(c0w0) * Scale + Round + SMLABB temp3, colOp22, Scale, Round ;// Temp3 = B(c1w0) * Scale + Round + SMLATB temp2, colOp20, Scale, Round ;// Temp2 = T(c0w0) * Scale + Round + SMLATB temp4, colOp22, Scale, Round ;// Temp4 = T(c1w0) * Scale + Round + + ASR temp1, temp1, #2 ;// Temp1 = Temp1 >> 2 + ASR temp3, temp3, #2 ;// Temp3 = Temp3 >> 2 + PKHBT out20, temp1, temp2, LSL #14 ;// c0w0 = | Temp2 >> 2 | Temp1 | + PKHBT out22, temp3, temp4, LSL #14 ;// c1w0 = | Temp4 >> 2 | Temp3 | + + ;// Row 4 + SMLABB temp1, colOp30, Scale, Round ;// Temp1 = B(c0w0) * Scale +
Round + SMLABB temp3, colOp32, Scale, Round ;// Temp3 = B(c1w0) * Scale + Round + SMLATB temp2, colOp30, Scale, Round ;// Temp2 = T(c0w0) * Scale + Round + SMLATB temp4, colOp32, Scale, Round ;// Temp4 = T(c1w0) * Scale + Round + + M_LDR pData,pDataOnStack ;// Restore pData pointer from stack (r0 was reused as Round; all uses of Round are above) + ASR temp1, temp1, #2 ;// Temp1 = Temp1 >> 2 + ASR temp3, temp3, #2 ;// Temp3 = Temp3 >> 2 + PKHBT out30, temp1, temp2, LSL #14 ;// c0w0 = | Temp2 >> 2 | Temp1 | + PKHBT out32, temp3, temp4, LSL #14 ;// c1w0 = | Temp4 >> 2 | Temp3 | + + + + ;*************************** + ;// Store all the 4x4 pixels + ;*************************** + +store_coeff + + STMIA pData,{out00,out02,out10,out12,out20,out22,out30,out32} + + + + ;// Set return value + + + ;// Write function tail + M_END + + ENDIF ;//ARM1136JS + + +;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4 + +;// Guarding implementation by the processor name + + + + +;// Function: omxVCM4P10_TransformDequantLumaDCFromPair + +;//Input Registers +ppSrc RN 0 +pDst RN 1 +QPR2 RN 2 + +;//Output Registers +result RN 0 + +;//Local Scratch Registers +pDstR4 RN 4 +pDstR0 RN 0 +QPR1 RN 1 +QPR5 RN 5 + +;// Guarding implementation by the processor name + + IF ARM1136JS + + ;// Allocate stack memory required by the function + + + ;// Write function header + M_START omxVCM4P10_TransformDequantLumaDCFromPair,r5 + + MOV pDstR4,pDst ;// Keep pDst (r1) in r4 across the call below + MOV QPR5,QPR2 ;// Keep QP (r2) in r5 across the call below + BL armVCM4P10_UnpackBlock4x4 + + MOV pDstR0,pDstR4 ;// r0 = pDst, first argument (pData) of the transform + MOV QPR1,QPR5 ;// r1 = QP, second argument of the transform + BL armVCM4P10_InvTransformDequantLumaDC4x4 + + + ;// Set return value + MOV result,#OMX_Sts_NoErr + + ;// Write function tail + M_END + + + ENDIF ;//ARM1136JS + + + END \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h new file mode 100644 index
0000000000000000000000000000000000000000..22115d3326a79780bcafec476099d2c1dd324457 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h @@ -0,0 +1,37 @@ +/** + * + * File Name: armVCM4P2_Huff_Tables_VLC.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * + * File: armVCM4P2_Huff_Tables_VLC.h + * Description: Declares Tables used for Huffman coding and decoding + * in MP4P2 codec. + * + */ + +#ifndef _OMXHUFFTAB_H_ +#define _OMXHUFFTAB_H_ + + +extern const OMX_U16 armVCM4P2_IntraVlcL0L1[200]; + + +extern const OMX_U16 armVCM4P2_InterVlcL0L1[200]; + +extern const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64]; +//extern const OMX_U16 armVCM4P2_aIntraDCChromaIndex[32]; +extern const OMX_U16 armVCM4P2_aVlcMVD[124]; + +extern const OMX_U8 armVCM4P2_InterL0L1LMAX[73]; +extern const OMX_U8 armVCM4P2_InterL0L1RMAX[35]; +extern const OMX_U8 armVCM4P2_IntraL0L1LMAX[53]; +extern const OMX_U8 armVCM4P2_IntraL0L1RMAX[40]; + +#endif /* _OMXHUFFTAB_H_ */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h new file mode 100644 index 0000000000000000000000000000000000000000..d5f865cd8771dff4abd717d04de93bf666e265e9 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h @@ -0,0 +1,25 @@ +/** + * + * File Name: armVCM4P2_ZigZag_Tables.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * + * File: armVCM4P2_Zigzag_Tables.h + * Description: Declares Tables used for Zigzag scan in MP4P2 codec.
+ * + */ + +#ifndef _OMXZIGZAGTAB_H +#define _OMXZIGZAGTAB_H + +extern const OMX_U8 armVCM4P2_aClassicalZigzagScan [192]; +//extern const OMX_U8 armVCM4P2_aHorizontalZigzagScan [64]; +//extern const OMX_U8 armVCM4P2_aVerticalZigzagScan [64]; + +#endif /* _OMXZIGZAGTAB_H */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s new file mode 100644 index 0000000000000000000000000000000000000000..7801e578b93a177550da288a37da5ea8fbb7df26 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s @@ -0,0 +1,75 @@ +; /** +; * +; * File Name: armVCM4P2_Clip8_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains module for Clipping 16 bit value to [0,255] Range +; */ + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + + M_VARIANTS ARM1136JS + + + + IF ARM1136JS + +;//Input Arguments + +pSrc RN 0 +pDst RN 1 +step RN 2 + +;// Local variables + +x0 RN 3 +x1 RN 4 +x2 RN 5 +x3 RN 6 + +Count RN 14 + + + M_START armVCM4P2_Clip8,r6 + + + MOV Count,#8 ;// 8 iterations, each clips 8 halfword samples to 8 bytes +loop + + LDMIA pSrc!,{x0, x1} + SUBS Count,Count, #1 ;// count down; these flags feed the BGT at the loop end + LDMIA pSrc!,{x2, x3} + USAT16 x0, #8, x0 ;// clip two samples to [0,255] + USAT16 x1, #8, x1 ;// clip two samples to [0,255] + STRB x0, [pDst] + MOV x0, x0, LSR #16 + STRB x0, [pDst,#1] + STRB x1, [pDst,#2] + MOV x1, x1, LSR #16 + STRB x1, [pDst,#3] + + USAT16 x2, #8, x2 ;// clip two samples to [0,255] + USAT16 x3, #8, x3 ;// clip two samples to [0,255] + STRB x2, [pDst,#4] + MOV x2, x2, LSR #16 + STRB x2, [pDst,#5] + STRB x3, [pDst,#6] + MOV x3, x3, LSR #16 + STRB x3, [pDst,#7] + ADD pDst,pDst,step ;// Increment pDst by step value + + BGT loop ;// Loop while Count > 0 (8 iterations x 8 samples = 64 samples in total) + + M_END + ENDIF + + END diff --git
a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s new file mode 100644 index 0000000000000000000000000000000000000000..9e309006d364dfe49c3c3fca2222c9b99b54ec76 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s @@ -0,0 +1,398 @@ +;/** +; * +; * File Name: armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains modules for zigzag scanning and VLC decoding +; * for inter, intra block. +; * +; * +; * +; * Function: armVCM4P2_DecodeVLCZigzag_AC_unsafe +; * +; * Description: +; * Performs VLC decoding and inverse zigzag scan +; * +; * +; * +; * +; */ + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + + + M_VARIANTS ARM1136JS + + + + + + IF ARM1136JS + + + + + +;//Input Arguments + +ppBitStream RN 0 +pBitOffset RN 1 +pDst RN 2 +shortVideoHeader RN 3 + + +;//Local Variables + +Return RN 0 + +pVlcTableL0L1 RN 4 +pLMAXTableL0L1 RN 4 +pRMAXTableL0L1 RN 4 +pZigzagTable RN 4 + +ftype RN 0 +temp3 RN 4 +temp RN 5 +Count RN 6 +Escape RN 5 + +;// armVCM4P2_FillVLDBuffer +zigzag RN 0 +storeLevel RN 1 +temp2 RN 4 +temp1 RN 5 +sign RN 5 +Last RN 7 +storeRun RN 14 + + +packRetIndex RN 5 + + +markerbit RN 5 + +;// Scratch Registers + +RBitStream RN 8 +RBitBuffer RN 9 +RBitCount RN 10 + +T1 RN 11 +T2 RN 12 +LR RN 14 + + + + M_ALLOC4 pppBitStream,4 + M_ALLOC4 ppOffset,4 + M_ALLOC4 pLinkRegister,4 + + M_START armVCM4P2_DecodeVLCZigzag_AC_unsafe + + ;// get the table addresses from stack + M_ARG ppVlcTableL0L1,4 + M_ARG ppLMAXTableL0L1,4 + M_ARG ppRMAXTableL0L1,4 + M_ARG ppZigzagTable,4 + + ;// Store ALL zeros at pDst + + MOV temp1,#0 ;// temp1 (r5) = 0, one of four zero registers stored to pDst below. NOTE(review): Count (r6) is not written in this routine - presumably supplied by the caller; confirm
+ MOV Last,#0 + M_STR LR,pLinkRegister ;// Store Link Register on Stack + MOV temp2,#0 + MOV LR,#0 + + ;// Initialize the Macro and Store all zeros to pDst + + STM pDst!,{temp2,temp1,Last,LR} + M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount + STM pDst!,{temp2,temp1,Last,LR} + M_BD_INIT1 T1, T2, T2 + STM pDst!,{temp2,temp1,Last,LR} + M_BD_INIT2 T1, T2, T2 + STM pDst!,{temp2,temp1,Last,LR} + M_STR ppBitStream,pppBitStream ;// Store ppBitstream on stack + STM pDst!,{temp2,temp1,Last,LR} + M_STR pBitOffset,ppOffset ;// Store pBitOffset on stack + STM pDst!,{temp2,temp1,Last,LR} + + STM pDst!,{temp2,temp1,Last,LR} + STM pDst!,{temp2,temp1,Last,LR} + + + SUB pDst,pDst,#128 ;// Restore pDst (8 STMs x 4 registers x 4 bytes = 128 bytes cleared) + + ;// The armVCM4P2_GetVLCBits begins + +getVLCbits + + M_BD_LOOK8 Escape,7 ;// Load Escape Value + LSR Escape,Escape,#25 + CMP Escape,#3 ;// check for escape mode + MOVNE ftype,#0 + BNE notEscapemode ;// Branch if not in Escape mode 3 + + M_BD_VSKIP8 #7,T1 + CMP shortVideoHeader,#0 ;// Check shortVideoHeader flag to know the type of Escape mode + BEQ endFillVLD + + ;// Escape Mode 4 + + M_BD_READ8 Last,1,T1 + M_BD_READ8 storeRun,6,T1 + M_BD_READ8 storeLevel,8,T1 + + + ;// Check whether the Reserved values for Level are used and Exit with an Error Message if it is so + + TEQ storeLevel,#0 + TEQNE storeLevel,#128 + BEQ ExitError + + ADD temp2,storeRun,Count + CMP temp2,#64 + BGE ExitError ;// error if Count+storeRun >= 64 + + + ;// Load address of zigzagTable + + M_LDR pZigzagTable,ppZigzagTable ;// Loading the Address of Zigzag table + + + ;// armVCM4P2_FillVLDBuffer + + SXTB storeLevel,storeLevel ;// Sign Extend storeLevel to 32 bits + + + ;// To Reflect Runlength + + ADD Count,Count,storeRun + LDRB zigzag,[pZigzagTable,Count] + ADD Count,Count,#1 + STRH storeLevel,[pDst,zigzag] ;// store Level + + B ExitOk ;// NOTE(review): unconditional exit - ExitOk returns OMX_Sts_Err unless Last==1, whereas storeLevelLast loops back via 'end' when Last==0; confirm this is intended + + + +endFillVLD + + + ;// Load Ftype( Escape Mode) value based on the two successive bits in the bitstream + + M_BD_READ8 temp1,1,T1 + CMP
temp1,#0 + MOVEQ ftype,#1 + BEQ notEscapemode + M_BD_READ8 temp1,1,T1 + CMP temp1,#1 + MOVEQ ftype,#3 + MOVNE ftype,#2 + + +notEscapemode + + ;// Load optimized packed VLC table with last=0 and Last=1 + + M_LDR pVlcTableL0L1,ppVlcTableL0L1 ;// Load Combined VLC Table + + + CMP ftype,#3 ;// If ftype >=3 get perform Fixed Length Decoding (Escape Mode 3) + BGE EscapeMode3 ;// Else continue normal VLC Decoding + + ;// Variable lengh decoding, "armUnPackVLC32" + + + M_BD_VLD packRetIndex,T1,T2,pVlcTableL0L1,4,2 + + + LDR temp3,=0xFFF + + CMP packRetIndex,temp3 ;// Check for invalid symbol + BEQ ExitError ;// if invalid symbol occurs exit with an error message + + AND Last,packRetIndex,#2 ;// Get Last from packed Index + + + + + LSR storeRun,packRetIndex,#7 ;// Get Run Value from Packed index + AND storeLevel,packRetIndex,#0x7c ;// storeLevel=packRetIndex[2-6],storeLevel[0-1]=0 + + + M_LDR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Load LMAX table + + + LSR storeLevel,storeLevel,#2 ;// Level value + + CMP ftype,#1 + BNE ftype2 + + ;// ftype==1; Escape mode =1 + + + ADD temp1, pLMAXTableL0L1, Last, LSL#4 ;// If the Last=1 add 32 to table address + LDRB temp1,[temp1,storeRun] + + + ADD storeLevel,temp1,storeLevel + +ftype2 + + ;// ftype =2; Escape mode =2 + + M_LDR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Load RMAX Table + + CMP ftype,#2 + BNE FillVLDL1 + + ADD temp1, pRMAXTableL0L1, Last, LSL#4 ;// If Last=1 add 32 to table address + SUB temp2,storeLevel,#1 + LDRB temp1,[temp1,temp2] + + + ADD storeRun,storeRun,#1 + ADD storeRun,temp1 + +FillVLDL1 + + + ;// armVCM4P2_FillVLDBuffer + + M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable + + M_BD_READ8 sign,1,T1 + + CMP sign,#1 + RSBEQ storeLevel,storeLevel,#0 + + ADD temp1,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63 + CMP temp1,#64 + BGE ExitError + + + + + + + ;// To Reflect Runlenght + + ADD Count,Count,storeRun + +storeLevelL1 + + LDRB zigzag,[pZigzagTable,Count] + CMP Last,#2 ;// Check if the 
Level val is Last non zero val + ADD Count,Count,#1 + LSR Last,Last,#1 + STRH storeLevel,[pDst,zigzag] + + BNE end + + B ExitOk + + + + ;// Fixed Lengh Decoding Escape Mode 3 + +EscapeMode3 + + M_BD_READ8 Last,1,T1 + M_BD_READ8 storeRun,6,T1 + + ADD temp2,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63 + CMP temp2,#64 + BGE ExitError + + M_BD_READ8 markerbit,1,T1 + TEQ markerbit,#0 ;// Exit with an error message if marker bit is zero + BEQ ExitError + + M_BD_READ16 storeLevel,12,T1 + + TST storeLevel,#0x800 ;// test if the level is negative + SUBNE storeLevel,storeLevel,#4096 + CMP storeLevel,#0 + CMPNE storeLevel,#-2048 + BEQ ExitError ;// Exit with an error message if Level==0 or -2048 + + M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable + + M_BD_READ8 markerbit,1,T1 + + + ;// armVCM4P2_FillVLDBuffer ( Sign not used as storeLevel is preprocessed) + + + + ;// To Reflect Run Length + + ADD Count,Count,storeRun + + + +storeLevelLast + + LDRB zigzag,[pZigzagTable,Count] + CMP Last,#1 + ADD Count,Count,#1 + STRH storeLevel,[pDst,zigzag] + + BNE end + + B ExitOk + +end + + CMP Count,#64 ;//Run the Loop untill Count reaches 64 + + BLT getVLCbits + + +ExitOk + ;// Exit When VLC Decoding is done Successfully + + ;// Loading ppBitStream and pBitOffset from stack + + CMP Last,#1 + M_LDR ppBitStream,pppBitStream + M_LDR pBitOffset,ppOffset + + ;//Ending the macro + + M_BD_FINI ppBitStream,pBitOffset + + MOVEQ Return,#OMX_Sts_NoErr + MOVNE Return,#OMX_Sts_Err + M_LDR LR,pLinkRegister ;// Load the Link Register Back + B exit2 + +ExitError + ;// Exit When an Error occurs + + M_LDR ppBitStream,pppBitStream + M_LDR pBitOffset,ppOffset + ;//Ending the macro + + M_BD_FINI ppBitStream,pBitOffset + M_LDR LR,pLinkRegister + MOV Return,#OMX_Sts_Err + +exit2 + + + M_END + ENDIF + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c 
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c new file mode 100644 index 0000000000000000000000000000000000000000..ba4d05849b819ab5df51852c210217f27e011d41 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c @@ -0,0 +1,211 @@ + /** + * + * File Name: armVCM4P2_Huff_Tables_VLC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armVCM4P2_Huff_Tables_VLC.c + * Description: Contains all the Huffman tables used in MPEG4 codec + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armCOMM_Bitstream.h" + + + + +// Contains optimized and Packed VLC tables with Last=0 and Last=1 + +// optimized Packed VLC table Entry Format +// --------------------------------------- +// +// 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +// +------------------------------------------------+ +// | Len | Run | Level |L | 1 | +// +------------------------------------------------+ +// | Offset | 0 | +// +------------------------------------------------+ +// If the table entry is a leaf entry then bit 0 set: +// Len = Number of bits overread (0 to 7) 3 bits +// Run = RunLength of the Symbol (0 to 63) 6 bits +// Level = Level of the Symbol (0 to 31) 5 bits +// L = Last Value of the Symbol (0 or 1) 1 bit +// +// If the table entry is an internal node then bit 0 is clear: +// Offset = Number of (16-bit) half words from the table +// start to the next table node +// +// The table is accessed by successive lookup up on the +// next Step bits of the input bitstream until a leaf node +// is obtained. The Step sizes are supplied to the VLD macro. + +// The VLC tables used for Intra and non inta coefficients in non Escape mode +// contains symbols with both Last=0 and Last=1. 
+// If a symbol is not found in the table it will be coded as 0xFFF + + +const OMX_U16 armVCM4P2_InterVlcL0L1[200] = { + 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x1b09, + 0x4009, 0x4009, 0x4009, 0x4009, 0x2109, 0x2109, 0x0209, 0x0011, + 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058, + 0x3fff, 0x3fff, 0x0038, 0x0040, 0x2115, 0x2115, 0x201d, 0x201d, + 0x2059, 0x2059, 0x2051, 0x2051, 0x1c0d, 0x1b0d, 0x1a0d, 0x190d, + 0x0911, 0x0811, 0x0711, 0x0611, 0x0511, 0x0319, 0x0219, 0x0121, + 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088, + 0x2061, 0x2061, 0x2129, 0x2129, 0x3709, 0x3709, 0x3809, 0x3809, + 0x3d0d, 0x3d0d, 0x3e0d, 0x3e0d, 0x3f0d, 0x3f0d, 0x200d, 0x200d, + 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x0131, 0x0221, 0x0419, 0x0519, + 0x0619, 0x0a11, 0x1909, 0x1a09, 0x210d, 0x220d, 0x230d, 0x240d, + 0x250d, 0x260d, 0x270d, 0x280d, 0x00c0, 0x00c8, 0x00d0, 0x00d8, + 0x0049, 0x0041, 0x380d, 0x380d, 0x370d, 0x370d, 0x360d, 0x360d, + 0x350d, 0x350d, 0x340d, 0x340d, 0x330d, 0x330d, 0x320d, 0x320d, + 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x310d, 0x310d, 0x2015, 0x2015, + 0x3609, 0x3609, 0x3509, 0x3509, 0x3409, 0x3409, 0x3309, 0x3309, + 0x3209, 0x3209, 0x3109, 0x3109, 0x0110, 0x0130, 0x0138, 0x0140, + 0x0118, 0x0120, 0x0128, 0x100d, 0x3009, 0x3009, 0x2f09, 0x2f09, + 0x2411, 0x2411, 0x2311, 0x2311, 0x2039, 0x2039, 0x2031, 0x2031, + 0x0f0d, 0x0e0d, 0x0d0d, 0x0c0d, 0x0b0d, 0x0a0d, 0x090d, 0x0e09, + 0x0d09, 0x0211, 0x0119, 0x0029, 0x0150, 0x0158, 0x0160, 0x0168, + 0x280d, 0x280d, 0x270d, 0x270d, 0x260d, 0x260d, 0x250d, 0x250d, + 0x2c09, 0x2c09, 0xb759, 0xb759, 0x2a09, 0x2a09, 0x2021, 0x2021, + 0x040d, 0x030d, 0x0b35, 0x010d, 0x0909, 0x0809, 0x0709, 0x0609, + 0x0111, 0x0019, 0x2509, 0x2509, 0x2409, 0x2409, 0x2309, 0x2309 +}; + + +const OMX_U16 armVCM4P2_IntraVlcL0L1[200] = { + 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x0f09, + 0x4009, 0x4009, 0x4009, 0x4009, 0x2011, 0x2011, 0x0109, 0x0019, + 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 
0x0048, 0x0050, 0x0058, + 0x3fff, 0x3fff, 0x0038, 0x0040, 0x203d, 0x203d, 0x2035, 0x2035, + 0x20b1, 0x20b1, 0x20a9, 0x20a9, 0x0215, 0x011d, 0x002d, 0x0d09, + 0x0519, 0x0811, 0x0419, 0x0321, 0x0221, 0x0139, 0x00a1, 0x0099, + 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088, + 0x20b9, 0x20b9, 0x20c1, 0x20c1, 0x2141, 0x2141, 0x2911, 0x2911, + 0x2315, 0x2315, 0x2415, 0x2415, 0x2f0d, 0x2f0d, 0x300d, 0x300d, + 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x00c9, 0x00d1, 0x00d9, 0x0149, + 0x0619, 0x0151, 0x0229, 0x0719, 0x0e09, 0x0045, 0x0515, 0x0615, + 0x110d, 0x120d, 0x130d, 0x140d, 0x00c0, 0x00c8, 0x00d0, 0x00d8, + 0x0091, 0x0089, 0x2e0d, 0x2e0d, 0x2d0d, 0x2d0d, 0x2c0d, 0x2c0d, + 0x2b0d, 0x2b0d, 0x2a0d, 0x2a0d, 0x2115, 0x2115, 0x2025, 0x2025, + 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x2c09, 0x2c09, 0x2b09, 0x2b09, + 0x2711, 0x2711, 0x2611, 0x2611, 0x2511, 0x2511, 0x2319, 0x2319, + 0x2219, 0x2219, 0x2131, 0x2131, 0x0110, 0x0130, 0x0138, 0x0140, + 0x0118, 0x0120, 0x0128, 0x080d, 0x2129, 0x2129, 0x2081, 0x2081, + 0x2411, 0x2411, 0x2079, 0x2079, 0x2071, 0x2071, 0x2069, 0x2069, + 0x1bb5, 0x060d, 0x001d, 0xd3f9, 0x0909, 0x0809, 0x090d, 0x0311, + 0x0121, 0x0061, 0x0059, 0x0051, 0x0150, 0x0158, 0x0160, 0x0168, + 0x240d, 0x240d, 0x230d, 0x230d, 0x2609, 0x2609, 0x250d, 0x250d, + 0x2709, 0x2709, 0x2211, 0x2211, 0x2119, 0x2119, 0x2049, 0x2049, + 0x0015, 0x0509, 0x020d, 0x010d, 0x0409, 0x0309, 0x0041, 0x0039, + 0x0111, 0x0031, 0x2209, 0x2209, 0x2029, 0x2029, 0x2021, 0x2021 +}; + +const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64] = { + 0x0020, 0x000b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2001, 0x2001, + 0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003, + 0x0028, 0x000f, 0x200d, 0x200d, 0x0030, 0x0013, 0x2011, 0x2011, + 0x0038, 0x0017, 0x2015, 0x2015, 0x3fff, 0x3fff, 0x2019, 0x2019, + + 0x0020, 0x0009, 0x2007, 0x2007, 0x4005, 0x4005, 0x4005, 0x4005, + 0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001, + 0x0028, 0x000d, 0x200b, 0x200b, 0x0030, 0x0011, 0x200f, 
0x200f, + 0x0038, 0x0015, 0x2013, 0x2013, 0x1fff, 0x0019, 0x2017, 0x2017 +}; + + +const OMX_U16 armVCM4P2_aVlcMVD[124] = { + 0x0010, 0x00f0, 0x0043, 0x003f, 0x4041, 0x4041, 0x4041, 0x4041, + 0x0018, 0x00d8, 0x0047, 0x003b, 0x0020, 0x0080, 0x00a8, 0x00d0, + 0x0028, 0x0048, 0x0070, 0x0078, 0x1fff, 0x0030, 0x0038, 0x0040, + 0x0081, 0x0001, 0x007f, 0x0003, 0x207d, 0x207d, 0x2005, 0x2005, + 0x207b, 0x207b, 0x2007, 0x2007, 0x0050, 0x0058, 0x0060, 0x0068, + 0x2079, 0x2079, 0x2009, 0x2009, 0x2077, 0x2077, 0x200b, 0x200b, + 0x2075, 0x2075, 0x200d, 0x200d, 0x2073, 0x2073, 0x200f, 0x200f, + 0x0071, 0x0011, 0x006f, 0x0013, 0x006d, 0x0015, 0x006b, 0x0017, + 0x0088, 0x0090, 0x0098, 0x00a0, 0x0069, 0x0019, 0x0067, 0x001b, + 0x0065, 0x001d, 0x0063, 0x001f, 0x0061, 0x0021, 0x005f, 0x0023, + 0x005d, 0x0025, 0x005b, 0x0027, 0x00b0, 0x00b8, 0x00c0, 0x00c8, + 0x0059, 0x0029, 0x0057, 0x002b, 0x2055, 0x2055, 0x202d, 0x202d, + 0x2053, 0x2053, 0x202f, 0x202f, 0x2051, 0x2051, 0x2031, 0x2031, + 0x204f, 0x204f, 0x2033, 0x2033, 0x00e0, 0x00e8, 0x0049, 0x0039, + 0x204d, 0x204d, 0x2035, 0x2035, 0x204b, 0x204b, 0x2037, 0x2037, + 0x2045, 0x2045, 0x203d, 0x203d +}; + +/* LMAX table for non Inter (Last == 0 and Last=1) + Level - 1 Indexed + padded armVCM4P2_InterL0L1LMAX[27-31] with zeros to acess entries for Last=1 effectively + +*/ +const OMX_U8 armVCM4P2_InterL0L1LMAX[73] = +{ + 12, 6, 4, 3, 3, 3, 3, 2, + 2, 2, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 0, 0, 0, + 3, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1 +}; + +/* RMAX table for non Inter (Last == 0 and Last=1) + Level - 1 Indexed + padded armVCM4P2_InterL0L1RMAX[12-31] with zeros to access entries for Last=1 table effectively */ + + +const OMX_U8 armVCM4P2_InterL0L1RMAX[35] = +{ + 26, 10, 6, 2, 1, 1, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, 40, 1, 0 +}; + +/* LMAX table for non Intra (Last == 
0 and Last=1) + Level - 1 Indexed + padded armVCM4P2_IntraL0L1LMAX[15-31] with zeros to acess entries for Last=1 effectively + +*/ +const OMX_U8 armVCM4P2_IntraL0L1LMAX[53] = +{ + 27, 10, 5, 4, 3, 3, 3, + 3, 2, 2, 1, 1, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 8, 3, 2, 2, 2, 2, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 +}; + + +/* RMAX table for non Inter (Last == 0 and Last=1) + Level - 1 Indexed + padded armVCM4P2_IntraL0L1RMAX[27-31] with zeros to access entries for Last=1 table effectively */ + + +const OMX_U8 armVCM4P2_IntraL0L1RMAX[40] = +{ + 14, 9, 7, 3, 2, 1, 1, + 1, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + + 20, 6, 1, 0, 0, 0, 0, 0 + +}; diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c new file mode 100644 index 0000000000000000000000000000000000000000..25cf8db6a399ad1d160d103ea956644c4a26d5da --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c @@ -0,0 +1,75 @@ + /** + * + * File Name: armVCM4P2_Lookup_Tables.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * File: armVCM4P2_Lookup_Tables.c + * Description: Contains all the Lookup tables used in MPEG4 codec + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + + /* * Table Entries contain Dc Scaler values (i=0..31 and i=32..63 are two separate 32-entry halves) + * armVCM4P2_DCScaler[i]= 8 for i=1 to 4 and i=33 to 36 + * = 2*i for i=5 to 8 + * = i+8 for i=9 to 24 + * = 2*i-16 for i=25 to 31 + * = (i-32+13)/2 for i=37 to 56 + * = i-6-32 for i=57 to 63 + * = 255 for i=0 and i=32 + */ + +const OMX_U8 armVCM4P2_DCScaler[64]={ + 0xff, 0x8, 0x8, 0x8, 0x8, 0xa, 0xc, 0xe, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, + 0xff, 0x8, 0x8, 0x8, 0x8, 0x9, 0x9, 0xa, + 0xa, 0xb, 0xb, 0xc, 0xc, 0xd, 0xd, 0xe, + 0xe, 0xf, 0xf, 0x10, 0x10, 0x11, 0x11, 0x12, + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, + +}; + + + /* Table Entries Contain reciprocal of 1 to 63 + * armVCM4P2_Reciprocal_QP_S16[i]=round(32767/i) + * armVCM4P2_Reciprocal_QP_S16[0]= 0 + */ + +const OMX_S16 armVCM4P2_Reciprocal_QP_S16[64]={ + 0x0000,0x7fff,0x4000,0x2aaa,0x2000,0x1999,0x1555,0x1249, + 0x1000,0x0e39,0x0ccd,0x0ba3,0x0aab,0x09d9,0x0925,0x0888, + 0x0800,0x0787,0x071c,0x06bd,0x0666,0x0618,0x05d1,0x0591, + 0x0555,0x051f,0x04ec,0x04be,0x0492,0x046a,0x0444,0x0421, + 0x0400,0x03e1,0x03c4,0x03a8,0x038e,0x0376,0x035e,0x0348, + 0x0333,0x031f,0x030c,0x02fa,0x02e9,0x02d8,0x02c8,0x02b9, + 0x02ab,0x029d,0x028f,0x0282,0x0276,0x026a,0x025f,0x0254, + 0x0249,0x023f,0x0235,0x022b,0x0222,0x0219,0x0211,0x0208 + +}; + + /* Table Entries Contain reciprocal of 1 to 63 + * armVCM4P2_Reciprocal_QP_S32[i]=round(131071/i) + * armVCM4P2_Reciprocal_QP_S32[0]= 0 + */ + +const OMX_S32 armVCM4P2_Reciprocal_QP_S32[64]={ + 0x00000000,0x0001ffff,0x00010000,0x0000aaaa, 0x00008000, 0x00006666, 0x00005555, 0x00004924, + 0x00004000,0x000038e3,0x00003333,0x00002e8c, 0x00002aab, 0x00002762, 0x00002492, 0x00002222, + 0x00002000,0x00001e1e,0x00001c72,0x00001af2, 0x0000199a, 0x00001861, 
0x00001746, 0x00001643, + 0x00001555,0x0000147b,0x000013b1,0x000012f6, 0x00001249, 0x000011a8, 0x00001111, 0x00001084, + 0x00001000,0x00000f84,0x00000f0f,0x00000ea1, 0x00000e39, 0x00000dd6, 0x00000d79, 0x00000d21, + 0x00000ccd,0x00000c7d,0x00000c31,0x00000be8, 0x00000ba3, 0x00000b61, 0x00000b21, 0x00000ae5, + 0x00000aab,0x00000a73,0x00000a3d,0x00000a0a, 0x000009d9, 0x000009a9, 0x0000097b, 0x0000094f, + 0x00000925,0x000008fb,0x000008d4,0x000008ae, 0x00000889, 0x00000865, 0x00000842, 0x00000820 + +}; diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s new file mode 100644 index 0000000000000000000000000000000000000000..3f92d85eed993774012d9f5b6467d41187ac7f3d --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s @@ -0,0 +1,104 @@ +;// +;// +;// File Name: armVCM4P2_SetPredDir_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + +; ** +; * Function: armVCM4P2_SetPredDir +; * +; * Description: +; * Performs detecting the prediction direction +; * +; * Remarks: +; * +; * Parameters: +; * [in] blockIndex block index indicating the component type and +; * position as defined in subclause 6.1.3.8, of ISO/IEC +; * 14496-2. Furthermore, indexes 6 to 9 indicate the +; * alpha blocks spatially corresponding to luminance +; * blocks 0 to 3 in the same macroblock. 
+; * [in] pCoefBufRow pointer to the coefficient row buffer +; * [in] pQpBuf pointer to the quantization parameter buffer +; * [out]predQP quantization parameter of the predictor block +; * [out]predDir indicates the prediction direction which takes one +; * of the following values: +; * OMX_VC_HORIZONTAL predict horizontally +; * OMX_VC_VERTICAL predict vertically +; * +; * Return Value: +; * Standard OMXResult result. See enumeration for possible result codes. +; * +; */ + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE omxVC_s.h + + + M_VARIANTS ARM1136JS + + + IF ARM1136JS + +;// Input Arguments +BlockIndex RN 0 +pCoefBufRow RN 1 +pCoefBufCol RN 2 +predDir RN 3 +predQP RN 4 +pQpBuf RN 5 + +;// Local Variables + +Return RN 0 +blockDCLeft RN 6 +blockDCTop RN 7 +blockDCTopLeft RN 8 +temp1 RN 9 +temp2 RN 14 + + M_START armVCM4P2_SetPredDir,r9 + + M_ARG ppredQP,4 + M_ARG ppQpBuf,4 + + LDRH blockDCTopLeft,[pCoefBufRow,#-16] + LDRH blockDCLeft,[pCoefBufCol] + + TEQ BlockIndex,#3 + LDREQH blockDCTop,[pCoefBufCol,#-16] + LDRNEH blockDCTop,[pCoefBufRow] + + SUBS temp1,blockDCLeft,blockDCTopLeft + RSBLT temp1,temp1,#0 + SUBS temp2,blockDCTopLeft,blockDCTop + RSBLT temp2,temp2,#0 + + M_LDR pQpBuf,ppQpBuf + M_LDR predQP,ppredQP + CMP temp1,temp2 + MOV temp2,#OMX_VC_VERTICAL + LDRLTB temp1,[pQpBuf,#1] + STRLT temp2,[predDir] + STRLT temp1,[predQP] + MOV temp2,#OMX_VC_HORIZONTAL + LDRGEB temp1,[pQpBuf] + STRGE temp2,[predDir] + MOV Return,#OMX_Sts_NoErr + STRGE temp1,[predQP] + + + + M_END + + ENDIF + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c new file mode 100644 index 0000000000000000000000000000000000000000..ed17f9b4de9faee50f73a0bac593cb27b2992c54 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c @@ -0,0 +1,61 @@ +/** + * + * File Name: 
armVCM4P2_Zigzag_Tables.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armVCM4P2_ZigZag_Tables.c + * Description: Contains the zigzag tables + * + */ + +#include "omxtypes.h" + +/* Each entry is twice the corresponding index of the reference zigzag table; every 64-entry sub-table is a permutation of the even values 0..126 + * Holds the Classical, Vertical and Horizontal zigzag scan tables (64 entries each, in that order) in one array + */ + +const OMX_U8 armVCM4P2_aClassicalZigzagScan [192] = +{ + 0, 2, 16, 32, 18, 4, 6, 20, + 34, 48, 64, 50, 36, 22, 8, 10, + 24, 38, 52, 66, 80, 96, 82, 68, + 54, 40, 26, 12, 14, 28, 42, 56, + 70, 84, 98, 112, 114, 100, 86, 72, + 58, 44, 30, 46, 60, 74, 88, 102, + 116, 118, 104, 90, 76, 62, 78, 92, + 106, 120, 122, 108, 94, 110, 124, 126, + + 0, 16, 32, 48, 2, 18, 4, 20, + 34, 50, 64, 80, 96, 112, 114, 98, + 82, 66, 52, 36, 6, 22, 8, 24, + 38, 54, 68, 84, 100, 116, 70, 86, + 102, 118, 40, 56, 10, 26, 12, 28, + 42, 58, 72, 88, 104, 120, 74, 90, + 106, 122, 44, 60, 14, 30, 46, 62, + 76, 92, 108, 124, 78, 94, 110, 126, + + 0, 2, 4, 6, 16, 18, 32, 34, + 20, 22, 8, 10, 12, 14, 30, 28, + 26, 24, 38, 36, 48, 50, 64, 66, + 52, 54, 40, 42, 44, 46, 56, 58, + 60, 62, 68, 70, 80, 82, 96, 98, + 84, 86, 72, 74, 76, 78, 88, 90, + 92, 94, 100, 102, 112, 114, 116, 118, + 104, 106, 108, 110, 120, 122, 124, 126 + + +}; + + + + + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c new file mode 100644 index 0000000000000000000000000000000000000000..b63d295166c3d7e5726f651b1a0e36a447264b8d --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c @@ -0,0 +1,102 @@ +/** + * + * File Name: omxVCM4P2_DecodeBlockCoef_Inter.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) 
Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for inter reconstruction + * + */ + + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Inter + * + * Description: + * Decodes the INTER block coefficients. Inverse quantization, inversely zigzag + * positioning and IDCT, with appropriate clipping on each step, are performed + * on the coefficients. The results (residuals) are placed in a contiguous array + * of 64 elements. For INTER block, the output buffer holds the residuals for + * further reconstruction. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream buffer. There is no boundary + * check for the bit stream buffer. + * [in] pBitOffset pointer to the bit position in the byte pointed + * to by *ppBitStream. *pBitOffset is valid within + * [0-7] + * [in] QP quantization parameter + * [in] shortVideoHeader a flag indicating presence of short_video_header; + * shortVideoHeader==1 indicates using quantization method defined in short + * video header mode, and shortVideoHeader==0 indicates normal quantization method. + * [out] ppBitStream *ppBitStream is updated after the block is decoded, so that it points to the + * current byte in the bit stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the current bit position in the + * byte pointed by *ppBitStream + * [out] pDst pointer to the decoded residual buffer (a contiguous array of 64 elements of + * OMX_S16 data type). Must be 16-byte aligned. 
+ * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following pointers is Null: ppBitStream, *ppBitStream, pBitOffset , pDst + * - At least one of the below case: + * - *pBitOffset exceeds [0,7], QP <= 0; + * - pDst not 16-byte aligned + * OMX_Sts_Err - status error + * + */ +OMXResult omxVCM4P2_DecodeBlockCoef_Inter( + const OMX_U8 ** ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_INT QP, + OMX_INT shortVideoHeader +) +{ + /* 64 elements are needed but to align it to 16 bytes need + 15 more elements of padding */ + OMX_S16 tempBuf[79]; + OMX_S16 *pTempBuf1; + OMXResult errorCode; + /* Aligning the local buffers */ + pTempBuf1 = armAlignTo16Bytes(tempBuf); + + + /* VLD and zigzag */ + errorCode = omxVCM4P2_DecodeVLCZigzag_Inter(ppBitStream, pBitOffset, + pTempBuf1,shortVideoHeader); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Dequantization */ + errorCode = omxVCM4P2_QuantInvInter_I( + pTempBuf1, + QP); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Inverse transform */ + errorCode = omxVCM4P2_IDCT8x8blk(pTempBuf1, pDst); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + return OMX_Sts_NoErr; +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c new file mode 100644 index 0000000000000000000000000000000000000000..c609a60e37bddaf0450053d52282a9a9d1df40d4 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c @@ -0,0 +1,208 @@ +/** + * + * File Name: omxVCM4P2_DecodeBlockCoef_Intra.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Description: + * Contains modules for intra reconstruction + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Intra + * + * Description: + * Decodes the INTRA block coefficients. Inverse quantization, inversely zigzag + * positioning, and IDCT, with appropriate clipping on each step, are performed + * on the coefficients. The results are then placed in the output frame/plane on + * a pixel basis. For INTRA block, the output values are clipped to [0, 255] and + * written to corresponding block buffer within the destination plane. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream buffer. There is no boundary + * check for the bit stream buffer. + * [in] pBitOffset pointer to the bit position in the byte pointed + * to by *ppBitStream. *pBitOffset is valid within + * [0-7]. + * [in] step width of the destination plane + * [in/out] pCoefBufRow [in] pointer to the coefficient row buffer + * [out] updated coefficient rwo buffer + * [in/out] pCoefBufCol [in] pointer to the coefficient column buffer + * [out] updated coefficient column buffer + * [in] curQP quantization parameter of the macroblock which + * the current block belongs to + * [in] pQpBuf Pointer to a 2-element QP array. pQpBuf[0] holds the QP of the 8x8 block left to + * the current block(QPa). pQpBuf[1] holds the QP of the 8x8 block just above the + * current block(QPc). + * Note, in case the corresponding block is out of VOP bound, the QP value will have + * no effect to the intra-prediction process. Refer to subclause "7.4.3.3 Adaptive + * ac coefficient prediction" of ISO/IEC 14496-2(MPEG4 Part2) for accurate description. + * [in] blockIndex block index indicating the component type and + * position as defined in subclause 6.1.3.8, + * Figure 6-5 of ISO/IEC 14496-2. 
+ * [in] intraDCVLC a code determined by intra_dc_vlc_thr and QP. + * This allows a mechanism to switch between two VLC + * for coding of Intra DC coefficients as per Table + * 6-21 of ISO/IEC 14496-2. + * [in] ACPredFlag a flag equal to ac_pred_flag (of luminance) indicating + * if the ac coefficients of the first row or first + * column are differentially coded for intra coded + * macroblock. + * [in] shortVideoHeader a flag indicating presence of short_video_header; + * shortVideoHeader==1 selects linear intra DC mode, + * and shortVideoHeader==0 selects nonlinear intra DC mode. + * [out] ppBitStream *ppBitStream is updated after the block is + * decoded, so that it points to the current byte + * in the bit stream buffer + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream + * [out] pDst pointer to the block in the destination plane. + * pDst should be 16-byte aligned. + * [out] pCoefBufRow pointer to the updated coefficient row buffer. + * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, + * pCoefBufRow, pCoefBufCol, pQPBuf, pDst. + * or + * - At least one of the below case: *pBitOffset exceeds [0,7], curQP exceeds (1, 31), + * blockIndex exceeds [0,9], step is not the multiple of 8, intraDCVLC is zero while + * blockIndex greater than 5. 
+ * or + * - pDst is not 16-byte aligned + * OMX_Sts_Err - status error + * + */ + +OMXResult omxVCM4P2_DecodeBlockCoef_Intra( + const OMX_U8 ** ppBitStream, + OMX_INT *pBitOffset, + OMX_U8 *pDst, + OMX_INT step, + OMX_S16 *pCoefBufRow, + OMX_S16 *pCoefBufCol, + OMX_U8 curQP, + const OMX_U8 *pQPBuf, + OMX_INT blockIndex, + OMX_INT intraDCVLC, + OMX_INT ACPredFlag, + OMX_INT shortVideoHeader + ) +{ + OMX_S16 tempBuf1[79], tempBuf2[79]; + OMX_S16 *pTempBuf1, *pTempBuf2; + OMX_INT predDir, predACDir; + OMX_INT predQP; + OMXVCM4P2VideoComponent videoComp; + OMXResult errorCode; + + + /* Aligning the local buffers */ + pTempBuf1 = armAlignTo16Bytes(tempBuf1); + pTempBuf2 = armAlignTo16Bytes(tempBuf2); + + /* Setting the AC prediction direction and prediction direction */ + armVCM4P2_SetPredDir( + blockIndex, + pCoefBufRow, + pCoefBufCol, + &predDir, + &predQP, + pQPBuf); + + predACDir = predDir; + + + if (ACPredFlag == 0) + { + predACDir = OMX_VC_NONE; + } + + /* Setting the videoComp */ + if (blockIndex <= 3) + { + videoComp = OMX_VC_LUMINANCE; + } + else + { + videoComp = OMX_VC_CHROMINANCE; + } + + + /* VLD and zigzag */ + if (intraDCVLC == 1) + { + errorCode = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC( + ppBitStream, + pBitOffset, + pTempBuf1, + predACDir, + shortVideoHeader, + videoComp); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + } + else + { + errorCode = omxVCM4P2_DecodeVLCZigzag_IntraACVLC( + ppBitStream, + pBitOffset, + pTempBuf1, + predACDir, + shortVideoHeader); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + } + + /* AC DC prediction */ + errorCode = omxVCM4P2_PredictReconCoefIntra( + pTempBuf1, + pCoefBufRow, + pCoefBufCol, + curQP, + predQP, + predDir, + ACPredFlag, + videoComp); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Dequantization */ + errorCode = omxVCM4P2_QuantInvIntra_I( + pTempBuf1, + curQP, + videoComp, + shortVideoHeader); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* 
Inverse transform */ + errorCode = omxVCM4P2_IDCT8x8blk (pTempBuf1, pTempBuf2); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Placing the linear array into the destination plane and clipping + it to 0 to 255 */ + + armVCM4P2_Clip8(pTempBuf2,pDst,step); + + + return OMX_Sts_NoErr; +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s new file mode 100644 index 0000000000000000000000000000000000000000..a1861da9cf947eee509a7e727471e4733eb1760f --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s @@ -0,0 +1,364 @@ +; ********** +; * +; * File Name: omxVCM4P2_DecodePadMV_PVOP_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; ** +; * Function: omxVCM4P2_DecodePadMV_PVOP +; * +; * Description: +; * Decodes and pads four motion vectors of the non-intra macroblock in P-VOP. +; * The motion vector padding process is specified in subclause 7.6.1.6 of +; * ISO/IEC 14496-2. +; * +; * Remarks: +; * +; * +; * Parameters: +; * [in] ppBitStream pointer to the pointer to the current byte in +; * the bit stream buffer +; * [in] pBitOffset pointer to the bit position in the byte pointed +; * to by *ppBitStream. *pBitOffset is valid within +; * [0-7]. +; * [in] pSrcMVLeftMB pointers to the motion vector buffers of the +; * macroblocks specially at the left side of the current macroblock +; * respectively. +; * [in] pSrcMVUpperMB pointers to the motion vector buffers of the +; * macroblocks specially at the upper side of the current macroblock +; * respectively. 
+; * [in] pSrcMVUpperRightMB pointers to the motion vector buffers of the +; * macroblocks specially at the upper-right side of the current macroblock +; * respectively. +; * [in] fcodeForward a code equal to vop_fcode_forward in MPEG-4 +; * bit stream syntax +; * [in] MBType the type of the current macroblock. If MBType +; * is not equal to OMX_VC_INTER4V, the destination +; * motion vector buffer is still filled with the +; * same decoded vector. +; * [out] ppBitStream *ppBitStream is updated after the block is decoded, +; * so that it points to the current byte in the bit +; * stream buffer +; * [out] pBitOffset *pBitOffset is updated so that it points to the +; * current bit position in the byte pointed by +; * *ppBitStream +; * [out] pDstMVCurMB pointer to the motion vector buffer of the current +; * macroblock which contains four decoded motion vectors +; * +; * Return Value: +; * OMX_Sts_NoErr -no error +; * +; * +; * OMX_Sts_Err - status error +; * +; * + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + INCLUDE omxVC_s.h + + M_VARIANTS ARM1136JS + + + + + IF ARM1136JS + +;//Input Arguments + +ppBitStream RN 0 +pBitOffset RN 1 +pSrcMVLeftMB RN 2 +pSrcMVUpperMB RN 3 +pSrcMVUpperRightMB RN 4 +pDstMVCurMB RN 5 +fcodeForward RN 6 +MBType RN 7 + +;//Local Variables + +zero RN 4 +one RN 4 +scaleFactor RN 1 + + +Return RN 0 + +VlcMVD RN 0 +index RN 4 +Count RN 7 + +mvHorData RN 4 +mvHorResidual RN 0 + +mvVerData RN 4 +mvVerResidual RN 0 + +temp RN 1 + +temp1 RN 3 +High RN 4 +Low RN 2 +Range RN 1 + +BlkCount RN 14 + +diffMVdx RN 0 +diffMVdy RN 1 + +;// Scratch Registers + +RBitStream RN 8 +RBitCount RN 9 +RBitBuffer RN 10 + +T1 RN 11 +T2 RN 12 +LR RN 14 + + IMPORT armVCM4P2_aVlcMVD + IMPORT omxVCM4P2_FindMVpred + + ;// Allocate stack memory + + M_ALLOC4 ppDstMVCurMB,4 + M_ALLOC4 pDstMVPredME,4 + M_ALLOC4 pBlkCount,4 + + M_ALLOC4 pppBitStream,4 + M_ALLOC4 ppBitOffset,4 + M_ALLOC4 ppSrcMVLeftMB,4 + M_ALLOC4 ppSrcMVUpperMB,4 + + M_ALLOC4 
pdiffMVdx,4 + M_ALLOC4 pdiffMVdy,4 + M_ALLOC4 pHigh,4 + + + + + M_START omxVCM4P2_DecodePadMV_PVOP,r11 + + M_ARG pSrcMVUpperRightMBonStack,4 ;// pointer to pSrcMVUpperRightMB on stack + M_ARG pDstMVCurMBonStack,4 ;// pointer to pDstMVCurMB on stack + M_ARG fcodeForwardonStack,4 ;// pointer to fcodeForward on stack + M_ARG MBTypeonStack,4 ;// pointer to MBType on stack + + + + + + ;// Initializing the BitStream Macro + + M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount + M_LDR MBType,MBTypeonStack ;// Load MBType from stack + M_LDR pDstMVCurMB,pDstMVCurMBonStack ;// Load pDstMVCurMB from stack + MOV zero,#0 + + TEQ MBType,#OMX_VC_INTRA ;// Check if MBType=OMX_VC_INTRA + TEQNE MBType,#OMX_VC_INTRA_Q ;// check if MBType=OMX_VC_INTRA_Q + STREQ zero,[pDstMVCurMB] ;// INTRA MB: clear all four motion vectors (4 bytes each); MV 0 + M_BD_INIT1 T1, T2, T2 + STREQ zero,[pDstMVCurMB,#4] ;// MV 1 + M_BD_INIT2 T1, T2, T2 + STREQ zero,[pDstMVCurMB,#8] ;// MV 2 + MOVEQ Return,#OMX_Sts_NoErr + MOV BlkCount,#0 + STREQ zero,[pDstMVCurMB,#12] ;// MV 3 + + BEQ ExitOK + + TEQ MBType,#OMX_VC_INTER4V ;// Check if MBType=OMX_VC_INTER4V + TEQNE MBType,#OMX_VC_INTER4V_Q ;// Check if MBType=OMX_VC_INTER4V_Q + MOVEQ Count,#4 + + TEQ MBType,#OMX_VC_INTER ;// Check if MBType=OMX_VC_INTER + TEQNE MBType,#OMX_VC_INTER_Q ;// Check if MBType=OMX_VC_INTER_Q + MOVEQ Count,#1 + + M_LDR fcodeForward,fcodeForwardonStack ;// Load fcodeForward from stack + + ;// Storing the values temporarily on stack + + M_STR ppBitStream,pppBitStream + M_STR pBitOffset,ppBitOffset + + + SUB temp,fcodeForward,#1 ;// temp=fcodeForward-1 + MOV one,#1 + M_STR pSrcMVLeftMB,ppSrcMVLeftMB + LSL scaleFactor,one,temp ;// scaleFactor=1<<(fcodeForward-1) + M_STR pSrcMVUpperMB,ppSrcMVUpperMB + LSL scaleFactor,scaleFactor,#5 + M_STR scaleFactor,pHigh ;// [pHigh]=32*scaleFactor + + ;// VLD Decoding + + +Loop + + LDR VlcMVD, =armVCM4P2_aVlcMVD ;// Load the optimized MVD VLC table + + ;// Horizontal Data and Residual calculation + + LDR temp,=0xFFF + M_BD_VLD index,T1,T2,VlcMVD,3,2 ;// variable length 
decoding using the macro + + TEQ index,temp + BEQ ExitError ;// Exit with an Error Message if the decoded symbol is an invalied symbol + + SUB mvHorData,index,#32 ;// mvHorData=index-32 + MOV mvHorResidual,#1 ;// mvHorResidual=1 + CMP fcodeForward,#1 + TEQNE mvHorData,#0 + MOVEQ diffMVdx,mvHorData ;// if scaleFactor=1(fcodeForward=1) or mvHorData=0 diffMVdx=mvHorData + BEQ VerticalData + + SUB temp,fcodeForward,#1 + M_BD_VREAD8 mvHorResidual,temp,T1,T2 ;// get mvHorResidual from bitstream if fcodeForward>1 and mvHorData!=0 + + CMP mvHorData,#0 + RSBLT mvHorData,mvHorData,#0 ;// mvHorData=abs(mvHorData) + SUB mvHorResidual,mvHorResidual,fcodeForward + SMLABB diffMVdx,mvHorData,fcodeForward,mvHorResidual ;// diffMVdx=abs(mvHorData)*fcodeForward+mvHorResidual-fcodeForward + ADD diffMVdx,diffMVdx,#1 + RSBLT diffMVdx,diffMVdx,#0 + + ;// Vertical Data and Residual calculation + +VerticalData + + M_STR diffMVdx,pdiffMVdx ;// Store the diffMVdx on stack + LDR VlcMVD, =armVCM4P2_aVlcMVD ;// Loading the address of optimized VLC tables + + LDR temp,=0xFFF + M_BD_VLD index,T1,T2,VlcMVD,3,2 ;// VLC decoding using the macro + + TEQ index,temp + BEQ ExitError ;// Exit with an Error Message if an Invalied Symbol occurs + + SUB mvVerData,index,#32 ;// mvVerData=index-32 + MOV mvVerResidual,#1 + CMP fcodeForward,#1 + TEQNE mvVerData,#0 + MOVEQ diffMVdy,mvVerData ;// diffMVdy = mvVerData if scaleFactor=1(fcodeForward=1) or mvVerData=0 + BEQ FindMVPred + + SUB temp,fcodeForward,#1 + M_BD_VREAD8 mvVerResidual,temp,T1,T2 ;// Get mvVerResidual from bit stream if fcodeForward>1 and mnVerData!=0 + + + CMP mvVerData,#0 + RSBLT mvVerData,mvVerData,#0 + SUB mvVerResidual,mvVerResidual,fcodeForward + SMLABB diffMVdy,mvVerData,fcodeForward,mvVerResidual ;// diffMVdy=abs(mvVerData)*fcodeForward+mvVerResidual-fcodeForward + ADD diffMVdy,diffMVdy,#1 + RSBLT diffMVdy,diffMVdy,#0 + + ;//Calling the Function omxVCM4P2_FindMVpred + +FindMVPred + + M_STR diffMVdy,pdiffMVdy + ADD 
temp,pDstMVCurMB,BlkCount,LSL #2 ;// temp=pDstMVCurMB[BlkCount] + M_STR temp,ppDstMVCurMB ;// store temp on stack for passing as an argument to FindMVPred + + MOV temp,#0 + M_STR temp,pDstMVPredME ;// Pass pDstMVPredME=NULL as an argument + M_STR BlkCount,pBlkCount ;// Pass BlkCount as Argument through stack + + MOV temp,pSrcMVLeftMB ;// temp (RN 1)=pSrcMVLeftMB + M_LDR pSrcMVUpperRightMB,pSrcMVUpperRightMBonStack + MOV pSrcMVLeftMB,pSrcMVUpperMB ;// pSrcMVLeftMB ( RN 2) = pSrcMVUpperMB + MOV ppBitStream,pDstMVCurMB ;// ppBitStream ( RN 0) = pDstMVCurMB + MOV pSrcMVUpperMB,pSrcMVUpperRightMB ;// pSrcMVUpperMB( RN 3) = pSrcMVUpperRightMB + BL omxVCM4P2_FindMVpred ;// Branch to subroutine omxVCM4P2_FindMVpred + + ;// Store Horizontal Motion Vector + + M_LDR BlkCount,pBlkCount ;// Load BlkCount from stack + M_LDR High,pHigh ;// High=32*scaleFactor + LSL temp1,BlkCount,#2 ;// temp1=BlkCount*4 + M_LDR diffMVdx,pdiffMVdx ;// Load diffMVdx + + LDRSH temp,[pDstMVCurMB,temp1] ;// temp=pDstMVCurMB[BlkCount] + + + RSB Low,High,#0 ;// Low = -32*scaleFactor + ADD diffMVdx,temp,diffMVdx ;// diffMVdx=pDstMVCurMB[BlkCount]+diffMVdx + ADD Range,High,High ;// Range=64*ScaleFactor + SUB High,High,#1 ;// High= 32*scaleFactor-1 + + CMP diffMVdx,Low ;// If diffMVdx < Low + ADDLT diffMVdx,diffMVdx,Range ;// diffMVdx+=Range + CMP diffMVdx,High + SUBGT diffMVdx,diffMVdx,Range ;// If diffMVdx > High diffMVdx-=Range + STRH diffMVdx,[pDstMVCurMB,temp1] + + ;// Store Vertical + + ADD temp1,temp1,#2 ;// temp1=4*BlkCount+2 + M_LDR diffMVdx,pdiffMVdy ;// Load diffMVdy + LDRSH temp,[pDstMVCurMB,temp1] ;// temp=pDstMVCurMB[BlkCount].diffMVdy + ADD BlkCount,BlkCount,#1 ;// BlkCount=BlkCount+1 + ADD diffMVdx,temp,diffMVdx + CMP diffMVdx,Low + ADDLT diffMVdx,diffMVdx,Range ;// If diffMVdy < Low diffMVdy+=Range + CMP diffMVdx,High + SUBGT diffMVdx,diffMVdx,Range ;// If diffMVdy > High diffMVdy-=Range + STRH diffMVdx,[pDstMVCurMB,temp1] + + CMP BlkCount,Count + M_LDR pSrcMVLeftMB,ppSrcMVLeftMB + M_LDR pSrcMVUpperMB,ppSrcMVUpperMB + + BLT Loop ;// If BlkCount> 1, j=0..3 + ;// + ;// Similar to UHADD8 instruction, but with a rounding value of 1 added to + ;// each sum before dividing by two, if round is 1 + ;// + ;// 
Syntax: + ;// M_UHADD8R $dest, $x, $y, $round, $mask + ;// + ;// Inputs: + ;// $x four packed bytes, x[3] : x[2] : x[1] : x[0] + ;// $y four packed bytes, y[3] : y[2] : y[1] : y[0] + ;// $round 0 if no rounding to be added, 1 if rounding to be done + ;// $mask some register set to 0x80808080 + ;// + ;// Outputs: + ;// $dest four packed bytes, z[3] : z[2] : z[1] : z[0] + + MACRO + M_UHADD8R $dest, $x, $y, $round, $mask + IF $round = 1 + IF $dest /= $y + MVN $dest, $x + UHSUB8 $dest, $y, $dest + EOR $dest, $dest, $mask + ELSE + MVN $dest, $y + UHSUB8 $dest, $x, $dest + EOR $dest, $dest, $mask + ENDIF + ELSE + UHADD8 $dest, $x, $y + ENDIF + MEND +;// *************************************************************************** + ;// Description: + ;// Load 8 bytes from $pSrc (aligned or unaligned locations) + ;// + ;// Syntax: + ;// M_LOAD_X $pSrc, $srcStep, $out0, $out1, $scratch, $offset + ;// + ;// Inputs: + ;// $pSrc 4 byte aligned source pointer to an address just less than + ;// or equal to the data location + ;// $srcStep The stride on source + ;// $scratch A scratch register, used internally for temp calculations + ;// $offset Difference of source data location to the source pointer + ;// Use when $offset != 0 (unaligned load) + ;// + ;// Outputs: + ;// $pSrc In case the macro accepts stride, it increments the pSrc by + ;// that value, else unchanged + ;// $out0 four packed bytes, z[3] : z[2] : z[1] : z[0] + ;// $out1 four packed bytes, z[7] : z[6] : z[5] : z[4] + ;// + ;// Note: {$out0, $out1, $scratch} should be registers with ascending + ;// register numbering. In case offset is 0, $scratch is not modified. 
+ + MACRO + M_LOAD_X $pSrc, $srcStep, $out0, $out1, $scratch, $offset + IF $offset = 0 + LDM $pSrc, {$out0, $out1} + ADD $pSrc, $pSrc, $srcStep + ELSE + LDM $pSrc, {$out0, $out1, $scratch} + ADD $pSrc, $pSrc, $srcStep + + MOV $out0, $out0, LSR #8 * $offset + ORR $out0, $out0, $out1, LSL #(32 - 8 * ($offset)) + MOV $out1, $out1, LSR #8 * $offset + ORR $out1, $out1, $scratch, LSL #(32 - 8 * ($offset)) + ENDIF + MEND + +;// *************************************************************************** + ;// Description: + ;// Loads three words for X interpolation, update pointer to next row. For + ;// X interpolation, given a truncated-4byteAligned source pointer, + ;// invariably three continous words are required from there to get the + ;// nine bytes from the source pointer for filtering. + ;// + ;// Syntax: + ;// M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3 + ;// + ;// Inputs: + ;// $pSrc 4 byte aligned source pointer to an address just less than + ;// or equal to the data location + ;// + ;// $srcStep The stride on source + ;// + ;// $offset Difference of source data location to the source pointer + ;// Use when $offset != 0 (unaligned load) + ;// + ;// Outputs: + ;// $pSrc Incremented by $srcStep + ;// + ;// $word0, $word1, $word2, $word3 + ;// Three of these are outputs based on the $offset parameter. + ;// The outputs are specifically generated to be processed by + ;// the M_EXT_XINT macro. Following is the illustration to show + ;// how the nine bytes are spanned for different offsets from + ;// notTruncatedForAlignmentSourcePointer. 
+ ;// + ;// ------------------------------------------------------ + ;// | Offset | Aligned Ptr | word0 | word1 | word2 | word3 | + ;// |------------------------------------------------------| + ;// | 0 | 0 | 0123 | 4567 | 8xxx | | + ;// | 1 | -1 | x012 | 3456 | 78xx | | + ;// | 2 | -2 | xx01 | 2345 | 678x | | + ;// | 3 | -3 | xxx0 | | 1234 | 5678 | + ;// ------------------------------------------------------ + ;// + ;// where the numbering (0-8) is to designate the 9 bytes from + ;// start of a particular row. The illustration doesn't take in + ;// account the positioning of bytes with in the word and the + ;// macro combination with M_EXT_XINT will work only in little + ;// endian environs + ;// + ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending + ;// register numbering + + MACRO + M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3 + IF $offset /= 3 + LDM $pSrc, {$word0, $word1, $word2} + ELSE + LDM $pSrc, {$word0, $word2, $word3} + ENDIF + ADD $pSrc, $pSrc, $srcStep + MEND + +;// *************************************************************************** + ;// Description: + ;// Extract four registers of four pixels for X interpolation + ;// + ;// Syntax: + ;// M_EXT_XINT $offset, $word0, $word1, $word2, $word3 + ;// + ;// Inputs: + ;// $offset Difference of source data location to the source pointer + ;// Use when $offset != 0 (unaligned load) + ;// + ;// $word0, $word1, $word2, $word3 + ;// Three of these are inputs based on the $offset parameter. + ;// The inputs are specifically selected to be processed by + ;// the M_EXT_XINT macro. 
+ ;// + ;// ------------------------------------------------------ + ;// | Offset | Aligned Ptr | word0 | word1 | word2 | word3 | + ;// |------------------------------------------------------| + ;// | 0 | 0 | 0123 | 4567 | 8xxx | yyyy | + ;// | 1 | -1 | x012 | 3456 | 78xx | yyyy | + ;// | 2 | -2 | xx01 | 2345 | 678x | yyyy | + ;// | 3 | -3 | xxx0 | yyyy | 1234 | 5678 | + ;// ------------------------------------------------------ + ;// + ;// Outputs: + ;// $word0, $word1, $word2, $word3 + ;// Bytes from the original source pointer (not truncated for + ;// 4 byte alignment) as shown in the table. + ;// ------------------------------- + ;// | word0 | word1 | word2 | word3 | + ;// |-------------------------------| + ;// | 0123 | 4567 | 1234 | 5678 | + ;// ------------------------------- + ;// + ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending + ;// register numbering + + MACRO + M_EXT_XINT $offset, $word0, $word1, $word2, $word3 + IF $offset = 0 + ; $word0 and $word1 are ok + ; $word2, $word3 are just 8 shifted versions + MOV $word3, $word1, LSR #8 + ORR $word3, $word3, $word2, LSL #24 + MOV $word2, $word0, LSR #8 + ORR $word2, $word2, $word1, LSL #24 + ELIF $offset = 3 + ; $word2 and $word3 are ok (taken care while loading itself) + ; set $word0 & $word1 + MOV $word0, $word0, LSR #24 + ORR $word0, $word0, $word2, LSL #8 + MOV $word1, $word2, LSR #24 + ORR $word1, $word1, $word3, LSL #8 + ELSE + MOV $word0, $word0, LSR #8 * $offset + ORR $word0, $word0, $word1, LSL #(32 - 8 * ($offset)) + MOV $word1, $word1, LSR #8 * $offset + ORR $word1, $word1, $word2, LSL #(32 - 8 * ($offset)) + + MOV $word3, $word1, LSR #8 + ORR $word3, $word3, $word2, LSL #(32 - 8 * (($offset)+1)) + MOV $word2, $word0, LSR #8 + ORR $word2, $word2, $word1, LSL #24 + ENDIF + MEND + +;// *************************************************************************** + ;// Description: + ;// Computes half-sum and xor of two inputs and puts them in the input + ;// registers in 
that order
+        ;//
+        ;// Syntax:
+        ;// M_HSUM_XOR $v0, $v1, $tmp
+        ;//
+        ;// Inputs:
+        ;// $v0 a, first input
+        ;// $v1 b, second input
+        ;// $tmp scratch register
+        ;//
+        ;// Outputs:
+        ;// $v0 (a + b)/2
+        ;// $v1 a ^ b
+
+        MACRO
+        M_HSUM_XOR $v0, $v1, $tmp
+        UHADD8 $tmp, $v0, $v1 ;// tmp = (a + b)/2 per byte lane; kept in $tmp because $v0 is still read by the EOR
+        EOR $v1, $v0, $v1 ;// l0 = a ^ b
+        MOV $v0, $tmp ;// s0 = (a + b)/2
+        MEND
+;// ***************************************************************************
+        ;// Description:
+        ;// Calculates average of 4 values (a,b,c,d) for HalfPixelXY predict type in
+        ;// mcReconBlock module. Very specific to the implementation of
+        ;// M_MCRECONBLOCK_HalfPixelXY done here. Uses "tmp" as scratch register and
+        ;// "yMask" for mask variable "0x1010101x" set in it. In yMask 4 lsbs are
+        ;// not significant and are used by the callee for row counter (y)
+        ;//
+        ;// Some points to note are:
+        ;// 1. Input is pair of pair-averages and Xors
+        ;// 2. $sum1 and $lsb1 are not modified and hence can be reused in another
+        ;// running average
+        ;// 3. Output is in the first argument
+        ;//
+        ;// Syntax:
+        ;// M_AVG4 $sum0, $lsb0, $sum1, $lsb1, $rndVal
+        ;//
+        ;// Inputs:
+        ;// $sum0 (a + b) >> 1, where a and b are 1st and 2nd inputs to be averaged
+        ;// $lsb0 (a ^ b)
+        ;// $sum1 (c + d) >> 1. Not modified
+        ;// $lsb1 (c ^ d) Not modified
+        ;// $rndVal Assembler Variable.
0 for rounding, 1 for no rounding
+        ;//
+        ;// Outputs:
+        ;// $sum0 (a + b + c + d + 1) / 4 : If no rounding
+        ;// (a + b + c + d + 2) / 4 : If rounding
+        ;// $lsb0 is clobbered (used as the s2 temporary); the register named "tmp" is clobbered too
+
+        MACRO
+        M_AVG4 $sum0, $lsb0, $sum1, $lsb1, $rndVal
+        LCLS OP1
+        LCLS OP2
+        IF $rndVal = 0 ;// rounding case
+OP1 SETS "AND"
+OP2 SETS "ORR"
+        ELSE ;// Not rounding case
+OP1 SETS "ORR"
+OP2 SETS "AND"
+        ENDIF
+
+        LCLS lsb2
+        LCLS sum2
+        LCLS dest
+
+lsb2 SETS "tmp"
+sum2 SETS "$lsb0"
+dest SETS "$sum0"
+
+        ;// The inline formulas below are written for the rounding case ($rndVal = 0);
+        ;// for $rndVal = 1 the AND/ORR of the first and third instruction are swapped.
+        $OP1 $lsb0, $lsb0, $lsb1 ;// e0 = e0 & e1
+        EOR $lsb2, $sum0, $sum1 ;// e2 = s0 ^ s1
+        $OP2 $lsb2, $lsb2, $lsb0 ;// e2 = e2 | e0
+        AND $lsb2, $lsb2, yMask, LSR # 4 ;// e2 = e2 & mask (the LSR drops the row counter nibble of yMask)
+        UHADD8 $sum2, $sum0, $sum1 ;// s2 = (s0 + s1)/2
+        UADD8 $dest, $sum2, $lsb2 ;// dest = s2 + e2
+        MEND
+;// ***************************************************************************
+;// Motion compensation handler macros
+;// ***************************************************************************
+        ;// Description:
+        ;// Implement motion compensation routines using the named registers in
+        ;// callee function. Each of the following 4 implement the 4 predict type
+        ;// Each handles 8 cases each ie all the combinations of 4 types of source
+        ;// alignment offsets and 2 types of rounding flag
+        ;//
+        ;// Syntax:
+        ;// M_MCRECONBLOCK_IntegerPixel $rndVal, $offset
+        ;// M_MCRECONBLOCK_HalfPixelX $rndVal, $offset
+        ;// M_MCRECONBLOCK_HalfPixelY $rndVal, $offset
+        ;// M_MCRECONBLOCK_HalfPixelXY $rndVal, $offset
+        ;//
+        ;// Inputs:
+        ;// $rndVal Assembler Variable. 0 for rounding, 1 for no rounding
+        ;// $offset $pSrc MOD 4 value. Offset from 4 byte aligned location.
+        ;//
+        ;// Outputs:
+        ;// Outputs come in the named registers of the callee functions
+        ;// The macro loads the data from the source pointer, processes it and
+        ;// stores in the destination pointer.
Does the whole prediction cycle
+        ;// of Motion Compensation routine for a particular predictType
+        ;// After this only residue addition to the predicted values remain
+
+        MACRO
+        M_MCRECONBLOCK_IntegerPixel $rndVal, $offset
+        ;// Algorithmic Description:
+        ;// This handles motion compensation for IntegerPixel predictType. Both
+        ;// rounding cases are handled by the same code base. It is just a copy
+        ;// from source to destination. Two lines are done per loop to reduce
+        ;// stalls. Loop has been software pipelined as well for that purpose.
+        ;//
+        ;// M_LOAD_X loads a whole row in two registers and then they are stored
+
+CaseIntegerPixelRnd0Offset$offset
+CaseIntegerPixelRnd1Offset$offset
+        M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp3, $offset
+        M_LOAD_X pSrc, srcStep, tmp3, tmp4, tmp5, $offset
+YloopIntegerPixelOffset$offset
+        SUBS y, y, #2 ;// two rows per iteration; flags are consumed by the BGT below
+        STRD tmp1, tmp2, [pDst], dstStep
+        STRD tmp3, tmp4, [pDst], dstStep
+        M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp3, $offset ;// pipelined load for the next iteration
+        M_LOAD_X pSrc, srcStep, tmp3, tmp4, tmp5, $offset ;// (on the last iteration these rows are loaded but never stored)
+        BGT YloopIntegerPixelOffset$offset
+
+        B SwitchPredictTypeEnd
+        MEND
+;// ***************************************************************************
+        MACRO
+        M_MCRECONBLOCK_HalfPixelX $rndVal, $offset
+        ;// Algorithmic Description:
+        ;// This handles motion compensation for HalfPixelX predictType. The two
+        ;// rounding cases are handled by the different code base and spanned by
+        ;// different macro calls. Loop has been software pipelined to reduce
+        ;// stalls.
+        ;//
+        ;// Filtering involves averaging a pixel with the next horizontal pixel.
+        ;// M_LOAD_XINT and M_EXT_XINT combination generate 4 registers, 2 with
+        ;// all pixels in a row with 4 pixel in each register and another 2
+        ;// registers with pixels corresponding to one horizontally shifted pixel
+        ;// corresponding to the initial row pixels. These are set of packed
+        ;// registers appropriate to do 4 lane SIMD.
+        ;// After that M_UHADD8R macro does the averaging taking care of the
+        ;// rounding as required
+
+CaseHalfPixelXRnd$rndVal.Offset$offset
+        IF $rndVal = 0
+        LDR mask, =0x80808080 ;// only loaded for the rounding variant
+        ENDIF
+
+        M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4
+YloopHalfPixelXRnd$rndVal.Offset$offset
+        SUBS y, y, #1 ;// one row per iteration
+        M_EXT_XINT $offset, tmp1, tmp2, tmp3, tmp4 ;// tmp1:tmp2 = row, tmp3:tmp4 = row shifted by one pixel
+        M_UHADD8R tmp5, tmp1, tmp3, (1-$rndVal), mask
+        M_UHADD8R tmp6, tmp2, tmp4, (1-$rndVal), mask
+        STRD tmp5, tmp6, [pDst], dstStep
+        M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4 ;// pipelined load of the next row
+        BGT YloopHalfPixelXRnd$rndVal.Offset$offset
+
+        B SwitchPredictTypeEnd
+        MEND
+;// ***************************************************************************
+        MACRO
+        M_MCRECONBLOCK_HalfPixelY $rndVal, $offset
+        ;// Algorithmic Description:
+        ;// This handles motion compensation for HalfPixelY predictType. The two
+        ;// rounding cases are handled by the different code base and spanned by
+        ;// different macro calls. PreLoading is used to avoid reload of same data.
+        ;//
+        ;// Filtering involves averaging a pixel with the next vertical pixel.
+        ;// M_LOAD_X generates 2 registers with all pixels in a row with 4 pixel in
+        ;// each register. These are set of packed registers appropriate to do
+        ;// 4 lane SIMD. After that M_UHADD8R macro does the averaging taking care
+        ;// of the rounding as required
+
+CaseHalfPixelYRnd$rndVal.Offset$offset
+        IF $rndVal = 0
+        LDR mask, =0x80808080 ;// only loaded for the rounding variant
+        ENDIF
+
+        M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp5, $offset ;// Pre-load
+YloopHalfPixelYRnd$rndVal.Offset$offset
+        SUBS y, y, #2 ;// two rows per iteration; flags are consumed by the BGT below
+        ;// Processing one line
+        M_LOAD_X pSrc, srcStep, tmp3, tmp4, tmp5, $offset
+        M_UHADD8R tmp1, tmp1, tmp3, (1-$rndVal), mask
+        M_UHADD8R tmp2, tmp2, tmp4, (1-$rndVal), mask
+        STRD tmp1, tmp2, [pDst], dstStep
+        ;// Processing another line
+        M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp5, $offset ;// this row is also the upper row of the next iteration
+        M_UHADD8R tmp3, tmp3, tmp1, (1-$rndVal), mask
+        M_UHADD8R tmp4, tmp4, tmp2, (1-$rndVal), mask
+        STRD tmp3, tmp4, [pDst], dstStep
+
+        BGT YloopHalfPixelYRnd$rndVal.Offset$offset
+
+        B SwitchPredictTypeEnd
+        MEND
+;// ***************************************************************************
+        MACRO
+        M_MCRECONBLOCK_HalfPixelXY $rndVal, $offset
+        ;// Algorithmic Description:
+        ;// This handles motion compensation for HalfPixelXY predictType. The two
+        ;// rounding cases are handled by the different code base and spanned by
+        ;// different macro calls. PreLoading is used to avoid reload of same data.
+        ;//
+        ;// Filtering involves averaging a pixel with the next vertical, horizontal
+        ;// and right-down diagonal pixels. Just as in HalfPixelX case, M_LOAD_XINT
+        ;// and M_EXT_XINT combination generates 4 registers with a row and its
+        ;// 1 pixel right shifted version, with 4 pixels in one register. Another
+        ;// call of that macro-combination gets another row. Then M_HSUM_XOR is
+        ;// called to get mutual half-sum and xor combinations of a row with its
+        ;// shifted version as they are inputs to the M_AVG4 macro which computes
+        ;// the 4 element average with rounding. Note that it is the half-sum/xor
+        ;// values that are preserved for next row as they can be re-used in the
+        ;// next call to the M_AVG4 and saves recomputation.
+        ;// Due to lack of register, the row counter and a masking value required
+        ;// in M_AVG4 are packed into a single register yMask where the last nibble
+        ;// holds the row counter values and rest holds the masking variable left
+        ;// shifted by 4
+
+CaseHalfPixelXYRnd$rndVal.Offset$offset
+        LDR yMask, =((0x01010101 << 4) + 8) ;// mask in bits 31..4, row counter (8) in the low nibble
+
+        M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b'
+        M_EXT_XINT $offset, t00, t01, t10, t11
+        M_HSUM_XOR t00, t10, tmp ;// s0, l0
+        M_HSUM_XOR t01, t11, tmp ;// s0', l0'
+
+YloopHalfPixelXYRnd$rndVal.Offset$offset
+        ;// Processing one line
+        ;// t00, t01, t10, t11 required from previous loop
+        M_LOAD_XINT pSrc, srcStep, $offset, t20, t21, t30, t31 ;// Load c, c', d, d'
+        SUB yMask, yMask, #2 ;// two rows per iteration; counter goes 8,6,4,2,0 so it never borrows from the mask
+        M_EXT_XINT $offset, t20, t21, t30, t31
+        M_HSUM_XOR t20, t30, tmp ;// s1, l1
+        M_HSUM_XOR t21, t31, tmp ;// s1', l1'
+        M_AVG4 t00, t10, t20, t30, $rndVal ;// s0, l0, s1, l1
+        M_AVG4 t01, t11, t21, t31, $rndVal ;// s0', l0', s1', l1'
+        STRD t00, t01, [pDst], dstStep ;// store the average
+
+        ;// Processing another line
+        ;// t20, t21, t30, t31 required from above
+        M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b'
+        TST yMask, #7 ;// Z is set once the row counter nibble reaches 0
+        M_EXT_XINT $offset, t00, t01, t10, t11
+        M_HSUM_XOR t00, t10, tmp
+        M_HSUM_XOR t01, t11, tmp
+        M_AVG4 t20, t30, t00, t10, $rndVal
+        M_AVG4 t21, t31, t01, t11, $rndVal
+        STRD t20, t21, [pDst], dstStep
+
+        ;// Branch on Z only. TST does not write the V flag and no instruction on this
+        ;// path sets it, so a signed condition (GT) would depend on the caller's V flag.
+        BNE YloopHalfPixelXYRnd$rndVal.Offset$offset
+
+        IF $offset/=3 :LOR: $rndVal/=1 ;// the last handler emitted falls through to SwitchPredictTypeEnd
+        B SwitchPredictTypeEnd
+        ENDIF
+        MEND
+;// ***************************************************************************
+;// Motion compensation handler macros end here
+;// ***************************************************************************
+        ;// Description:
+        ;// Populates all 4 kinds of offsets "cases" for each predictType and rndVal
+        ;// combination in the "switch" to prediction processing code segment
+        ;//
+        ;// Syntax:
+        ;// M_CASE_OFFSET $rnd, $predictType
+        ;//
+        ;//
Inputs:
+        ;// $rnd 0 for rounding, 1 for no rounding
+        ;// $predictType The prediction mode
+        ;//
+        ;// Outputs:
+        ;// Populated list of "M_CASE"s for the "M_SWITCH" macro
+
+        MACRO
+        M_CASE_OFFSET $rnd, $predictType
+        M_CASE Case$predictType.Rnd$rnd.Offset0
+        M_CASE Case$predictType.Rnd$rnd.Offset1
+        M_CASE Case$predictType.Rnd$rnd.Offset2
+        M_CASE Case$predictType.Rnd$rnd.Offset3
+        MEND
+;// ***************************************************************************
+        ;// Description:
+        ;// Populates all 2 kinds of rounding "cases" for each predictType in the
+        ;// "switch" to prediction processing code segment
+        ;//
+        ;// Syntax:
+        ;// M_CASE_MCRECONBLOCK $predictType
+        ;//
+        ;// Inputs:
+        ;// $predictType The prediction mode
+        ;//
+        ;// Outputs:
+        ;// Populated list of "M_CASE_OFFSET" macros
+        ;// The emission order (rounding 0 then 1, offsets 0..3 inside each) has to match
+        ;// the switch index (predictType << 3) | (rndVal << 2) | offset built by the function
+
+        MACRO
+        M_CASE_MCRECONBLOCK $predictType
+        M_CASE_OFFSET 0, $predictType ;// 0 for rounding
+        M_CASE_OFFSET 1, $predictType ;// 1 for no rounding
+        MEND
+;// ***************************************************************************
+        ;// Description:
+        ;// Populates all 8 kinds of rounding and offset combinations handling macros
+        ;// for the specified predictType. In case of "IntegerPixel" predictType,
+        ;// rounding is not required so same code segment handles both cases
+        ;//
+        ;// Syntax:
+        ;// M_MCRECONBLOCK $predictType
+        ;//
+        ;// Inputs:
+        ;// $predictType The prediction mode
+        ;//
+        ;// Outputs:
+        ;// Populated list of "M_MCRECONBLOCK_$predictType" macros for specified
+        ;// predictType.
Each
+        ;// M_MCRECONBLOCK_$predictType $rnd, $offset
+        ;// is a code segment (starting with a label indicating the predictType,
+        ;// rounding and offset combination)
+        ;// Four calls of this macro with the 4 prediction modes populate all the 32
+        ;// handlers
+
+        MACRO
+        M_MCRECONBLOCK $predictType
+        M_MCRECONBLOCK_$predictType 0, 0
+        M_MCRECONBLOCK_$predictType 0, 1
+        M_MCRECONBLOCK_$predictType 0, 2
+        M_MCRECONBLOCK_$predictType 0, 3
+        IF "$predictType" /= "IntegerPixel" ;// If not IntegerPixel then rounding makes a difference
+        M_MCRECONBLOCK_$predictType 1, 0
+        M_MCRECONBLOCK_$predictType 1, 1
+        M_MCRECONBLOCK_$predictType 1, 2
+        M_MCRECONBLOCK_$predictType 1, 3
+        ENDIF
+        MEND
+;// ***************************************************************************
+;// Input/Output Registers
+pSrc RN 0
+srcStep RN 1
+arg_pSrcResidue RN 2
+pSrcResidue RN 12
+pDst RN 3
+dstStep RN 2 ;// shares r2 with arg_pSrcResidue, which is saved to the stack before dstStep is loaded
+predictType RN 10
+rndVal RN 11
+mask RN 11 ;// shares r11 with rndVal
+
+;// Local Scratch Registers
+zero RN 12
+y RN 14
+
+tmp1 RN 4
+tmp2 RN 5
+tmp3 RN 6
+tmp4 RN 7
+tmp5 RN 8
+tmp6 RN 9
+tmp7 RN 10
+tmp8 RN 11
+tmp9 RN 12
+
+t00 RN 4
+t01 RN 5
+t10 RN 6
+t11 RN 7
+t20 RN 8
+t21 RN 9
+t30 RN 10 ;// shares r10 with predictType
+t31 RN 11 ;// shares r11 with rndVal/mask
+tmp RN 12
+
+yMask RN 14 ;// shares r14 with y
+
+dst RN 1 ;// shares r1 with srcStep; only used in the residue loop
+return RN 0
+
+        ;// Allocate memory on stack
+        M_ALLOC4 Stk_pDst, 4
+        M_ALLOC4 Stk_pSrcResidue, 4
+        ;// Function header
+        M_START omxVCM4P2_MCReconBlock, r11
+        ;// Define stack arguments
+        M_ARG Arg_dstStep, 4
+        M_ARG Arg_predictType, 4
+        M_ARG Arg_rndVal, 4
+        ;// Save on stack
+        M_STR pDst, Stk_pDst
+        M_STR arg_pSrcResidue, Stk_pSrcResidue
+        ;// Load argument from the stack
+        M_LDR dstStep, Arg_dstStep
+        M_LDR predictType, Arg_predictType
+        M_LDR rndVal, Arg_rndVal
+
+        MOV y, #8 ;// row counter for the prediction loops
+
+        ;// Build the switch index: (predictType << 3) | (rndVal << 2) | (pSrc & 3)
+        AND tmp1, pSrc, #3
+        ORR predictType, tmp1, predictType, LSL #3
+        ORR predictType, predictType, rndVal, LSL #2
+        ;// Truncating source pointer to align to 4 byte location
+        BIC pSrc, pSrc, #3
+
+        ;// Implementation takes care of all combinations of different
+        ;// predictTypes, rounding cases and source pointer offsets to alignment
+        ;// of 4 bytes in different code bases unless one of these parameter wasn't
+        ;// making any difference to the implementation. Below M_CASE_MCRECONBLOCK
+        ;// macros branch into 8 M_CASE macros for all combinations of the 2
+        ;// rounding cases and 4 offsets of the pSrc pointer to the 4 byte
+        ;// alignment.
+        M_SWITCH predictType
+        M_CASE_MCRECONBLOCK IntegerPixel
+        M_CASE_MCRECONBLOCK HalfPixelX
+        M_CASE_MCRECONBLOCK HalfPixelY
+        M_CASE_MCRECONBLOCK HalfPixelXY
+        M_ENDSWITCH
+
+        ;// The M_MCRECONBLOCK macros populate the code bases by calling all 8
+        ;// particular macros (4 in case of IntegerPixel as rounding makes no
+        ;// difference there) to generate the code for all cases of rounding and
+        ;// offsets. LTORG is used to segment the code as code size bloated beyond
+        ;// 4KB.
+        M_MCRECONBLOCK IntegerPixel
+        M_MCRECONBLOCK HalfPixelX
+        LTORG ;// safe here: the preceding handler ends with an unconditional branch
+        M_MCRECONBLOCK HalfPixelY
+        M_MCRECONBLOCK HalfPixelXY
+SwitchPredictTypeEnd
+
+        ;// Residue Addition
+        ;// This is done in 2 lane SIMD though loads are further optimized and
+        ;// 4 bytes are loaded in case of destination buffer. Algorithmic
+        ;// details are in inlined comments
+        M_LDR pSrcResidue, Stk_pSrcResidue
+        CMP pSrcResidue, #0
+        BEQ pSrcResidueConditionEnd ;// NULL residue pointer: prediction only
+pSrcResidueNotNull
+        M_LDR pDst, Stk_pDst ;// rewind to the start of the destination block
+        MOV y, #8
+        SUB dstStep, dstStep, #4 ;// the first STR of each row already advances pDst by 4
+Yloop_pSrcResidueNotNull
+        SUBS y, y, #1
+        LDR dst, [pDst] ;// dst = [dcba]
+        LDMIA pSrcResidue!, {tmp1, tmp2} ;// tmp1=[BA] tmp2=[DC] (16-bit residues)
+        PKHBT tmp3, tmp1, tmp2, LSL #16 ;// Deltaval1 = [C A]
+        PKHTB tmp4, tmp2, tmp1, ASR #16 ;// DeltaVal2 = [D B]
+        UXTB16 tmp1, dst ;// tmp1 = [0c0a]
+        UXTB16 tmp2, dst, ROR #8 ;// tmp2 = [0d0b]
+        QADD16 tmp1, tmp1, tmp3 ;// Add and saturate to 16 bits
+        QADD16 tmp2, tmp2, tmp4
+        USAT16 tmp1, #8, tmp1
+        USAT16 tmp2, #8, tmp2 ;// armClip(0, 255, tmp2)
+        ORR tmp1, tmp1, tmp2, LSL #8 ;// tmp1 = [dcba]
+        STR tmp1, [pDst], #4
+
+        ;// Same sequence for pixels 4..7 of the row
+        LDR dst, [pDst]
+        LDMIA pSrcResidue!, {tmp1, tmp2}
+        PKHBT tmp3, tmp1, tmp2, LSL #16
+        PKHTB tmp4, tmp2, tmp1, ASR #16
+        UXTB16 tmp1, dst
+        UXTB16 tmp2, dst, ROR #8
+        QADD16 tmp1, tmp1, tmp3
+        QADD16 tmp2, tmp2, tmp4
+        USAT16 tmp1, #8, tmp1
+        USAT16 tmp2, #8, tmp2
+        ORR tmp1, tmp1, tmp2, LSL #8
+        STR tmp1, [pDst], dstStep
+
+        BGT Yloop_pSrcResidueNotNull
+pSrcResidueConditionEnd
+
+        MOV return, #OMX_Sts_NoErr
+
+        M_END
+        ENDIF ;// ARM1136JS
+
+;// ***************************************************************************
+;// CortexA8 implementation
+;// ***************************************************************************
+        END
+;// ***************************************************************************
+;// omxVCM4P2_MCReconBlock ends
+;// ***************************************************************************
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
new file mode 100644
index 0000000000000000000000000000000000000000..213444a3fb34fcf8d381a2e2a265d3c8f9b242b5
--- /dev/null
+++
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s @@ -0,0 +1,283 @@ +; ********** +; * +; * File Name: omxVCM4P2_PredictReconCoefIntra_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains module for DC/AC coefficient prediction +; * +; * +; * Function: omxVCM4P2_PredictReconCoefIntra +; * +; * Description: +; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior +; * to the function call, prediction direction (predDir) should be selected +; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2. +; * +; * Remarks: +; * +; * Parameters: +; * [in] pSrcDst pointer to the coefficient buffer which contains the +; * quantized coefficient residuals (PQF) of the current +; * block; must be aligned on a 4-byte boundary. The +; * output coefficients are saturated to the range +; * [-2048, 2047]. +; * [in] pPredBufRow pointer to the coefficient row buffer; must be aligned +; * on a 4-byte boundary. +; * [in] pPredBufCol pointer to the coefficient column buffer; must be +; * aligned on a 4-byte boundary. +; * [in] curQP quantization parameter of the current block. curQP may +; * equal to predQP especially when the current block and +; * the predictor block are in the same macroblock. +; * [in] predQP quantization parameter of the predictor block +; * [in] predDir indicates the prediction direction which takes one +; * of the following values: +; * OMX_VIDEO_HORIZONTAL predict horizontally +; * OMX_VIDEO_VERTICAL predict vertically +; * [in] ACPredFlag a flag indicating if AC prediction should be +; * performed. 
It is equal to ac_pred_flag in the bit
+; *                       stream syntax of MPEG-4
+; * [in] videoComp        video component type (luminance, chrominance or
+; *                       alpha) of the current block
+; * [out] pSrcDst         pointer to the coefficient buffer which contains
+; *                       the quantized coefficients (QF) of the current
+; *                       block
+; * [out] pPredBufRow     pointer to the updated coefficient row buffer
+; * [out] pPredBufCol     pointer to the updated coefficient column buffer
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - Bad arguments
+; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
+; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP >31,
+; * predQP > 31, predDir exceeds [1,2].
+; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not
+; * 4-byte aligned.
+; *
+; *********
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+
+
+        IMPORT armVCM4P2_Reciprocal_QP_S32
+        IMPORT armVCM4P2_Reciprocal_QP_S16
+        IMPORT armVCM4P2_DCScaler
+
+
+
+        IF ARM1136JS
+
+
+;// Input Arguments
+
+pSrcDst RN 0
+pPredBufRow RN 1
+pPredBufCol RN 2
+curQP RN 3
+QP RN 3
+predQP RN 4
+predDir RN 5
+ACPredFlag RN 6
+videoComp RN 7
+
+;// Local Variables
+
+temp2 RN 5
+negCurQP RN 7
+negdcScaler RN 7
+tempPred RN 8
+
+dcScaler RN 4
+CoeffTable RN 9
+absCoeffDC RN 9
+temp3 RN 6
+absCoeffAC RN 6
+
+shortVideoHeader RN 9
+predCoeffTable RN 10
+Count RN 10
+temp1 RN 12
+index RN 12
+Rem RN 14
+temp RN 11
+Return RN 0
+
+
+
+        M_START omxVCM4P2_PredictReconCoefIntra,r12
+
+        ;// Assigning pointers to Input arguments on Stack
+
+        M_ARG predQPonStack,4
+        M_ARG predDironStack,4
+        M_ARG ACPredFlagonStack,4
+        M_ARG videoComponStack,4
+
+        ;// DC Prediction
+
+        M_LDR videoComp,videoComponStack ;// Load videoComp From Stack
+
+        M_LDR predDir,predDironStack ;// Load Prediction direction
+
+        ;// dcScaler Calculation
+
+        LDR index, =armVCM4P2_DCScaler
+        ADD index,index,videoComp,LSL #5 ;// 32 table bytes per video component
+        LDRB dcScaler,[index,QP]
+
+
+calDCVal
+
+        LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S16 ;// Loading the table with entries 32767/(1 to 63)
+
+        CMP predDir,#2 ;// Check if the Prediction direction is vertical
+
+        ;// Calculate temp pred by performing Division
+
+        LDREQSH absCoeffDC,[pPredBufRow] ;// If vertical load the coeff from Row Prediction Buffer
+        LDRNESH absCoeffDC,[pPredBufCol] ;// If horizontal load the coeff from column Prediction Buffer
+
+        RSB negdcScaler,dcScaler,#0 ;// negdcScaler=-dcScaler
+
+        MOV temp1,absCoeffDC ;// temp1=prediction coeff (signed copy kept for the sign fix-up)
+        CMP temp1,#0
+        RSBLT absCoeffDC,temp1,#0 ;// absCoeffDC=abs(temp1)
+
+        ADD temp,dcScaler,dcScaler ;// byte offset of the 16-bit table entry for dcScaler
+        LDRH temp,[predCoeffTable,temp] ;// Load value from coeff table for performing division using multiplication
+
+        SMULBB tempPred,temp,absCoeffDC ;// tempPred=pPredBufRow(Col)[0]*32767/dcScaler
+        ADD temp3,dcScaler,#1
+        LSR tempPred,tempPred,#15 ;// tempPred=pPredBufRow(Col)[0]/dcScaler
+        LSR temp3,temp3,#1 ;// temp3=round(dcScaler/2)
+
+        MLA Rem,negdcScaler,tempPred,absCoeffDC ;// Rem = pPredBufRow(Col)[0]-tempPred*dcScaler
+
+
+        LDRH temp,[pPredBufCol] ;// old column DC value, copied into the row buffer below
+        CMP Rem,temp3
+        ADDGE tempPred,#1 ;// If Rem>=round(dcScaler/2);tempPred=tempPred+1
+        CMP temp1,#0
+        RSBLT tempPred,tempPred,#0 ;// if pPredBufRow(Col)[0]<0; tempPred=-tempPred
+
+
+        STRH temp,[pPredBufRow,#-16]
+
+        LDRH temp,[pSrcDst] ;// temp=pSrcDst[0]
+        M_LDR ACPredFlag,ACPredFlagonStack
+        ADD temp,temp,tempPred ;// temp=pSrcDst[0]+tempPred
+        SSAT16 temp,#12,temp ;// clip temp to [-2048,2047]
+
+        SMULBB temp1,temp,dcScaler ;// temp1=clipped(pSrcDst[0])*dcScaler
+        M_LDR predQP,predQPonStack
+        STRH temp,[pSrcDst]
+        CMP ACPredFlag,#1 ;// Check if the AC prediction flag is set or not
+        STRH temp1,[pPredBufCol] ;// store temp1 to pPredBufCol
+
+        ;// AC Prediction
+
+
+        BNE Exit ;// If not set Exit
+
+        LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S32 ;// Loading the table with entries 0x1ffff/(1 to 63)
+        MOV temp1,#4
+        MUL temp1,curQP,temp1 ;// byte offset of the 32-bit table entry for curQP
+        CMP predDir,#2 ;// Check the Prediction direction
+        RSB negCurQP,curQP,#0 ;// negCurQP=-curQP (taken before curQP is halved below)
+        LDR CoeffTable,[predCoeffTable,temp1] ;// CoeffTable=0x1ffff/curQP
+        ADD curQP,curQP,#1 ;// curQP=curQP+1
+        LSR curQP,curQP,#1 ;// curQP=round(curQP/2)
+        MOV Count,#2 ;// Initializing the Loop Count
+        BNE Horizontal ;// If the Prediction direction is horizontal branch to Horizontal
+
+
+
+loop1
+        ;// Calculate tempPred
+
+        LDRSH absCoeffAC,[pPredBufRow,Count] ;// absCoeffAC=pPredBufRow[i], i = 1..7
+        MOV temp1,absCoeffAC ;// temp1 keeps the signed predictor for the sign fix-up
+        CMP temp1,#0 ;// compare pPredBufRow[i] with zero
+        RSBLT absCoeffAC,temp1,#0 ;// absCoeffAC=abs(pPredBufRow[i])
+
+        SMULBB absCoeffAC,absCoeffAC,predQP ;// absCoeffAC=abs(pPredBufRow[i])*predQP
+        MUL tempPred,absCoeffAC,CoeffTable ;// tempPred=abs(pPredBufRow[i])*predQP*0x1ffff/curQP
+        LSR tempPred,tempPred,#17 ;// tempPred=abs(pPredBufRow[i])*predQP/curQP
+
+        MLA Rem,negCurQP,tempPred,absCoeffAC ;// Rem=abs(pPredBufRow[i])*predQP-tempPred*curQP
+        LDRH temp,[pSrcDst,Count] ;// temp=pSrcDst[i]
+
+        CMP Rem,curQP ;// Compare Rem with round(curQP/2)
+        ADDGE tempPred,#1 ;// if Rem>=round(curQP/2); tempPred=tempPred+1
+        CMP temp1,#0
+        RSBLT tempPred,tempPred,#0 ;// if pPredBufRow[i]<0 ; tempPred=-tempPred
+
+        ;// Update source and Row Prediction buffers
+
+        ADD temp,temp,tempPred ;// temp=tempPred+pSrcDst[i]
+        SSAT16 temp,#12,temp ;// Clip temp to [-2048,2047]
+        STRH temp,[pSrcDst,Count]
+        STRH temp,[pPredBufRow,Count] ;// pPredBufRow[i]=temp
+        ADD Count,Count,#2 ;// i=i+1
+        CMP Count,#16 ;// compare if i=8
+        BLT loop1
+        B Exit ;// Branch to exit
+
+Horizontal
+
+        MOV Count,#16 ;// Initializing i=8
+
+loop2
+
+        LSR temp2,Count,#3 ;// temp2=i>>3
+
+        ;// Calculate tempPred
+
+        LDRSH absCoeffAC,[pPredBufCol,temp2] ;// absCoeffAC=pPredBufCol[i>>3], sign-extended so the sign tests below can fire
+        MOV temp1,absCoeffAC ;// temp1 keeps the signed predictor for the sign fix-up
+        CMP temp1,#0 ;// compare pPredBufCol[i>>3] with zero
+        RSBLT absCoeffAC,temp1,#0 ;// absCoeffAC=abs(pPredBufCol[i>>3])
+
+        SMULBB absCoeffAC,absCoeffAC,predQP ;// absCoeffAC=abs(pPredBufCol[i>>3])*predQP
+        MUL tempPred,absCoeffAC,CoeffTable ;// tempPred=pPredBufCol[i>>3]*predQP*0x1ffff/curQP
+        LSR tempPred,tempPred,#17 ;// tempPred=pPredBufCol[i>>3]*predQP/curQP
+
+        MLA Rem,negCurQP,tempPred,absCoeffAC ;// Rem=abs(pPredBufCol[i>>3])*predQP-tempPred*curQP
+        LDRH temp,[pSrcDst,Count] ;// temp=pSrcDst[i]
+
+        CMP Rem,curQP ;// Compare Rem with round(curQP/2)
+        ADDGE tempPred,#1 ;// tempPred=tempPred+1 if Rem>=round(curQP/2)
+        CMP temp1,#0
+        RSBLT tempPred,tempPred,#0 ;// if pPredBufCol[i>>3]<0 tempPred=-tempPred
+
+        ;// Update source and Column Prediction buffers
+
+        ADD temp,temp,tempPred ;// temp=pSrcDst[i]+tempPred
+        SSAT16 temp,#12,temp ;// Clip temp to [-2048,2047]
+        STRH temp,[pSrcDst,Count] ;// pSrcDst[i]= clipped value
+        STRH temp,[pPredBufCol,temp2] ;// pPredBufCol[i>>3]=temp
+        ADD Count,Count,#16 ;// i=i+8
+        CMP Count,#128 ;// compare i with 64
+        BLT loop2
+
+
+Exit
+
+        MOV Return,#OMX_Sts_NoErr
+
+        M_END
+        ENDIF
+        END
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
new file mode 100644
index 0000000000000000000000000000000000000000..c9591cb19fcbf95721f4be0e8f6d9b020c0745b1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
@@ -0,0 +1,141 @@
+;/**
+; *
+; * File Name: omxVCM4P2_QuantInvInter_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for inter reconstruction
+; *
+; *
+; *
+; *
+; *
+; * Function: omxVCM4P2_QuantInvInter_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in] QP quantization parameter (quantiser_scale)
+; * [in] videoComp (Intra version only.) Video component type of the
+; * current block. Takes one of the following flags:
+; * OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; * OMX_VC_ALPHA.
+; * [in] shortVideoHeader a flag indicating presence of short_video_header;
+; * shortVideoHeader==1 selects linear intra DC mode,
+; * and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out] pSrcDst pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; * - If pSrcDst is NULL or is not 16-byte aligned.
+; * or
+; * - If QP <= 0.
+; * or
+; * - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+; */
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+
+
+        IF ARM1136JS
+
+;//Input Arguments
+pSrcDst RN 0
+QP RN 1
+
+;//Local Variables
+Return RN 0
+Count RN 4
+tempVal21 RN 2
+tempVal43 RN 3
+QP1 RN 5
+X2 RN 6
+X3 RN 14
+Result1 RN 8
+Result2 RN 9
+two RN 7
+
+        M_START omxVCM4P2_QuantInvInter_I,r9
+
+        MOV Count,#64 ;// 64 coefficients, four handled per loop iteration
+        TST QP,#1
+        LDRD tempVal21,[pSrcDst] ;// Loads first two values of pSrcDst to tempVal21,
+        ;// next two values to tempVal43
+        SUBEQ QP1,QP,#1 ;// QP1=QP if QP is odd , QP1=QP-1 if QP is even
+        MOVNE QP1,QP
+        MOV two,#2
+
+
+
+Loop
+
+
+        SMULBB X2,tempVal21,two ;// X2= first val(lower 16 bits of tempVal21)*2
+        CMP X2,#0 ;// these flags steer the next three conditional instructions on X2
+
+        RSBLT X2,X2,#0 ;// X2=absoluteval(first val)
+        SMLABBNE X2,QP,X2,QP1 ;// X2=2*absval(first val)*QP+QP if QP is odd
+        ;// X2=2*absval(first val)*QP+QP-1 if QP is even
+        SMULTB X3,tempVal21,two ;// X3= second val(top 16 bits of tempVal21)*2
+        RSBLT X2,X2,#0 ;// restore the sign of the first val
+
+        CMP X3,#0
+
+        RSBLT X3,X3,#0
+        SMLABBNE X3,QP,X3,QP1
+
+        RSBLT X3,X3,#0 ;// restore the sign of the second val
+        ;// NOTE(review): PKHBT keeps only the low 16 bits of X2/X3 and the clip happens afterwards,
+        ;// so a magnitude above 32767 would wrap before SSAT16 - confirm the input range rules this out
+        PKHBT Result1,X2,X3,LSL #16 ;// Result1[0-15]=X2[0-15],Result1[16-31]=X3[0-15]
+        SMULBB X2,tempVal43,two ;// X2= third val(lower 16 bits of tempVal43)*2
+        SSAT16 Result1,#12,Result1 ;// clip to range [-2048,2047]
+        CMP X2,#0
+
+
+
+        RSBLE X2,X2,#0 ;// LE rather than LT: negating a zero is harmless
+        SMLABBNE X2,QP,X2,QP1
+        SMULTB X3,tempVal43,two ;// X3= fourth val(top 16 bits of tempVal43)*2
+        RSBLT X2,X2,#0
+        CMP X3,#0
+
+        ;// NOTE(review): on the last iteration this reads the 8 bytes that follow the 64 coefficients
+        LDRD tempVal21,[pSrcDst,#8] ;// Load next four Values to tempVal21,tempVal43
+
+        RSBLT X3,X3,#0
+        SMLABBNE X3,QP,X3,QP1
+        RSBLT X3,X3,#0
+        PKHBT Result2,X2,X3,LSL #16 ;// Result2[0-15]=X2[0-15],Result2[16-31]=X3[0-15]
+        SSAT16 Result2,#12,Result2 ;// clip to range [-2048,2047]
+
+        SUBS Count,Count,#4 ;// Decrement Count by 4 and continue if it has not reached 0
+        STRD Result1,[pSrcDst],#8 ;// Store Double words and increment the pointer to point the next store address
+
+
+
+        BGT Loop
+
+        MOV Return,#OMX_Sts_NoErr
+
+        M_END
+        ENDIF
+        END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
new file mode 100644
index 0000000000000000000000000000000000000000..6328e01463666ec718c45aa4db0cdcb004503d32
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
@@ -0,0 +1,188 @@
+;/**
+; *
+; * File Name: omxVCM4P2_QuantInvIntra_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for inter reconstruction
+; *
+; *
+; *
+; *
+; *
+; *
+; * Function: omxVCM4P2_QuantInvIntra_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in] QP quantization parameter (quantiser_scale)
+; * [in] videoComp (Intra version only.) Video component type of the
+; * current block. Takes one of the following flags:
+; * OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; * OMX_VC_ALPHA.
+; * [in] shortVideoHeader a flag indicating presence of short_video_header;
+; * shortVideoHeader==1 selects linear intra DC mode,
+; * and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out] pSrcDst pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; * - If pSrcDst is NULL or is not 16-byte aligned.
+; * or
+; * - If QP <= 0.
+; * or
+; * - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+
+        IMPORT armVCM4P2_DCScaler
+
+
+
+        IF ARM1136JS
+
+;//Input Arguments
+pSrcDst RN 0
+QP RN 1
+videoComp RN 2
+shortVideoHeader RN 3
+
+;//Local Variables
+Return RN 0
+dcScaler RN 4
+temp RN 12
+index RN 6
+
+tempVal21 RN 4 ;// shares r4 with dcScaler, which is consumed before the first LDRD
+tempVal43 RN 5
+QP1 RN 6 ;// shares r6 with index, which is dead once dcScaler is loaded
+X2 RN 7
+X3 RN 14
+Result1 RN 8
+Result2 RN 9
+two RN 10
+Count RN 11
+
+
+
+
+        M_START omxVCM4P2_QuantInvIntra_I,r11
+
+
+
+        ;// Perform Inverse Quantization for DC coefficient
+
+        TEQ shortVideoHeader,#0 ;// Test if short Video Header flag =0
+        MOVNE dcScaler,#8 ;// if shortVideoHeader is non zero dcScaler=8
+        BNE calDCVal
+        LDR index, =armVCM4P2_DCScaler
+        ADD index,index,videoComp,LSL #5 ;// 32 table bytes per video component
+        LDRB dcScaler,[index,QP]
+
+
+        ;//M_CalDCScalar shortVideoHeader,videoComp, QP
+
+calDCVal
+
+        LDRH temp,[pSrcDst]
+        SMULBB temp,temp,dcScaler ;// dcCoeff = dcScaler * Quantized DC coefficient(from memory)
+        SSAT temp,#12,temp ;// Saturating to 12 bits; held in temp until the loop below has finished
+
+
+        MOV Count,#64 ;// 64 coefficients, four handled per loop iteration (DC included, rewritten afterwards)
+        TST QP,#1
+        LDRD tempVal21,[pSrcDst] ;// Loads first two values of pSrcDst to tempVal21,
+        ;// next two values to tempVal43
+        SUBEQ QP1,QP,#1 ;// QP1=QP if QP is odd , QP1=QP-1 if QP is even
+        MOVNE QP1,QP
+        MOV two,#2
+
+
+
+
+
+Loop
+
+
+        SMULBB X2,tempVal21,two ;// X2= first val(lower 16 bits of tempVal21)*2
+        CMP X2,#0 ;// these flags steer the next three conditional instructions on X2
+
+        RSBLT X2,X2,#0 ;// X2=absoluteval(first val)
+        SMLABBNE X2,QP,X2,QP1 ;// X2=2*absval(first val)*QP+QP if QP is odd
+        ;// X2=2*absval(first val)*QP+QP-1 if QP is even
+        SMULTB X3,tempVal21,two ;// X3= second val(top 16 bits of tempVal21)*2
+        RSBLT X2,X2,#0 ;// restore the sign of the first val
+
+        CMP X3,#0
+
+        RSBLT X3,X3,#0
+        SMLABBNE X3,QP,X3,QP1
+
+        RSBLT X3,X3,#0 ;// restore the sign of the second val
+        ;// NOTE(review): PKHBT keeps only the low 16 bits of X2/X3 and the clip happens afterwards,
+        ;// so a magnitude above 32767 would wrap before SSAT16 - confirm the input range rules this out
+        PKHBT Result1,X2,X3,LSL #16 ;// Result1[0-15]=X2[0-15],Result1[16-31]=X3[0-15]
+        SMULBB X2,tempVal43,two ;// X2= third val(lower 16 bits of tempVal43)*2
+        SSAT16 Result1,#12,Result1 ;// clip to range [-2048,2047]
+        CMP X2,#0
+
+
+
+        RSBLE X2,X2,#0 ;// LE rather than LT: negating a zero is harmless
+        SMLABBNE X2,QP,X2,QP1
+        SMULTB X3,tempVal43,two ;// X3= fourth val(top 16 bits of tempVal43)*2
+        RSBLT X2,X2,#0
+        CMP X3,#0
+
+        ;// NOTE(review): on the last iteration this reads the 8 bytes that follow the 64 coefficients
+        LDRD tempVal21,[pSrcDst,#8] ;// Load next four Values to tempVal21,tempVal43
+
+        RSBLT X3,X3,#0
+        SMLABBNE X3,QP,X3,QP1
+        RSBLT X3,X3,#0
+        PKHBT Result2,X2,X3,LSL #16 ;// Result2[0-15]=X2[0-15],Result2[16-31]=X3[0-15]
+        SSAT16 Result2,#12,Result2 ;// clip to range [-2048,2047]
+
+        SUBS Count,Count,#4 ;// Decrement Count by 4 and continue if it has not reached 0
+        STRD Result1,[pSrcDst],#8 ;// Store Double words and increment the pointer to point the next store address
+
+
+
+        BGT Loop
+
+        SUB pSrcDst,pSrcDst,#128 ;// rewind over the 64 halfwords just written
+
+        ;// Storing the Inverse Quantized DC coefficient
+
+        STRH temp,[pSrcDst],#2 ;// replaces the value the loop wrote for coefficient 0
+
+
+
+        MOV Return,#OMX_Sts_NoErr
+
+
+
+
+        M_END
+        ENDIF
+        END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c
new file mode 100644
index 0000000000000000000000000000000000000000..5d9368140505b5ca7168766b15bdd1b7f518b722
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c
@@ -0,0 +1,6 @@
+#include "omxtypes.h"
+#include "armCOMM_Version.h"
+
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS
+const char * const omxVC_VersionDescription = "ARM OpenMAX DL v" ARM_VERSION_STRING " Rel=" OMX_ARM_RELEASE_TAG " Arch=" OMX_ARM_BUILD_ARCHITECTURE " Tools=" OMX_ARM_BUILD_TOOLCHAIN ;
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_DELIVERY.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_DELIVERY.TXT
new file mode 100755
index
0000000000000000000000000000000000000000..cc2d70a16cc3222079e930ddaac4036f29c2d566 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_DELIVERY.TXT @@ -0,0 +1,63 @@ +The contents of this transaction was created by Hedley Francis +of ARM on 19-Feb-2008. + +It contains the ARM data versions listed below. + +This data, unless otherwise stated, is ARM Proprietary and access to it +is subject to the agreements indicated below. + +If you experience problems with this data, please contact ARM support +quoting transaction reference <97414>. + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +- OX002-SW-98010-r0p0-00bet1 + Video codecs - optimised code + V7 code release for Hantro (Ver 1.0.2) + internal access + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +This transaction contains deliverables which are designated as being of +beta release status (BET). + +Beta release status has a particular meaning to ARM of which the recipient +must be aware. Beta is a pre-release status indicating that the deliverable +so described is believed to robustly demonstrate specified behaviour, to be +consistent across its included aspects and be ready for general deployment. +But Beta also indicates that pre-release reliability trials are ongoing and +that it is possible residual defects or errors in operation, consistency +and documentation may still be encountered. The recipient should consider +this position when using this Beta material supplied. ARM will normally +attempt to provide fixes or a work-around for defects identified by the +recipient, but the provision or timeliness of this support cannot be +guaranteed. ARM shall not be responsible for direct or consequential +damages as a result of encountering one or more of these residual defects. 
+By accepting a Beta release, the recipient agrees to these constraints and +to providing reasonable information to ARM to enable the replication of the +defects identified by the recipient. The specific Beta version supplied +will not be supported after release of a later or higher status version. +It should be noted that Support for the Beta release of the deliverable +will only be provided by ARM to a recipient who has a current support and +maintenance contract for the deliverable. + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +In addition to the data versions listed above, this transaction contains +two additional files at the top level. + +The first is this file, ARM_DELIVERY_97414.TXT, which is the delivery +note. + +The second is ARM_MANIFEST_97414.TXT which contains a manifest of all the +files included in this transaction, together with their checksums. + +The checksums provided are calculated using the RSA Data Security, Inc. +MD5 Message-Digest Algorithm. 
+ +The checksums can be used to verify the integrity of this data using the +"md5sum" tool (which is part of the GNU "textutils" package) by running: + + % md5sum --check ARM_MANIFEST_97414.TXT + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_MANIFEST.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_MANIFEST.TXT new file mode 100755 index 0000000000000000000000000000000000000000..8310f6794da8533be10f35ce1310cd8c1d34aa53 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_MANIFEST.TXT @@ -0,0 +1,91 @@ + OX002-SW-98010-r0p0-00bet1/ + OX002-SW-98010-r0p0-00bet1/api/ +e049791cfab6060a08cbac7b3ad767d6 OX002-SW-98010-r0p0-00bet1/api/armCOMM_s.h +ed798face25497b2703ede736d6d52b6 OX002-SW-98010-r0p0-00bet1/api/omxtypes_s.h +4eebd63af087376811d6749f0646b864 OX002-SW-98010-r0p0-00bet1/api/armCOMM_BitDec_s.h +43cf46c2cf2fe1f93c615b57bcbe4809 OX002-SW-98010-r0p0-00bet1/api/armCOMM.h +8f248ceaac8f602e277a521b679dcbbe OX002-SW-98010-r0p0-00bet1/api/armCOMM_IDCTTable.h +8ac5fa80ea98e391f5730a375280b5bd OX002-SW-98010-r0p0-00bet1/api/armCOMM_Version.h +3a2f420ddf6a1b950470bd0f5ebd5c62 OX002-SW-98010-r0p0-00bet1/api/armCOMM_IDCT_s.h +511c0bb534fe223599e2c84eff24c9ed OX002-SW-98010-r0p0-00bet1/api/armCOMM_MaskTable.h +8971932d56eed6b1ad1ba507f0bff5f0 OX002-SW-98010-r0p0-00bet1/api/armCOMM_Bitstream.h +f87fedd9ca432fefa757008176864ef8 OX002-SW-98010-r0p0-00bet1/api/armOMX.h +8e49899a428822c36ef9dd94e0e05f18 OX002-SW-98010-r0p0-00bet1/api/omxtypes.h +323008b72e9f04099a8cb42e99a1face OX002-SW-98010-r0p0-00bet1/build_vc.pl +e72d96c0a415459748df9807f3dae72f OX002-SW-98010-r0p0-00bet1/filelist_vc.txt + OX002-SW-98010-r0p0-00bet1/src/ +5eeae659a29477f5c52296d24afffd3c OX002-SW-98010-r0p0-00bet1/src/armCOMM_IDCTTable.c +d64cdcf38f7749dc7f77465e5b7d356d OX002-SW-98010-r0p0-00bet1/src/armCOMM_MaskTable.c + OX002-SW-98010-r0p0-00bet1/vc/ + OX002-SW-98010-r0p0-00bet1/vc/m4p10/ + OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/ 
+e7e0c320978564a7c9b2c723749a98d6 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_CAVLCTables.c +4adcd0df081990bdfc4729041a2a9152 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c +852e0404142965dc1f3aa7f00ee5127b OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s +7054151c5bfea6b5e74feee86b2d7b01 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c +5f7213a4f37627b3c58f6294ba477e30 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DequantTables_s.s +32ff4b8be62e2f0f3e764b83c1e5e2fd OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c +d066e3c81d82616f37ec1810ea49e7b7 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s +fe629a3e9d55395a6098bdf2431b5f02 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s +5b13fb954b7679de20076bb6a7f4ee1d OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s +01ba60eff66ea49a4f833ce6279f8e2f OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c +fa1072cf1d17e9666c9f1e215fa302b1 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s +db387b9e66d32787f47ef9cf0347da2a OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s +ea537e4e2ad03a1940981055fa3ace01 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s +29a4283885b9473a3550a81eff2559d2 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s +2ddcaf60a8ea1e6e6b77737f768bfb9d OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_QuantTables_s.s +c3002aad5600f872b70a5d7fe3915846 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s +a2900f2c47f1c61d20bd6c1eda33d6d4 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s +c921df73397a32c947dc996ba6858553 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s 
+3769e14f2fc3f514d025fe6ab73ff67a OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s +c029d1cebea0a09e1d235a37e2155002 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s +076a033f8161750a685756f9f51f04c9 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s +c5b5d22842822e6e5e31094882cbeb46 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s +f6bdf6d914a4a1479f524951a3409846 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s +ebeb0713a9b2ea25986360ef262138c4 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s +78ed9ea200faa7be665445a713859af1 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s +c2d995f787b6f44ef10c751c12d1935f OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s +40bed679a9f6e0d3efe216b7d4a9cf45 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s +4a52b3e9e268b8a8f07829bf500d03af OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s +11249f8a98c5d4b84cb5575b0e37ca9c OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s +2513b60559ba71ae495c6053fb779fa9 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s +2fb1ee17c36e3c1469c170f6dac11bf1 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s +cc4a6f32db0b72a91d3f278f6855df69 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c + OX002-SW-98010-r0p0-00bet1/vc/m4p10/api/ +6e530ddaa7c2b57ffe88162c020cb662 OX002-SW-98010-r0p0-00bet1/vc/m4p10/api/armVCM4P10_CAVLCTables.h + OX002-SW-98010-r0p0-00bet1/vc/m4p2/ + OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/ +bec6de348b113438498867b869001622 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Clip8_s.s +dba9824e959b21d401cac925e68a11a6 
OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s +dfa7e5b58027be3542dda0593b77b2d3 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s +4fba4c431a783a78a2eb6497a94ac967 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c +39991961179ca03b6381b6e653b1f14b OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s +1b0b2990c2669dfb87cf6b810611c01b OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c +1c9b87abf3283e957816b3937c680701 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s +4fe1afca659a9055fc1172e58f78a506 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c +2ea067f0436f91ba1351edaf411cb4ea OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Lookup_Tables.c +6ce363aadc9d65c308b40cca8902e4f6 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s +bf212f786772aed2bc705d22ff4e74f5 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s +293a48a648a3085456e6665bb7366fad OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_SetPredDir_s.s +2bb47ed9c9e25c5709c6d9b4ad39a38a OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s +437dfa204508850d61d4b87091446e9f OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s +bc9778898dd41101dc0fb0139eaf83cc OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s +fc191eeae43f8ce735dbd311cc7bcb8d OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s +a0d85f4f517c945a4c9317ac021f2d08 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s +386020dee8b725c7fe2526f1fc211d7d OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c + OX002-SW-98010-r0p0-00bet1/vc/m4p2/api/ +4624e7c838e10a249abcc3d3f4f40748 OX002-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h +65e1057d04e2cb844559dc9f6e09795a OX002-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h + 
OX002-SW-98010-r0p0-00bet1/vc/src/ +e627b3346b0dc9aff14446005ce0fa43 OX002-SW-98010-r0p0-00bet1/vc/src/armVC_Version.c + OX002-SW-98010-r0p0-00bet1/vc/api/ +7ca94b1c33ac0211e17d38baadd7d1dd OX002-SW-98010-r0p0-00bet1/vc/api/armVC.h +12cf7596edbbf6048b626d15e8d0ed48 OX002-SW-98010-r0p0-00bet1/vc/api/omxVC.h +11726e286a81257cb45f5547fb4d374c OX002-SW-98010-r0p0-00bet1/vc/api/omxVC_s.h +a5b2af605c319cd2491319e430741377 OX002-SW-98010-r0p0-00bet1/vc/api/armVCCOMM_s.h + OX002-SW-98010-r0p0-00bet1/vc/comm/ + OX002-SW-98010-r0p0-00bet1/vc/comm/src/ +1f81187b48487a8ea6dbc327648e3e4f OX002-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy16x16_s.s +936d3f2038a6f8613ec25e50cc601fe8 OX002-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy8x8_s.s +8f6708a249130962e0bc5c044ac6dd93 OX002-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s +aab7713414428e95de0ba799a2679b36 ARM_DELIVERY_97414.TXT diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM.h new file mode 100755 index 0000000000000000000000000000000000000000..64c19586c6f0942969b87fa9dc813ecbdf38fb40 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM.h @@ -0,0 +1,785 @@ +/** + * + * File Name: armCOMM.h + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * File: armCOMM.h + * Brief: Declares Common APIs/Data Types used across OpenMAX API's + * + */ + + +#ifndef _armCommon_H_ +#define _armCommon_H_ + +#include "omxtypes.h" + +typedef struct +{ + OMX_F32 Re; /** Real part */ + OMX_F32 Im; /** Imaginary part */ + +} OMX_FC32; /** single precision floating point complex number */ + +typedef struct +{ + OMX_F64 Re; /** Real part */ + OMX_F64 Im; /** Imaginary part */ + +} OMX_FC64; /** double precision floating point complex number */ + + +/* Used by both IP and IC domains for 8x8 JPEG blocks. */ +typedef OMX_S16 ARM_BLOCK8x8[64]; + + +#include "armOMX.h" + +#define armPI ((OMX_F64)(3.1415926535897932384626433832795)) + +/***********************************************************************/ + +/* Compiler extensions */ +#ifdef ARM_DEBUG +/* debug version: str is printed through "%s" so it is never interpreted as a format string */ +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#define armError(str) do { printf("%s\n", (str)); exit(-1); } while (0) +#define armWarn(str) do { printf("%s\n", (str)); } while (0) +#define armIgnore(a) ((void) (a)) +#define armAssert(a) assert(a) +#else +/* release version */ +#define armError(str) ((void) (str)) +#define armWarn(str) ((void) (str)) +#define armIgnore(a) ((void) (a)) +#define armAssert(a) ((void) (a)) +#endif /* ARM_DEBUG */ + +/* Arithmetic operations */ + +#define armMin(a,b) ( (a) > (b) ? (b):(a) ) +#define armMax(a,b) ( (a) > (b) ? (a):(b) ) +#define armAbs(a) ( (a) < 0 ? 
-(a):(a) ) + +/* Alignment operation */ + +#define armAlignToBytes(Ptr,N) (Ptr + ( ((N-(int)Ptr)&(N-1)) / sizeof(*Ptr) )) +#define armAlignTo2Bytes(Ptr) armAlignToBytes(Ptr,2) +#define armAlignTo4Bytes(Ptr) armAlignToBytes(Ptr,4) +#define armAlignTo8Bytes(Ptr) armAlignToBytes(Ptr,8) +#define armAlignTo16Bytes(Ptr) armAlignToBytes(Ptr,16) + +/* Error and Alignment check */ + +#define armRetArgErrIf(condition, code) if(condition) { return (code); } +#define armRetDataErrIf(condition, code) if(condition) { return (code); } + +#ifndef ALIGNMENT_DOESNT_MATTER +#define armIsByteAligned(Ptr,N) ((((int)(Ptr)) % N)==0) +#define armNotByteAligned(Ptr,N) ((((int)(Ptr)) % N)!=0) +#else +#define armIsByteAligned(Ptr,N) (1) +#define armNotByteAligned(Ptr,N) (0) +#endif + +#define armIs2ByteAligned(Ptr) armIsByteAligned(Ptr,2) +#define armIs4ByteAligned(Ptr) armIsByteAligned(Ptr,4) +#define armIs8ByteAligned(Ptr) armIsByteAligned(Ptr,8) +#define armIs16ByteAligned(Ptr) armIsByteAligned(Ptr,16) + +#define armNot2ByteAligned(Ptr) armNotByteAligned(Ptr,2) +#define armNot4ByteAligned(Ptr) armNotByteAligned(Ptr,4) +#define armNot8ByteAligned(Ptr) armNotByteAligned(Ptr,8) +#define armNot16ByteAligned(Ptr) armNotByteAligned(Ptr,16) +#define armNot32ByteAligned(Ptr) armNotByteAligned(Ptr,32) + +/** + * Function: armRoundFloatToS16_ref/armRoundFloatToS32_ref/armRoundFloatToS64 + * + * Description: + * Converts a double precision value into a short int/int after rounding + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16/OMX_S32 format + * + */ + +OMX_S16 armRoundFloatToS16 (OMX_F64 Value); +OMX_S32 armRoundFloatToS32 (OMX_F64 Value); +OMX_S64 armRoundFloatToS64 (OMX_F64 Value); + +/** + * Function: armSatRoundFloatToS16_ref/armSatRoundFloatToS32 + * + * Description: + * Converts a double precision value into a short int/int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * 
+ * Return Value: + * [out] converted value in OMX_S16/OMX_S32 format + * + */ + +OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value); +OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value); + +/** + * Function: armSatRoundFloatToU16_ref/armSatRoundFloatToU32 + * + * Description: + * Converts a double precision value into a unsigned short int/int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_U16/OMX_U32 format + * + */ + +OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value); +OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value); + +/** + * Function: armSignCheck + * + * Description: + * Checks the sign of a variable: + * returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + * + * Remarks: + * + * Parameters: + * [in] var Variable to be checked + * + * Return Value: + * OMX_INT -- returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + */ + +OMX_INT armSignCheck (OMX_S16 var); + +/** + * Function: armClip + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: + * + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to the clipped + * + * Return Value: + * OMX_S32 -- returns clipped value + */ + +OMX_S32 armClip ( + OMX_INT min, + OMX_INT max, + OMX_S32 src + ); + +/** + * Function: armClip_F32 + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: + * + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to the clipped + * + * Return Value: + * OMX_F32 -- returns clipped value + */ + +OMX_F32 armClip_F32 ( + OMX_F32 min, + OMX_F32 max, + OMX_F32 src + ); + +/** + * Function: armShiftSat_F32 + * + * Description: Divides a float value by 2^shift and + * saturates it for unsigned value range for satBits. + * Second parameter is like "shifting" the corresponding + * integer value. 
Takes care of rounding while clipping the final + * value. + * + * Parameters: + * [in] v Number to be operated upon + * [in] shift Divides the input "v" by "2^shift" + * [in] satBits Final range is [0, 2^satBits) + * + * Return Value: + * OMX_U32 -- returns "shifted" saturated value + */ + +OMX_U32 armShiftSat_F32( + OMX_F32 v, + OMX_INT shift, + OMX_INT satBits + ); + +/** + * Functions: armSwapElem + * + * Description: + * This function swaps two elements at the specified pointer locations. + * The size of each element could be anything as specified by elemSize + * + * Return Value: + * OMXResult -- Error status from the function + */ +OMXResult armSwapElem(OMX_U8 *pBuf1, OMX_U8 *pBuf2, OMX_INT elemSize); + + +/** + * Function: armMedianOf3 + * + * Description: Finds the median of three numbers + * + * Remarks: + * + * Parameters: + * [in] fEntry First entry + * [in] sEntry second entry + * [in] tEntry Third entry + * + * Return Value: + * OMX_S32 -- returns the median value + */ + +OMX_S32 armMedianOf3 ( + OMX_S32 fEntry, + OMX_S32 sEntry, + OMX_S32 tEntry + ); + +/** + * Function: armLogSize + * + * Description: Finds the size of a positive value and returns the same + * + * Remarks: + * + * Parameters: + * [in] value Positive value + * + * Return Value: + * OMX_U8 -- returns the size of the positive value + */ + +OMX_U8 armLogSize ( + OMX_U16 value + ); + +/***********************************************************************/ + /* Saturating Arithmetic operations */ + +/** + * Function :armSatAdd_S32() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * Parametrs: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S32 armSatAdd_S32( + OMX_S32 Value1, + OMX_S32 Value2 + ); + +/** + * Function :armSatAdd_S64() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * Parametrs: + * [in] 
Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S64 armSatAdd_S64( + OMX_S64 Value1, + OMX_S64 Value2 + ); + +/** Function :armSatSub_S32() + * + * Description : + * Returns the result of saturated substraction of the two inputs Value1, Value2 + * + * Parametrs: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatSub_S32( + OMX_S32 Value1, + OMX_S32 Value2 + ); + +/** + * Function :armSatMac_S32() + * + * Description : + * Returns the result of Multiplication of Value1 and Value2 and subesquent saturated + * accumulation with Mac + * + * Parametrs: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * [in] Mac Accumulator + * + * Return: + * [out] Result of operation + **/ + +OMX_S32 armSatMac_S32( + OMX_S32 Mac, + OMX_S16 Value1, + OMX_S16 Value2 + ); + +/** + * Function :armSatMac_S16S32_S32 + * + * Description : + * Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac + * + * mac = mac + Saturate_in_32Bits(delayElem * filTap) + * + * Parametrs: + * [in] delayElem First 32 bit Operand + * [in] filTap Second 16 bit Operand + * [in] mac Result of MAC operation + * + * Return: + * [out] mac Result of operation + * + **/ + +OMX_S32 armSatMac_S16S32_S32( + OMX_S32 mac, + OMX_S32 delayElem, + OMX_S16 filTap ); + +/** + * Function :armSatRoundRightShift_S32_S16 + * + * Description : + * Returns the result of rounded right shift operation of input by the scalefactor + * + * output = Saturate_in_16Bits( ( RightShift( (Round(input) , scaleFactor ) ) + * + * Parametrs: + * [in] input The input to be operated on + * [in] scaleFactor The shift number + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S16 armSatRoundRightShift_S32_S16( + OMX_S32 input, + OMX_INT scaleFactor); + +/** + * Function :armSatRoundLeftShift_S32() + * + * Description : + * Returns the 
result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. + * + * Parametrs: + * [in] Value Operand + * [in] shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatRoundLeftShift_S32( + OMX_S32 Value, + OMX_INT shift + ); + +/** + * Function :armSatRoundLeftShift_S64() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. + * + * Parametrs: + * [in] Value Operand + * [in] shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S64 armSatRoundLeftShift_S64( + OMX_S64 Value, + OMX_INT shift + ); + +/** + * Function :armSatMulS16S32_S32() + * + * Description : + * Returns the result of a S16 data type multiplied with an S32 data type + * in a S32 container + * + * Parametrs: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S32 armSatMulS16S32_S32( + OMX_S16 input1, + OMX_S32 input2); + +/** + * Function :armSatMulS32S32_S32() + * + * Description : + * Returns the result of a S32 data type multiplied with an S32 data type + * in a S32 container + * + * Parametrs: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatMulS32S32_S32( + OMX_S32 input1, + OMX_S32 input2); + + +/** + * Function :armIntDivAwayFromZero() + * + * Description : Integer division with rounding to the nearest integer. + * Half-integer values are rounded away from zero + * unless otherwise specified. For example 3//2 is rounded + * to 2, and -3//2 is rounded to -2. 
+ * + * Parametrs: + * [in] Num Operand 1 + * [in] Deno Operand 2 + * + * Return: + * [out] Result of operation Num//Deno + * + **/ + +OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno); + + +/***********************************************************************/ +/* + * Debugging macros + * + */ + + +/* + * Definition of output stream - change to stderr if necessary + */ +#define DEBUG_STREAM stdout + +/* + * Debug printf macros, one for each argument count. + * Add more if needed. + */ +#ifdef DEBUG_ON +#include <stdio.h> + +#define DEBUG_PRINTF_0(a) fprintf(DEBUG_STREAM, a) +#define DEBUG_PRINTF_1(a, b) fprintf(DEBUG_STREAM, a, b) +#define DEBUG_PRINTF_2(a, b, c) fprintf(DEBUG_STREAM, a, b, c) +#define DEBUG_PRINTF_3(a, b, c, d) fprintf(DEBUG_STREAM, a, b, c, d) +#define DEBUG_PRINTF_4(a, b, c, d, e) fprintf(DEBUG_STREAM, a, b, c, d, e) +#define DEBUG_PRINTF_5(a, b, c, d, e, f) fprintf(DEBUG_STREAM, a, b, c, d, e, f) +#define DEBUG_PRINTF_6(a, b, c, d, e, f, g) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g) +#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h) +#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i) +#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j) +#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k) +#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l) +#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m) +#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n) +#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) +#else /* DEBUG_ON */ +#define DEBUG_PRINTF_0(a) 
+#define DEBUG_PRINTF_1(a, b) +#define DEBUG_PRINTF_2(a, b, c) +#define DEBUG_PRINTF_3(a, b, c, d) +#define DEBUG_PRINTF_4(a, b, c, d, e) +#define DEBUG_PRINTF_5(a, b, c, d, e, f) +#define DEBUG_PRINTF_6(a, b, c, d, e, f, g) +#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h) +#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i) +#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j) +#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k) +#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l) +#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m) +#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n) +#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) +#endif /* DEBUG_ON */ + + +/* + * Domain and sub domain definitions + * + * In order to turn on debug for an entire domain or sub-domain + * at compile time, one of the DEBUG_DOMAIN_* below may be defined, + * which will activate debug in all of the defines it contains. + */ + +#ifdef DEBUG_DOMAIN_AC +#define DEBUG_OMXACAAC_DECODECHANPAIRELT_MPEG4 +#define DEBUG_OMXACAAC_DECODECHANPAIRELT +#define DEBUG_OMXACAAC_DECODEDATSTRELT +#define DEBUG_OMXACAAC_DECODEFILLELT +#define DEBUG_OMXACAAC_DECODEISSTEREO_S32 +#define DEBUG_OMXACAAC_DECODEMSPNS_S32 +#define DEBUG_OMXACAAC_DECODEMSSTEREO_S32_I +#define DEBUG_OMXACAAC_DECODEPRGCFGELT +#define DEBUG_OMXACAAC_DECODETNS_S32_I +#define DEBUG_OMXACAAC_DEINTERLEAVESPECTRUM_S32 +#define DEBUG_OMXACAAC_ENCODETNS_S32_I +#define DEBUG_OMXACAAC_LONGTERMPREDICT_S32 +#define DEBUG_OMXACAAC_LONGTERMRECONSTRUCT_S32 +#define DEBUG_OMXACAAC_MDCTFWD_S32 +#define DEBUG_OMXACAAC_MDCTINV_S32_S16 +#define DEBUG_OMXACAAC_NOISELESSDECODE +#define DEBUG_OMXACAAC_QUANTINV_S32_I +#define DEBUG_OMXACAAC_UNPACKADIFHEADER +#define DEBUG_OMXACAAC_UNPACKADTSFRAMEHEADER +#define DEBUG_OMXACMP3_HUFFMANDECODESFBMBP_S32 +#define DEBUG_OMXACMP3_HUFFMANDECODESFB_S32 +#define DEBUG_OMXACMP3_HUFFMANDECODE_S32 +#define DEBUG_OMXACMP3_MDCTINV_S32 +#define 
DEBUG_OMXACMP3_REQUANTIZESFB_S32_I +#define DEBUG_OMXACMP3_REQUANTIZE_S32_I +#define DEBUG_OMXACMP3_SYNTHPQMF_S32_S16 +#define DEBUG_OMXACMP3_UNPACKFRAMEHEADER +#define DEBUG_OMXACMP3_UNPACKSCALEFACTORS_S8 +#define DEBUG_OMXACMP3_UNPACKSIDEINFO +#endif /* DEBUG_DOMAIN_AC */ + + +#ifdef DEBUG_DOMAIN_VC +#define DEBUG_OMXVCM4P10_AVERAGE_16X +#define DEBUG_OMXVCM4P10_AVERAGE_4X +#define DEBUG_OMXVCM4P10_AVERAGE_8X +#define DEBUG_OMXVCM4P10_DEBLOCKCHROMA_U8_C1IR +#define DEBUG_OMXVCM4P10_DEBLOCKLUMA_U8_C1IR +#define DEBUG_OMXVCM4P10_DECODECHROMADCCOEFFSTOPAIRCAVLC_U8 +#define DEBUG_OMXVCM4P10_DECODECOEFFSTOPAIRCAVLC_U8 +#define DEBUG_OMXVCM4P10_DEQUANTTRANSFORMACFROMPAIR_U8_S16_C1_DLX +#define DEBUG_OMXVCM4P10_EXPANDFRAME +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_HOREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_VEREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_HOREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_VEREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_PREDICTINTRACHROMA8X8_U8_C1R +#define DEBUG_OMXVCM4P10_PREDICTINTRA_16X16_U8_C1R +#define DEBUG_OMXVCM4P10_PREDICTINTRA_4X4_U8_C1R +#define DEBUG_OMXVCM4P10_SADQUAR_16X +#define DEBUG_OMXVCM4P10_SADQUAR_4X +#define DEBUG_OMXVCM4P10_SADQUAR_8X +#define DEBUG_OMXVCM4P10_SAD_16X +#define DEBUG_OMXVCM4P10_SAD_4X +#define DEBUG_OMXVCM4P10_SAD_8X +#define DEBUG_OMXVCM4P10_SATD_4X4 +#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTCHROMADCFROMPAIR_U8_S16_C1 +#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTLUMADCFROMPAIR_U8_S16_C1 +#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_CHROMADC +#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_LUMADC +#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_16X16 +#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_8X8 +#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_16X16 +#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_8X8 +#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_SAD_U8_S16 +#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_U8_S16 +#define DEBUG_OMXVCM4P2_DCT8X8BLKDLX +#define 
DEBUG_OMXVCM4P2_DECODEBLOCKCOEF_INTER_S16 +#define DEBUG_OMXVCM4P2_DECODEPADMV_PVOP +#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTER_S16 +#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRAACVLC_S16 +#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRADCVLC_S16 +#define DEBUG_OMXVCM4P2_ENCODEMV_U8_S16 +#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTER_S16 +#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRAACVLC_S16 +#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRADCVLC_S16 +#define DEBUG_OMXVCM4P2_FINDMVPRED +#define DEBUG_OMXVCM4P2_IDCT8X8BLKDLX +#define DEBUG_OMXVCM4P2_LIMITMVTORECT +#define DEBUG_OMXVCM4P2_MOTIONESTIMATIONMB +#define DEBUG_OMXVCM4P2_PADMBGRAY_U8 +#define DEBUG_OMXVCM4P2_PADMBHORIZONTAL_U8 +#define DEBUG_OMXVCM4P2_PADMBVERTICAL_U8 +#define DEBUG_OMXVCM4P2_PADMV +#define DEBUG_OMXVCM4P2_QUANTINTER_S16_I +#define DEBUG_OMXVCM4P2_QUANTINTRA_S16_I +#define DEBUG_OMXVCM4P2_QUANTINVINTER_S16_I +#define DEBUG_OMXVCM4P2_QUANTINVINTRA_S16_I +#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTER +#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTRA +#endif /* DEBUG_DOMAIN_VC */ + + +#ifdef DEBUG_DOMAIN_IC +/* To be filled in */ +#endif /* DEBUG_DOMAIN_IC */ + + +#ifdef DEBUG_DOMAIN_SP +#define DEBUG_OMXACSP_DOTPROD_S16 +#define DEBUG_OMXACSP_BLOCKEXP_S16 +#define DEBUG_OMXACSP_BLOCKEXP_S32 +#define DEBUG_OMXACSP_COPY_S16 +#define DEBUG_OMXACSP_DOTPROD_S16 +#define DEBUG_OMXACSP_DOTPROD_S16_SFS +#define DEBUG_OMXACSP_FFTFWD_CTOC_SC16_SFS +#define DEBUG_OMXACSP_FFTFWD_CTOC_SC32_SFS +#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S16S32_SFS +#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S32_SFS +#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC16 +#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC32 +#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S16_S32 +#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S32 +#define DEBUG_OMXACSP_FFTINIT_C_SC16 +#define DEBUG_OMXACSP_FFTINIT_C_SC32 +#define DEBUG_OMXACSP_FFTINIT_R_S16_S32 +#define DEBUG_OMXACSP_FFTINIT_R_S32 +#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32S16_SFS +#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32_SFS 
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC16_SFS +#define DEBUG_OMXACSP_FFTINV_CTOC_SC32_SFS +#define DEBUG_OMXACSP_FILTERMEDIAN_S32_I +#define DEBUG_OMXACSP_FILTERMEDIAN_S32 +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_ISFS +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_I +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16 +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_SFS +#define DEBUG_OMXACSP_FIR_DIRECT_S16_ISFS +#define DEBUG_OMXACSP_FIR_DIRECT_S16_I +#define DEBUG_OMXACSP_FIR_DIRECT_S16 +#define DEBUG_OMXACSP_FIR_DIRECT_S16_SFS +#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16_I +#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16 +#define DEBUG_OMXACSP_IIRONE_DIRECT_S16_I +#define DEBUG_OMXACSP_IIRONE_DIRECT_S16 +#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16_I +#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16 +#define DEBUG_OMXACSP_IIR_DIRECT_S16_I +#define DEBUG_OMXACSP_IIR_DIRECT_S16 +#endif /* DEBUG_DOMAIN_SP */ + + +#ifdef DEBUG_DOMAIN_IP +#define DEBUG_OMXIPBM_ADDC_U8_C1R_SFS +#define DEBUG_OMXIPBM_COPY_U8_C1R +#define DEBUG_OMXIPBM_COPY_U8_C3R +#define DEBUG_OMXIPBM_MIRROR_U8_C1R +#define DEBUG_OMXIPBM_MULC_U8_C1R_SFS +#define DEBUG_OMXIPCS_COLORTWISTQ14_U8_C3R +#define DEBUG_OMXIPCS_RGB565TOYCBCR420LS_MCU_U16_S16_C3P3R +#define DEBUG_OMXIPCS_RGB565TOYCBCR422LS_MCU_U16_S16_C3P3R +#define DEBUG_OMXIPCS_RGB565TOYCBCR444LS_MCU_U16_S16_C3P3R +#define DEBUG_OMXIPCS_RGBTOYCBCR420LS_MCU_U8_S16_C3P3R +#define DEBUG_OMXIPCS_RGBTOYCBCR422LS_MCU_U8_S16_C3P3R +#define DEBUG_OMXIPCS_RGBTOYCBCR444LS_MCU_U8_S16_C3P3R +#define DEBUG_OMXIPCS_YCBCR420RSZROT_U8_P3R +#define DEBUG_OMXIPCS_YCBCR420TORGB565LS_MCU_S16_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR420TORGB565_U8_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR420TORGBLS_MCU_S16_U8_P3C3R +#define DEBUG_OMXIPCS_YCBCR422RSZCSCROTRGB_U8_C2R +#define DEBUG_OMXIPCS_YCBCR422RSZROT_U8_P3R +#define DEBUG_OMXIPCS_YCBCR422TORGB565LS_MCU_S16_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR422TORGB565_U8_U16_C2C3R +#define DEBUG_OMXIPCS_YCBCR422TORGBLS_MCU_S16_U8_P3C3R +#define 
DEBUG_OMXIPCS_YCBCR422TORGB_U8_C2C3R +#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_C2P3R +#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_P3R +#define DEBUG_OMXIPCS_YCBCR444TORGB565LS_MCU_S16_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR444TORGBLS_MCU_S16_U8_P3C3R +#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_C3R +#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCRTORGB_U8_C3R +#define DEBUG_OMXIPPP_GETCENTRALMOMENT_S64 +#define DEBUG_OMXIPPP_GETSPATIALMOMENT_S64 +#define DEBUG_OMXIPPP_MOMENTGETSTATESIZE_S64 +#define DEBUG_OMXIPPP_MOMENTINIT_S64 +#define DEBUG_OMXIPPP_MOMENTS64S_U8_C1R +#define DEBUG_OMXIPPP_MOMENTS64S_U8_C3R +#endif /* DEBUG_DOMAIN_IP */ + + +#endif /* _armCommon_H_ */ + +/*End of File*/ + + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_BitDec_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_BitDec_s.h new file mode 100755 index 0000000000000000000000000000000000000000..c738f72d8125c03c57d13e6250d1ba034f89abb5 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_BitDec_s.h @@ -0,0 +1,670 @@ +;// +;// +;// File Name: armCOMM_BitDec_s.h +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// OpenMAX optimized bitstream decode module +;// +;// You must include armCOMM_s.h before including this file +;// +;// This module provides macros to perform assembly optimized fixed and +;// variable length decoding from a read-only bitstream. The variable +;// length decode modules take as input a pointer to a table of 16-bit +;// entries of the following format. 
+;// +;// VLD Table Entry format +;// +;// 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +;// +------------------------------------------------+ +;// | Len | Symbol | 1 | +;// +------------------------------------------------+ +;// | Offset | 0 | +;// +------------------------------------------------+ +;// +;// If the table entry is a leaf entry then bit 0 is set: +;// Len = Number of bits overread (0 to 7) +;// Symbol = Symbol payload (unsigned 12 bits) +;// +;// If the table entry is an internal node then bit 0 is clear: +;// Offset = Number of (16-bit) half words from the table +;// start to the next table node +;// +;// The table is accessed by successive lookups on the +;// next Step bits of the input bitstream until a leaf node +;// is obtained. The Step sizes are supplied to the VLD macro. +;// +;// USAGE: +;// +;// To use any of the macros in this package, first call: +;// +;// M_BD_INIT ppBitStream, pBitOffset, pBitStream, RBitBuffer, RBitCount, Tmp +;// +;// This caches the current bitstream position and next available +;// bits in registers pBitStream, RBitBuffer, RBitCount. These registers +;// are reserved for use by the bitstream decode package until you +;// call M_BD_FINI. +;// +;// Next call the following macro(s) as many times as you need: +;// +;// M_BD_LOOK8 - Look ahead constant 1<=N<=8 bits into the bitstream +;// M_BD_LOOK16 - Look ahead constant 1<=N<=16 bits into the bitstream +;// M_BD_READ8 - Read constant 1<=N<=8 bits from the bitstream +;// M_BD_READ16 - Read constant 1<=N<=16 bits from the bitstream +;// M_BD_VREAD8 - Read variable 1<=N<=8 bits from the bitstream +;// M_BD_VREAD16 - Read variable 1<=N<=16 bits from the bitstream +;// M_BD_VLD - Perform variable length decode using lookup table +;// +;// Finally call the macro: +;// +;// M_BD_FINI ppBitStream, pBitOffset +;// +;// This writes the bitstream state back to memory.
+;// +;// The three bitstream cache register names are assigned to the following global +;// variables: +;// + + GBLS pBitStream ;// Register name for pBitStream + GBLS BitBuffer ;// Register name for BitBuffer + GBLS BitCount ;// Register name for BitCount + +;// +;// These register variables must have a certain defined state on entry to every bitstream +;// macro (except M_BD_INIT) and on exit from every bitstream macro (except M_BD_FINI). +;// The state may depend on implementation. +;// +;// For the default (ARM11) implementation the following hold: +;// pBitStream - points to the first byte not held in the BitBuffer +;// BitBuffer - is a cache of (4 bytes) 32 bits, bit 31 the first bit +;// BitCount - is offset (from the top bit) to the next unused bitstream bit +;// 0<=BitCount<=15 (so BitBuffer holds at least 17 unused bits) +;// +;// + + ;// Bitstream Decode initialise + ;// + ;// Initialises the bitstream decode global registers from + ;// bitstream pointers. This macro is split into 3 parts to enable + ;// scheduling. + ;// + ;// Input Registers: + ;// + ;// $ppBitStream - pointer to pointer to the next bitstream byte + ;// $pBitOffset - pointer to the number of bits used in the current byte (0..7) + ;// $RBitStream - register to use for pBitStream (can be $ppBitStream) + ;// $RBitBuffer - register to use for BitBuffer + ;// $RBitCount - register to use for BitCount (can be $pBitOffset) + ;// + ;// Output Registers: + ;// + ;// $T1,$T2,$T3 - registers that must be preserved between calls to + ;// M_BD_INIT1 and M_BD_INIT2 + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_INIT0 $ppBitStream, $pBitOffset, $RBitStream, $RBitBuffer, $RBitCount + +pBitStream SETS "$RBitStream" +BitBuffer SETS "$RBitBuffer" +BitCount SETS "$RBitCount" + + ;// load inputs + LDR $pBitStream, [$ppBitStream] + LDR $BitCount, [$pBitOffset] + MEND + + MACRO + M_BD_INIT1 $T1, $T2, $T3 + LDRB $T2, [$pBitStream, #2] + LDRB $T1, [$pBitStream, #1] + LDRB $BitBuffer, [$pBitStream], #3 + ADD $BitCount, $BitCount, #8 + MEND + + MACRO + M_BD_INIT2 $T1, $T2, $T3 + ORR $T2, $T2, $T1, LSL #8 + ORR $BitBuffer, $T2, $BitBuffer, LSL #16 + MEND + + ;// + ;// Look ahead fixed 1<=N<=8 bits without consuming any bits + ;// The next bits will be placed at bit 31..24 of destination register + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to look + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_LOOK8 $Symbol, $N + ASSERT ($N>=1):LAND:($N<=8) + MOV $Symbol, $BitBuffer, LSL $BitCount + MEND + + ;// + ;// Look ahead fixed 1<=N<=16 bits without consuming any bits + ;// The next bits will be placed at bit 31..16 of destination register + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to look + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_LOOK16 $Symbol, $N, $T1 + ASSERT ($N >= 1):LAND:($N <= 16) + MOV $Symbol, $BitBuffer, LSL $BitCount + MEND + + ;// + ;// Skips fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer + ;// + ;// Input Registers: + ;// + ;// $N - number of bits + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_SKIP8 $N, $T1 + ASSERT ($N>=1):LAND:($N<=8) + SUBS $BitCount, $BitCount, #(8-$N) + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Read fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_READ8 $Symbol, $N, $T1 + ASSERT ($N>=1):LAND:($N<=8) + MOVS $Symbol, $BitBuffer, LSL $BitCount + SUBS $BitCount, $BitCount, #(8-$N) + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + MOV $Symbol, $Symbol, LSR #(32-$N) + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Read fixed 1<=N<=16 bits from the bitstream, advancing the bitstream pointer + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_READ16 $Symbol, $N, $T1, $T2 + ASSERT ($N>=1):LAND:($N<=16) + ASSERT $Symbol<>$T1 + IF ($N<=8) + M_BD_READ8 $Symbol, $N, $T1 + ELSE + ;// N>8 so we will be able to refill at least one byte + LDRB $T1, [$pBitStream], #1 + MOVS $Symbol, $BitBuffer, LSL $BitCount + ORR $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBS $BitCount, $BitCount, #(16-$N) + LDRCSB $T1, [$pBitStream], #1 + MOV $Symbol, $Symbol, LSR #(32-$N) + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + ENDIF + MEND + + ;// + ;// Skip variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits. 1<=N<=8 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_VSKIP8 $N, $T1 + ADD $BitCount, $BitCount, $N + SUBS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Skip variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits. 1<=N<=16 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_VSKIP16 $N, $T1, $T2 + ADD $BitCount, $BitCount, $N + SUBS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Read variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read. 1<=N<=8 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_VREAD8 $Symbol, $N, $T1, $T2 + MOV $Symbol, $BitBuffer, LSL $BitCount + ADD $BitCount, $BitCount, $N + SUBS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + RSB $T2, $N, #32 + ADDCC $BitCount, $BitCount, #8 + MOV $Symbol, $Symbol, LSR $T2 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Read variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read. 1<=N<=16 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_VREAD16 $Symbol, $N, $T1, $T2 + MOV $Symbol, $BitBuffer, LSL $BitCount + ADD $BitCount, $BitCount, $N + SUBS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + RSB $T2, $N, #32 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + MOV $Symbol, $Symbol, LSR $T2 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Decode a code of the form 0000...001 where there + ;// are N zeros before the 1 and N<=15 (code length<=16) + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the number of zeros before the next 1 + ;// >=16 is an illegal code + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_CLZ16 $Symbol, $T1, $T2 + MOVS $Symbol, $BitBuffer, LSL $BitCount + CLZ $Symbol, $Symbol + ADD $BitCount, $BitCount, $Symbol + SUBS $BitCount, $BitCount, #7 ;// length is Symbol+1 + LDRCSB $T1, [$pBitStream], #1 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Decode a code of the form 1111...110 where there + ;// are N ones before the 0 and N<=15 (code length<=16) + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the number of ones before the next 0 + ;// >=16 is an illegal code + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above.
+ ;// $BitCount / + ;// + MACRO + M_BD_CLO16 $Symbol, $T1, $T2 + MOV $Symbol, $BitBuffer, LSL $BitCount + MVN $Symbol, $Symbol + CLZ $Symbol, $Symbol + ADD $BitCount, $BitCount, $Symbol + SUBS $BitCount, $BitCount, #7 ;// length is Symbol+1 + LDRCSB $T1, [$pBitStream], #1 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Variable Length Decode module + ;// + ;// Decodes one VLD Symbol from a bitstream and refill the bitstream + ;// buffer. + ;// + ;// Input Registers: + ;// + ;// $pVLDTable - pointer to VLD decode table of 16-bit entries. + ;// The format is described above at the start of + ;// this file. + ;// $S0 - The number of bits to look up for the first step + ;// 1<=$S0<=8 + ;// $S1 - The number of bits to look up for each subsequent + ;// step 1<=$S1<=$S0. + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - decoded VLD symbol value + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_VLD $Symbol, $T1, $T2, $pVLDTable, $S0, $S1 + ASSERT (1<=$S0):LAND:($S0<=8) + ASSERT (1<=$S1):LAND:($S1<=$S0) + + ;// Note 0<=BitCount<=15 on entry and exit + + MOVS $T1, $BitBuffer, LSL $BitCount ;// left align next bits + MOVS $Symbol, #(2<<$S0)-2 ;// create mask + AND $Symbol, $Symbol, $T1, LSR #(31-$S0) ;// 2*(next $S0 bits) + SUBS $BitCount, $BitCount, #8 ;// CS if buffer can be filled +01 + LDRCSB $T1, [$pBitStream], #1 ;// load refill byte + LDRH $Symbol, [$pVLDTable, $Symbol] ;// load table entry + ADDCC $BitCount, $BitCount, #8 ;// refill not possible + ADD $BitCount, $BitCount, #$S0 ;// assume $S0 bits used + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in refill byte + MOVS $T1, $Symbol, LSR #1 ;// CS=leaf entry + BCS %FT02 + + MOVS $T1, $BitBuffer, LSL $BitCount ;// left align next bit + IF (2*$S0-$S1<=8) + ;// Can combine refill check and -S0+S1 and keep $BitCount<=15 + SUBS $BitCount, $BitCount, #8+($S0-$S1) + ELSE + ;// Separate refill check and -S0+S1 offset + SUBS $BitCount, $BitCount, #8 + SUB $BitCount, $BitCount, #($S0-$S1) + ENDIF + ADD $Symbol, $Symbol, $T1, LSR #(31-$S1) ;// add 2*(next $S1 bits) to + BIC $Symbol, $Symbol, #1 ;// table offset + B %BT01 ;// load next table entry +02 + ;// BitCount range now depends on the route here + ;// if (first step) S0 <= BitCount <= 7+S0 <=15 + ;// else if (2*S0-S1<=8) S0 <= BitCount <= 7+(2*S0-S1) <=15 + ;// else S1 <= BitCount <= 7+S1 <=15 + + SUB $BitCount, $BitCount, $Symbol, LSR#13 + BIC $Symbol, $T1, #0xF000 + MEND + + + ;// Add an offset number of bits + ;// + ;// Outputs destination byte and bit index values which correspond to an offset number of bits + ;// from the current location. This is used to compare bitstream positions using M_BD_CMP. + ;// + ;// Input Registers: + ;// + ;// $Offset - Offset to be added in bits. + ;// $pBitStream \ + ;// $BitBuffer } See description above.
+ ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $ByteIndex - Destination pBitStream pointer after adding the Offset. + ;// This value will be 4 byte ahead and needs to subtract by 4 to get exact + ;// pointer (as in M_BD_FINI). But for using with M_BD_CMP subtract is not needed. + ;// $BitIndex - Destination BitCount after the addition of Offset number of bits + ;// + MACRO + M_BD_ADD $ByteIndex, $BitIndex, $Offset + + ;// ($ByteIndex,$BitIndex) = Current position + $Offset bits + ADD $Offset, $Offset, $BitCount + AND $BitIndex, $Offset, #7 + ADD $ByteIndex, $pBitStream, $Offset, ASR #3 + MEND + + ;// Move bitstream pointers to the location given + ;// + ;// Outputs destination byte and bit index values which corresponds to + ;// the current location given (calculated using M_BD_ADD). + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// $ByteIndex - Destination pBitStream pointer after move. + ;// This value will be 4 byte ahead and needs to subtract by 4 to get exact + ;// pointer (as in M_BD_FINI). + ;// $BitIndex - Destination BitCount after the move + ;// + ;// Output Registers: + ;// + ;// $pBitStream \ + ;// } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_MOV $ByteIndex, $BitIndex + + ;// ($pBitStream, $Offset) = ($ByteIndex,$BitIndex) + MOV $BitCount, $BitIndex + MOV $pBitStream, $ByteIndex + MEND + + ;// Bitstream Compare + ;// + ;// Compares bitstream position with that of a destination position. Destination position + ;// is held in two input registers which are calculated using M_BD_ADD macro + ;// + ;// Input Registers: + ;// + ;// $ByteIndex - Destination pBitStream pointer, (4 byte ahead as described in M_BD_ADD) + ;// $BitIndex - Destination BitCount + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// FLAGS - GE if destination is reached, LT if destination is ahead + ;// $T1 - corrupted temp/scratch register + ;// + MACRO + M_BD_CMP $ByteIndex, $BitIndex, $T1 + + ;// Return flags set by (current position)-($ByteIndex,$BitIndex) + ;// so GE means that we have reached the indicated position + + ADD $T1, $pBitStream, $BitCount, LSR #3 + CMP $T1, $ByteIndex + AND $T1, $BitCount, #7 + CMPEQ $T1, $BitIndex + MEND + + + ;// Bitstream Decode finalise + ;// + ;// Writes back the bitstream state to the bitstream pointers + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $ppBitStream - pointer to pointer to the next bitstream byte + ;// $pBitOffset - pointer to the number of bits used in the current byte (0..7) + ;// $pBitStream \ + ;// $BitBuffer } these registers are corrupted + ;// $BitCount / + ;// + MACRO + M_BD_FINI $ppBitStream, $pBitOffset + + ;// Advance pointer by the number of free bits in the buffer + ADD $pBitStream, $pBitStream, $BitCount, LSR#3 + AND $BitCount, $BitCount, #7 + + ;// Now move back 32 bits to reach the first unused bit + SUB $pBitStream, $pBitStream, #4 + + ;// Store out bitstream state + STR $BitCount, [$pBitOffset] + STR $pBitStream, [$ppBitStream] + MEND + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Bitstream.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Bitstream.h new file mode 100755 index 0000000000000000000000000000000000000000..b6990340065e77144e5bdf2654c6f93a25fef56f --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Bitstream.h @@ -0,0 +1,212 @@ +/** + * + * File Name: armCOMM_Bitstream.h + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + * + * + * File: armCOMM_Bitstream.h + * Brief: Declares common API's/Data types used across the OpenMax Encoders/Decoders. + * + */ + +#ifndef _armCodec_H_ +#define _armCodec_H_ + +#include "omxtypes.h" + +typedef struct { + OMX_U8 codeLen; + OMX_U32 codeWord; +} ARM_VLC32; + +/* The above should be renamed as "ARM_VLC32" */ + +/** + * Function: armLookAheadBits() + * + * Description: + * Get the next N bits from the bitstream without advancing the bitstream pointer + * + * Parameters: + * [in] **ppBitStream + * [in] *pOffset + * [in] N=1...32 + * + * Returns Value + */ + +OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N); + +/** + * Function: armGetBits() + * + * Description: + * Read N bits from the bitstream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * [in] N=1..32 + * + * [out] *ppBitStream + * [out] *pOffset + * Returns Value + */ + +OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N); + +/** + * Function: armByteAlign() + * + * Description: + * Align the pointer *ppBitStream to the next byte boundary + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + +OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset); + +/** + * Function: armSkipBits() + * + * Description: + * Skip N bits from the value at *ppBitStream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * [in] N + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + +OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N); + +/*************************************** + * Variable bit length Decode + ***************************************/ + +/** + * Function: armUnPackVLC32() + * + * Description: + * Variable length decode of variable length symbol (max size 32 bits) read from + * the bit stream pointed by *ppBitStream at *pOffset by using the table + * pointed by pCodeBook + * + * Parameters: + * [in] **ppBitStream 
+ * [in] *pOffset + * [in] pCodeBook + * + * [out] **ppBitStream + * [out] *pOffset + * + * Returns : Code Book Index if successful. + * : "ARM_NO_CODEBOOK_INDEX = 0xFFFF" if search fails. + **/ + +#define ARM_NO_CODEBOOK_INDEX (OMX_U16)(0xFFFF) + +OMX_U16 armUnPackVLC32( + const OMX_U8 **ppBitStream, + OMX_INT *pOffset, + const ARM_VLC32 *pCodeBook +); + +/*************************************** + * Fixed bit length Encode + ***************************************/ + +/** + * Function: armPackBits + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. + * [in] codeWord Code word that needs to be inserted into the + * bitstream + * [in] codeLength Length of the code word valid range 1...32 + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pOffset *pOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. + * + */ + +OMXResult armPackBits ( + OMX_U8 **ppBitStream, + OMX_INT *pOffset, + OMX_U32 codeWord, + OMX_INT codeLength +); + +/*************************************** + * Variable bit length Encode + ***************************************/ + +/** + * Function: armPackVLC32 + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pBitOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7.
+ * [in] code VLC code word that need to be inserted in to the + * bitstream + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. + * + */ + +OMXResult armPackVLC32 ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + ARM_VLC32 code +); + +#endif /*_armCodec_H_*/ + +/*End of File*/ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCTTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCTTable.h new file mode 100755 index 0000000000000000000000000000000000000000..e0cfdaad84fef69509894c210518bbd696dbcb98 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCTTable.h @@ -0,0 +1,40 @@ +/** + * + * + * File Name: armCOMM_IDCTTable.h + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File : armCOMM_IDCTTable.h + * Description : Contains declarations of tables for IDCT calculation. 
+ * + */ + +#ifndef _armCOMM_IDCTTable_H_ +#define _armCOMM_IDCTTable_H_ + +#include "omxtypes.h" + + /* Table of s(u)*A(u)*A(v)/16 at Q15 + * s(u)=1.0 0 <= u <= 5 + * s(6)=2.0 + * s(7)=4.0 + * A(0) = 2*sqrt(2) + * A(u) = 4*cos(u*pi/16) for (u!=0) + */ +extern const OMX_U16 armCOMM_IDCTPreScale [64]; +extern const OMX_U16 armCOMM_IDCTCoef [4]; + +#endif /* _armCOMM_IDCTTable_H_ */ + + +/* End of File */ + + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCT_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCT_s.h new file mode 100755 index 0000000000000000000000000000000000000000..0baa0870640564ec8dffa061a2e7f11ad53235e7 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCT_s.h @@ -0,0 +1,1451 @@ +;// +;// This confidential and proprietary software may be used only as +;// authorised by a licensing agreement from ARM Limited +;// (C) COPYRIGHT 2004 ARM Limited +;// ALL RIGHTS RESERVED +;// The entire notice above must be reproduced on all authorised +;// copies and copies may only be made to the extent permitted +;// by a licensing agreement from ARM Limited. +;// +;// IDCT_s.s +;// +;// Inverse DCT module +;// +;// +;// ALGORITHM DESCRIPTION +;// +;// The 8x8 2D IDCT is performed by calculating a 1D IDCT for each +;// column and then a 1D IDCT for each row. +;// +;// The 8-point 1D IDCT is defined by +;// f(x) = (C(0)*T(0)*c(0,x) + ... + C(7)*T(7)*c(7,x))/2 +;// +;// C(u) = 1/sqrt(2) if u=0 or 1 if u!=0 +;// c(u,x) = cos( (2x+1)*u*pi/16 ) +;// +;// We compute the 8-point 1D IDCT using the reverse of +;// the Arai-Agui-Nakajima flow graph which we split into +;// 5 stages named in reverse order to identify with the +;// forward DCT. 
Direct inversion of the forward formulae +;// in file FDCT_s.s gives: +;// +;// IStage 5: j(u) = T(u)*A(u) [ A(u)=4*C(u)*c(u,0) ] +;// [ A(0) = 2*sqrt(2) +;// A(u) = 4*cos(u*pi/16) for (u!=0) ] +;// +;// IStage 4: i0 = j0 i1 = j4 +;// i3 = (j2+j6)/2 i2 = (j2-j6)/2 +;// i7 = (j5+j3)/2 i4 = (j5-j3)/2 +;// i5 = (j1+j7)/2 i6 = (j1-j7)/2 +;// +;// IStage 3: h0 = (i0+i1)/2 h1 = (i0-i1)/2 +;// h2 = (i2*sqrt2)-i3 h3 = i3 +;// h4 = cos(pi/8)*i4 + sin(pi/8)*i6 +;// h6 = -sin(pi/8)*i4 + cos(pi/8)*i6 +;// [ The above two lines rotate by -(pi/8) ] +;// h5 = (i5-i7)/sqrt2 h7 = (i5+i7)/2 +;// +;// IStage 2: g0 = (h0+h3)/2 g3 = (h0-h3)/2 +;// g1 = (h1+h2)/2 g2 = (h1-h2)/2 +;// g7 = h7 g6 = h6 - h7 +;// g5 = h5 - g6 g4 = h4 - g5 +;// +;// IStage 1: f0 = (g0+g7)/2 f7 = (g0-g7)/2 +;// f1 = (g1+g6)/2 f6 = (g1-g6)/2 +;// f2 = (g2+g5)/2 f5 = (g2-g5)/2 +;// f3 = (g3+g4)/2 f4 = (g3-g4)/2 +;// +;// Note that most coefficients are halved 3 times during the +;// above calculation. We can rescale the algorithm dividing +;// the input by 8 to remove the halvings. +;// +;// IStage 5: j(u) = T(u)*A(u)/8 +;// +;// IStage 4: i0 = j0 i1 = j4 +;// i3 = j2 + j6 i2 = j2 - j6 +;// i7 = j5 + j3 i4 = j5 - j3 +;// i5 = j1 + j7 i6 = j1 - j7 +;// +;// IStage 3: h0 = i0 + i1 h1 = i0 - i1 +;// h2 = (i2*sqrt2)-i3 h3 = i3 +;// h4 = 2*( cos(pi/8)*i4 + sin(pi/8)*i6) +;// h6 = 2*(-sin(pi/8)*i4 + cos(pi/8)*i6) +;// h5 = (i5-i7)*sqrt2 h7 = i5 + i7 +;// +;// IStage 2: g0 = h0 + h3 g3 = h0 - h3 +;// g1 = h1 + h2 g2 = h1 - h2 +;// g7 = h7 g6 = h6 - h7 +;// g5 = h5 - g6 g4 = h4 - g5 +;// +;// IStage 1: f0 = g0 + g7 f7 = g0 - g7 +;// f1 = g1 + g6 f6 = g1 - g6 +;// f2 = g2 + g5 f5 = g2 - g5 +;// f3 = g3 + g4 f4 = g3 - g4 +;// +;// Note: +;// 1. The scaling by A(u)/8 can often be combined with inverse +;// quantization. The column and row scalings can be combined. +;// 2. The flowgraph in the AAN paper has h4,g6 negated compared +;// to the above code but is otherwise identical. +;// 3. 
The rotation by -pi/8 can be performed using three multiplies +;// Eg c*i4+s*i6 = (i6-i4)*s + (c+s)*i4 +;// -s*i4+c*i6 = (i6-i4)*s + (c-s)*i6 +;// 4. If |T(u)|<=1 then from the IDCT definition, +;// |f(x)| <= ((1/sqrt2) + |c(1,x)| + .. + |c(7,x)|)/2 +;// = ((1/sqrt2) + cos(pi/16) + ... + cos(7*pi/16))/2 +;// = ((1/sqrt2) + (cot(pi/32)-1)/2)/2 +;// = (1 + cos(pi/16) + cos(2pi/16) + cos(3pi/16))/sqrt(2) +;// = (approx)2.64 +;// So the max gain of the 2D IDCT is ~x7.0 = 3 bits. +;// The table below shows input patterns generating the maximum +;// value of |f(x)| for input in the range |T(u)|<=1. M=-1, P=+1 +;// InputPattern Max |f(x)| +;// PPPPPPPP |f0| = 2.64 +;// PPPMMMMM |f1| = 2.64 +;// PPMMMPPP |f2| = 2.64 +;// PPMMPPMM |f3| = 2.64 +;// PMMPPMMP |f4| = 2.64 +;// PMMPMMPM |f5| = 2.64 +;// PMPPMPMP |f6| = 2.64 +;// PMPMPMPM |f7| = 2.64 +;// Note that this input pattern is the transpose of the +;// corresponding max input pattern for the FDCT. + +;// Arguments + +pSrc RN 0 ;// source data buffer +Stride RN 1 ;// destination stride in bytes +pDest RN 2 ;// destination data buffer +pScale RN 3 ;// pointer to scaling table + + + ;// DCT Inverse Macro + ;// The DCT code should be parametrized according + ;// to the following inputs: + ;// $outsize = "u8" : 8-bit unsigned data saturated (0 to +255) + ;// "s9" : 16-bit signed data saturated to 9-bit (-256 to +255) + ;// "s16" : 16-bit signed data not saturated (max size ~+/-14273) + ;// $inscale = "s16" : signed 16-bit aan-scale table, Q15 format, with 4 byte alignment + ;// "s32" : signed 32-bit aan-scale table, Q23 format, with 4 byte alignment + ;// + ;// Inputs: + ;// pSrc = r0 = Pointer to input data + ;// Range is -256 to +255 (9-bit) + ;// Stride = r1 = Stride between output lines in bytes + ;// pDest = r2 = Pointer to output data + ;// pScale = r3 = Pointer to aan-scale table in the format defined by $inscale + + + + MACRO + M_IDCT $outsize, $inscale, $stride + LCLA SHIFT + + + IF ARM1136JS + +;// REGISTER ALLOCATION +;// 
This is hard since we have 8 values, 9 free registers and each +;// butterfly requires a temporary register. We also want to +;// maintain register order so we can use LDM/STM. The table below +;// summarises the register allocation that meets all these criteria. +;// a=1stcol, b=2ndcol, f,g,h,i are dataflow points described above. +;// +;// r1 a01 g0 h0 +;// r4 b01 f0 g1 h1 i0 +;// r5 a23 f1 g2 i1 +;// r6 b23 f2 g3 h2 i2 +;// r7 a45 f3 h3 i3 +;// r8 b45 f4 g4 h4 i4 +;// r9 a67 f5 g5 h5 i5 +;// r10 b67 f6 g6 h6 i6 +;// r11 f7 g7 h7 i7 +;// +ra01 RN 1 +rb01 RN 4 +ra23 RN 5 +rb23 RN 6 +ra45 RN 7 +rb45 RN 8 +ra67 RN 9 +rb67 RN 10 +rtmp RN 11 +csPiBy8 RN 12 ;// [ (Sin(pi/8)@Q15), (Cos(pi/8)@Q15) ] +LoopRR2 RN 14 ;// [ LoopNumber<<13 , (1/Sqrt(2))@Q15 ] +;// Transpose allocation +xft RN ra01 +xf0 RN rb01 +xf1 RN ra23 +xf2 RN rb23 +xf3 RN ra45 +xf4 RN rb45 +xf5 RN ra67 +xf6 RN rb67 +xf7 RN rtmp +;// IStage 1 allocation +xg0 RN xft +xg1 RN xf0 +xg2 RN xf1 +xg3 RN xf2 +xgt RN xf3 +xg4 RN xf4 +xg5 RN xf5 +xg6 RN xf6 +xg7 RN xf7 +;// IStage 2 allocation +xh0 RN xg0 +xh1 RN xg1 +xht RN xg2 +xh2 RN xg3 +xh3 RN xgt +xh4 RN xg4 +xh5 RN xg5 +xh6 RN xg6 +xh7 RN xg7 +;// IStage 3,4 allocation +xit RN xh0 +xi0 RN xh1 +xi1 RN xht +xi2 RN xh2 +xi3 RN xh3 +xi4 RN xh4 +xi5 RN xh5 +xi6 RN xh6 +xi7 RN xh7 + + M_STR pDest, ppDest + IF "$stride"="s" + M_STR Stride, pStride + ENDIF + M_ADR pDest, pBlk + LDR csPiBy8, =0x30fc7642 + LDR LoopRR2, =0x00005a82 + +v6_idct_col$_F + ;// Load even values + LDR xi4, [pSrc], #4 ;// j0 + LDR xi5, [pSrc, #4*16-4] ;// j4 + LDR xi6, [pSrc, #2*16-4] ;// j2 + LDR xi7, [pSrc, #6*16-4] ;// j6 + + ;// Scale Even Values + IF "$inscale"="s16" ;// 16x16 mul +SHIFT SETA 12 + LDR xi0, [pScale], #4 + LDR xi1, [pScale, #4*16-4] + LDR xi2, [pScale, #2*16-4] + MOV xit, #1<<(SHIFT-1) + SMLABB xi3, xi0, xi4, xit + SMLATT xi4, xi0, xi4, xit + SMLABB xi0, xi1, xi5, xit + SMLATT xi5, xi1, xi5, xit + MOV xi3, xi3, ASR #SHIFT + PKHBT xi4, xi3, xi4, LSL #(16-SHIFT) + LDR xi3, 
[pScale, #6*16-4] + SMLABB xi1, xi2, xi6, xit + SMLATT xi6, xi2, xi6, xit + MOV xi0, xi0, ASR #SHIFT + PKHBT xi5, xi0, xi5, LSL #(16-SHIFT) + SMLABB xi2, xi3, xi7, xit + SMLATT xi7, xi3, xi7, xit + MOV xi1, xi1, ASR #SHIFT + PKHBT xi6, xi1, xi6, LSL #(16-SHIFT) + MOV xi2, xi2, ASR #SHIFT + PKHBT xi7, xi2, xi7, LSL #(16-SHIFT) + ENDIF + IF "$inscale"="s32" ;// 32x16 mul +SHIFT SETA (12+8-16) + MOV xit, #1<<(SHIFT-1) + LDR xi0, [pScale], #8 + LDR xi1, [pScale, #0*32+4-8] + LDR xi2, [pScale, #4*32-8] + LDR xi3, [pScale, #4*32+4-8] + SMLAWB xi0, xi0, xi4, xit + SMLAWT xi1, xi1, xi4, xit + SMLAWB xi2, xi2, xi5, xit + SMLAWT xi3, xi3, xi5, xit + MOV xi0, xi0, ASR #SHIFT + PKHBT xi4, xi0, xi1, LSL #(16-SHIFT) + MOV xi2, xi2, ASR #SHIFT + PKHBT xi5, xi2, xi3, LSL #(16-SHIFT) + LDR xi0, [pScale, #2*32-8] + LDR xi1, [pScale, #2*32+4-8] + LDR xi2, [pScale, #6*32-8] + LDR xi3, [pScale, #6*32+4-8] + SMLAWB xi0, xi0, xi6, xit + SMLAWT xi1, xi1, xi6, xit + SMLAWB xi2, xi2, xi7, xit + SMLAWT xi3, xi3, xi7, xit + MOV xi0, xi0, ASR #SHIFT + PKHBT xi6, xi0, xi1, LSL #(16-SHIFT) + MOV xi2, xi2, ASR #SHIFT + PKHBT xi7, xi2, xi3, LSL #(16-SHIFT) + ENDIF + + ;// Load odd values + LDR xi0, [pSrc, #1*16-4] ;// j1 + LDR xi1, [pSrc, #7*16-4] ;// j7 + LDR xi2, [pSrc, #5*16-4] ;// j5 + LDR xi3, [pSrc, #3*16-4] ;// j3 + + IF {TRUE} + ;// shortcut if odd values 0 + TEQ xi0, #0 + TEQEQ xi1, #0 + TEQEQ xi2, #0 + TEQEQ xi3, #0 + BEQ v6OddZero$_F + ENDIF + + ;// Store scaled even values + STMIA pDest, {xi4, xi5, xi6, xi7} + + ;// Scale odd values + IF "$inscale"="s16" + ;// Perform AAN Scale + LDR xi4, [pScale, #1*16-4] + LDR xi5, [pScale, #7*16-4] + LDR xi6, [pScale, #5*16-4] + SMLABB xi7, xi0, xi4, xit + SMLATT xi0, xi0, xi4, xit + SMLABB xi4, xi1, xi5, xit + SMLATT xi1, xi1, xi5, xit + MOV xi7, xi7, ASR #SHIFT + PKHBT xi0, xi7, xi0, LSL #(16-SHIFT) + LDR xi7, [pScale, #3*16-4] + SMLABB xi5, xi2, xi6, xit + SMLATT xi2, xi2, xi6, xit + MOV xi4, xi4, ASR #SHIFT + PKHBT xi1, xi4, xi1, LSL #(16-SHIFT) 
+ SMLABB xi6, xi3, xi7, xit + SMLATT xi3, xi3, xi7, xit + MOV xi5, xi5, ASR #SHIFT + PKHBT xi2, xi5, xi2, LSL #(16-SHIFT) + MOV xi6, xi6, ASR #SHIFT + PKHBT xi3, xi6, xi3, LSL #(16-SHIFT) + ENDIF + IF "$inscale"="s32" ;// 32x16 mul + LDR xi4, [pScale, #1*32-8] + LDR xi5, [pScale, #1*32+4-8] + LDR xi6, [pScale, #7*32-8] + LDR xi7, [pScale, #7*32+4-8] + SMLAWB xi4, xi4, xi0, xit + SMLAWT xi5, xi5, xi0, xit + SMLAWB xi6, xi6, xi1, xit + SMLAWT xi7, xi7, xi1, xit + MOV xi4, xi4, ASR #SHIFT + PKHBT xi0, xi4, xi5, LSL #(16-SHIFT) + MOV xi6, xi6, ASR #SHIFT + PKHBT xi1, xi6, xi7, LSL #(16-SHIFT) + LDR xi4, [pScale, #5*32-8] + LDR xi5, [pScale, #5*32+4-8] + LDR xi6, [pScale, #3*32-8] + LDR xi7, [pScale, #3*32+4-8] + SMLAWB xi4, xi4, xi2, xit + SMLAWT xi5, xi5, xi2, xit + SMLAWB xi6, xi6, xi3, xit + SMLAWT xi7, xi7, xi3, xit + MOV xi4, xi4, ASR #SHIFT + PKHBT xi2, xi4, xi5, LSL #(16-SHIFT) + MOV xi6, xi6, ASR #SHIFT + PKHBT xi3, xi6, xi7, LSL #(16-SHIFT) + ENDIF + + LDR xit, =0x00010001 ;// rounding constant + SADD16 xi5, xi0, xi1 ;// (j1+j7)/2 + SHADD16 xi5, xi5, xit + + SSUB16 xi6, xi0, xi1 ;// j1-j7 + SADD16 xi7, xi2, xi3 ;// (j5+j3)/2 + SHADD16 xi7, xi7, xit + + SSUB16 xi4, xi2, xi3 ;// j5-j3 + + SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2 + + PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a + PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b + + SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s] + SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s] + SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c] + SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c] + + SMULBB xi1, xi3, LoopRR2 + SMULTB xi3, xi3, LoopRR2 + + PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4 + PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4 + SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4 + + ;// xi0,xi1,xi2,xi3 now free + ;// IStage 4,3, rows 2to3 x1/2 + + MOV xi3, xi3, LSL #1 + PKHTB xh5, xi3, xi1, ASR#15 ;// h5/4 + LDRD xi0, [pDest, #8] ;// j2,j6 scaled + + ;// IStage 2, rows4to7 + SSUB16 xg6, xh6, xh7 + SSUB16 xg5, xh5, xg6 + SSUB16 xg4, xh4, xg5 + + SSUB16 xi2, xi0, xi1 ;// 
(j2-j6) + + SHADD16 xi3, xi0, xi1 ;// (j2+j6)/2 + + SMULBB xi0, xi2, LoopRR2 + SMULTB xi2, xi2, LoopRR2 + + MOV xi2, xi2, LSL #1 + PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4 + + ;// xi0, xi1 now free + ;// IStage 4,3 rows 0to1 x 1/2 + LDRD xi0, [pDest] ;// j0, j4 scaled + SSUB16 xh2, xh2, xi3 + ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows + + SHADD16 xh0, xi0, xi1 + SHSUB16 xh1, xi0, xi1 + + ;// IStage 2 rows 0to3 x 1/2 + SHSUB16 xg2, xh1, xh2 + SHADD16 xg1, xh1, xh2 + SHSUB16 xg3, xh0, xh3 + SHADD16 xg0, xh0, xh3 + + ;// IStage 1 all rows + SADD16 xf3, xg3, xg4 + SSUB16 xf4, xg3, xg4 + SADD16 xf2, xg2, xg5 + SSUB16 xf5, xg2, xg5 + SADD16 xf1, xg1, xg6 + SSUB16 xf6, xg1, xg6 + SADD16 xf0, xg0, xg7 + SSUB16 xf7, xg0, xg7 + + ;// Transpose, store and loop + PKHBT ra01, xf0, xf1, LSL #16 + PKHTB rb01, xf1, xf0, ASR #16 + + PKHBT ra23, xf2, xf3, LSL #16 + PKHTB rb23, xf3, xf2, ASR #16 + + PKHBT ra45, xf4, xf5, LSL #16 + PKHTB rb45, xf5, xf4, ASR #16 + + PKHBT ra67, xf6, xf7, LSL #16 + STMIA pDest!, {ra01, ra23, ra45, ra67} + PKHTB rb67, xf7, xf6, ASR #16 + STMIA pDest!, {rb01, rb23, rb45, rb67} + BCC v6_idct_col$_F + + SUB pSrc, pDest, #(64*2) + M_LDR pDest, ppDest + IF "$stride"="s" + M_LDR pScale, pStride + ENDIF + B v6_idct_row$_F + +v6OddZero$_F + SSUB16 xi2, xi6, xi7 ;// (j2-j6) + SHADD16 xi3, xi6, xi7 ;// (j2+j6)/2 + + SMULBB xi0, xi2, LoopRR2 + SMULTB xi2, xi2, LoopRR2 + + MOV xi2, xi2, LSL #1 + PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4 + SSUB16 xh2, xh2, xi3 + + ;// xi0, xi1 now free + ;// IStage 4,3 rows 0to1 x 1/2 + + SHADD16 xh0, xi4, xi5 + SHSUB16 xh1, xi4, xi5 + + ;// IStage 2 rows 0to3 x 1/2 + SHSUB16 xg2, xh1, xh2 + SHADD16 xg1, xh1, xh2 + SHSUB16 xg3, xh0, xh3 + SHADD16 xg0, xh0, xh3 + + ;// IStage 1 all rows + MOV xf3, xg3 + MOV xf4, xg3 + MOV xf2, xg2 + MOV xf5, xg2 + MOV xf1, xg1 + MOV xf6, xg1 + MOV xf0, xg0 + MOV xf7, xg0 + + ;// Transpose + PKHBT ra01, xf0, xf1, LSL #16 + PKHTB rb01, xf1, xf0, ASR #16 + + PKHBT ra23, xf2, xf3, LSL #16 + 
PKHTB rb23, xf3, xf2, ASR #16 + + PKHBT ra45, xf4, xf5, LSL #16 + PKHTB rb45, xf5, xf4, ASR #16 + + PKHBT ra67, xf6, xf7, LSL #16 + PKHTB rb67, xf7, xf6, ASR #16 + + STMIA pDest!, {ra01, ra23, ra45, ra67} + ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows + STMIA pDest!, {rb01, rb23, rb45, rb67} + + BCC v6_idct_col$_F + SUB pSrc, pDest, #(64*2) + M_LDR pDest, ppDest + IF "$stride"="s" + M_LDR pScale, pStride + ENDIF + + +v6_idct_row$_F + ;// IStage 4,3, rows4to7 x1/4 + LDR xit, =0x00010001 ;// rounding constant + LDR xi0, [pSrc, #1*16] ;// j1 + LDR xi1, [pSrc, #7*16] ;// 4*j7 + LDR xi2, [pSrc, #5*16] ;// j5 + LDR xi3, [pSrc, #3*16] ;// j3 + + SHADD16 xi1, xi1, xit ;// 2*j7 + SHADD16 xi1, xi1, xit ;// j7 + + SHADD16 xi5, xi0, xi1 ;// (j1+j7)/2 + SSUB16 xi6, xi0, xi1 ;// j1-j7 + SHADD16 xi7, xi2, xi3 ;// (j5+j3)/2 + SSUB16 xi4, xi2, xi3 ;// j5-j3 + + SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2 + + PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a + PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b + + SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s] + SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s] + SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c] + SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c] + + SMULBB xi1, xi3, LoopRR2 + SMULTB xi3, xi3, LoopRR2 + + PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4 + PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4 + SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4 + + MOV xi3, xi3, LSL #1 + PKHTB xh5, xi3, xi1, ASR#15 ;// h5/4 + + ;// xi0,xi1,xi2,xi3 now free + ;// IStage 4,3, rows 2to3 x1/2 + + LDR xi0, [pSrc, #2*16] ;// j2 + LDR xi1, [pSrc, #6*16] ;// 2*j6 + + ;// IStage 2, rows4to7 + SSUB16 xg6, xh6, xh7 + SSUB16 xg5, xh5, xg6 + SSUB16 xg4, xh4, xg5 + + SHADD16 xi1, xi1, xit ;// j6 + SSUB16 xi2, xi0, xi1 ;// (j2-j6) + SHADD16 xi3, xi0, xi1 ;// (j2+j6)/2 + + SMULBB xi0, xi2, LoopRR2 + SMULTB xi2, xi2, LoopRR2 + + MOV xi2, xi2, LSL #1 + + PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4 + + ;// xi0, xi1 now free + ;// IStage 4,3 rows 0to1 x 1/2 + LDR xi1, [pSrc, #4*16] ;// j4 + LDR xi0, [pSrc], #4 ;// j0 
+ + SSUB16 xh2, xh2, xi3 + ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows + + ADD xi0, xi0, xit, LSL #2 ;// ensure correct round + SHADD16 xh0, xi0, xi1 ;// of DC result + SHSUB16 xh1, xi0, xi1 + + ;// IStage 2 rows 0to3 x 1/2 + SHSUB16 xg2, xh1, xh2 + SHADD16 xg1, xh1, xh2 + SHSUB16 xg3, xh0, xh3 + SHADD16 xg0, xh0, xh3 + + ;// IStage 1 all rows + SHADD16 xf3, xg3, xg4 + SHSUB16 xf4, xg3, xg4 + SHADD16 xf2, xg2, xg5 + SHSUB16 xf5, xg2, xg5 + SHADD16 xf1, xg1, xg6 + SHSUB16 xf6, xg1, xg6 + SHADD16 xf0, xg0, xg7 + SHSUB16 xf7, xg0, xg7 + + ;// Saturate + IF ("$outsize"="u8") + USAT16 xf0, #8, xf0 + USAT16 xf1, #8, xf1 + USAT16 xf2, #8, xf2 + USAT16 xf3, #8, xf3 + USAT16 xf4, #8, xf4 + USAT16 xf5, #8, xf5 + USAT16 xf6, #8, xf6 + USAT16 xf7, #8, xf7 + ENDIF + IF ("$outsize"="s9") + SSAT16 xf0, #9, xf0 + SSAT16 xf1, #9, xf1 + SSAT16 xf2, #9, xf2 + SSAT16 xf3, #9, xf3 + SSAT16 xf4, #9, xf4 + SSAT16 xf5, #9, xf5 + SSAT16 xf6, #9, xf6 + SSAT16 xf7, #9, xf7 + ENDIF + + ;// Transpose to Row, Pack and store + IF ("$outsize"="u8") + ORR xf0, xf0, xf1, LSL #8 ;// [ b1 b0 a1 a0 ] + ORR xf2, xf2, xf3, LSL #8 ;// [ b3 b2 a3 a2 ] + ORR xf4, xf4, xf5, LSL #8 ;// [ b5 b4 a5 a4 ] + ORR xf6, xf6, xf7, LSL #8 ;// [ b7 b6 a7 a6 ] + PKHBT ra01, xf0, xf2, LSL #16 + PKHTB rb01, xf2, xf0, ASR #16 + PKHBT ra23, xf4, xf6, LSL #16 + PKHTB rb23, xf6, xf4, ASR #16 + STMIA pDest, {ra01, ra23} + IF "$stride"="s" + ADD pDest, pDest, pScale + STMIA pDest, {rb01, rb23} + ADD pDest, pDest, pScale + ELSE + ADD pDest, pDest, #($stride) + STMIA pDest, {rb01, rb23} + ADD pDest, pDest, #($stride) + ENDIF + ENDIF + IF ("$outsize"="s9"):LOR:("$outsize"="s16") + PKHBT ra01, xf0, xf1, LSL #16 + PKHTB rb01, xf1, xf0, ASR #16 + + PKHBT ra23, xf2, xf3, LSL #16 + PKHTB rb23, xf3, xf2, ASR #16 + + PKHBT ra45, xf4, xf5, LSL #16 + PKHTB rb45, xf5, xf4, ASR #16 + + PKHBT ra67, xf6, xf7, LSL #16 + PKHTB rb67, xf7, xf6, ASR #16 + + STMIA pDest, {ra01, ra23, ra45, ra67} + IF "$stride"="s" + ADD pDest, pDest, pScale + 
STMIA pDest, {rb01, rb23, rb45, rb67} + ADD pDest, pDest, pScale + ELSE + ADD pDest, pDest, #($stride) + STMIA pDest, {rb01, rb23, rb45, rb67} + ADD pDest, pDest, #($stride) + ENDIF + ENDIF + + BCC v6_idct_row$_F + ENDIF ;// ARM1136JS + + + IF CortexA8 + +Src0 EQU 7 +Src1 EQU 8 +Src2 EQU 9 +Src3 EQU 10 +Src4 EQU 11 +Src5 EQU 12 +Src6 EQU 13 +Src7 EQU 14 +Tmp EQU 15 + +qXj0 QN Src0.S16 +qXj1 QN Src1.S16 +qXj2 QN Src2.S16 +qXj3 QN Src3.S16 +qXj4 QN Src4.S16 +qXj5 QN Src5.S16 +qXj6 QN Src6.S16 +qXj7 QN Src7.S16 +qXjt QN Tmp.S16 + +dXj0lo DN (Src0*2).S16 +dXj0hi DN (Src0*2+1).S16 +dXj1lo DN (Src1*2).S16 +dXj1hi DN (Src1*2+1).S16 +dXj2lo DN (Src2*2).S16 +dXj2hi DN (Src2*2+1).S16 +dXj3lo DN (Src3*2).S16 +dXj3hi DN (Src3*2+1).S16 +dXj4lo DN (Src4*2).S16 +dXj4hi DN (Src4*2+1).S16 +dXj5lo DN (Src5*2).S16 +dXj5hi DN (Src5*2+1).S16 +dXj6lo DN (Src6*2).S16 +dXj6hi DN (Src6*2+1).S16 +dXj7lo DN (Src7*2).S16 +dXj7hi DN (Src7*2+1).S16 +dXjtlo DN (Tmp*2).S16 +dXjthi DN (Tmp*2+1).S16 + +qXi0 QN qXj0 +qXi1 QN qXj4 +qXi2 QN qXj2 +qXi3 QN qXj7 +qXi4 QN qXj5 +qXi5 QN qXjt +qXi6 QN qXj1 +qXi7 QN qXj6 +qXit QN qXj3 + +dXi0lo DN dXj0lo +dXi0hi DN dXj0hi +dXi1lo DN dXj4lo +dXi1hi DN dXj4hi +dXi2lo DN dXj2lo +dXi2hi DN dXj2hi +dXi3lo DN dXj7lo +dXi3hi DN dXj7hi +dXi4lo DN dXj5lo +dXi4hi DN dXj5hi +dXi5lo DN dXjtlo +dXi5hi DN dXjthi +dXi6lo DN dXj1lo +dXi6hi DN dXj1hi +dXi7lo DN dXj6lo +dXi7hi DN dXj6hi +dXitlo DN dXj3lo +dXithi DN dXj3hi + +qXh0 QN qXit +qXh1 QN qXi0 +qXh2 QN qXi2 +qXh3 QN qXi3 +qXh4 QN qXi7 +qXh5 QN qXi5 +qXh6 QN qXi4 +qXh7 QN qXi1 +qXht QN qXi6 + +dXh0lo DN dXitlo +dXh0hi DN dXithi +dXh1lo DN dXi0lo +dXh1hi DN dXi0hi +dXh2lo DN dXi2lo +dXh2hi DN dXi2hi +dXh3lo DN dXi3lo +dXh3hi DN dXi3hi +dXh4lo DN dXi7lo +dXh4hi DN dXi7hi +dXh5lo DN dXi5lo +dXh5hi DN dXi5hi +dXh6lo DN dXi4lo +dXh6hi DN dXi4hi +dXh7lo DN dXi1lo +dXh7hi DN dXi1hi +dXhtlo DN dXi6lo +dXhthi DN dXi6hi + +qXg0 QN qXh2 +qXg1 QN qXht +qXg2 QN qXh1 +qXg3 QN qXh0 +qXg4 QN qXh4 +qXg5 QN qXh5 +qXg6 QN qXh6 +qXg7 QN 
qXh7 +qXgt QN qXh3 + +qXf0 QN qXg6 +qXf1 QN qXg5 +qXf2 QN qXg4 +qXf3 QN qXgt +qXf4 QN qXg3 +qXf5 QN qXg2 +qXf6 QN qXg1 +qXf7 QN qXg0 +qXft QN qXg7 + + +qXt0 QN 1.S32 +qXt1 QN 2.S32 +qT0lo QN 1.S32 +qT0hi QN 2.S32 +qT1lo QN 3.S32 +qT1hi QN 4.S32 +qScalelo QN 5.S32 ;// used to read post scale values +qScalehi QN 6.S32 +qTemp0 QN 5.S32 +qTemp1 QN 6.S32 + + +Scale1 EQU 6 +Scale2 EQU 15 +qScale1 QN Scale1.S16 +qScale2 QN Scale2.S16 +dScale1lo DN (Scale1*2).S16 +dScale1hi DN (Scale1*2+1).S16 +dScale2lo DN (Scale2*2).S16 +dScale2hi DN (Scale2*2+1).S16 + +dCoefs DN 0.S16 ;// Scale coefficients in format {[0] [C] [S] [InvSqrt2]} +InvSqrt2 DN dCoefs[0] ;// 1/sqrt(2) in Q15 +S DN dCoefs[1] ;// Sin(PI/8) in Q15 +C DN dCoefs[2] ;// Cos(PI/8) in Q15 + +pTemp RN 12 + + + IMPORT armCOMM_IDCTCoef + + VLD1 {qXj0,qXj1}, [pSrc @64]! + VLD1 {qXj2,qXj3}, [pSrc @64]! + VLD1 {qXj4,qXj5}, [pSrc @64]! + VLD1 {qXj6,qXj7}, [pSrc @64]! + + ;// Load PreScale and multiply with Src + ;// IStage 4 + + IF "$inscale"="s16" ;// 16X16 Mul + M_IDCT_PRESCALE16 + ENDIF + + IF "$inscale"="s32" ;// 32X32 ,ul + M_IDCT_PRESCALE32 + ENDIF + + ;// IStage 3 + VQDMULH qXi2, qXi2, InvSqrt2 ;// i2/sqrt(2) + VHADD qXh0, qXi0, qXi1 ;// (i0+i1)/2 + VHSUB qXh1, qXi0, qXi1 ;// (i0-i1)/2 + VHADD qXh7, qXi5, qXi7 ;// (i5+i7)/4 + VSUB qXh5, qXi5, qXi7 ;// (i5-i7)/2 + VQDMULH qXh5, qXh5, InvSqrt2 ;// h5/sqrt(2) + VSUB qXh2, qXi2, qXi3 ;// h2, h3 + + VMULL qXt0, dXi4lo, C ;// c*i4 + VMLAL qXt0, dXi6lo, S ;// c*i4+s*i6 + VMULL qXt1, dXi4hi, C + VMLAL qXt1, dXi6hi, S + VSHRN dXh4lo, qXt0, #16 ;// h4 + VSHRN dXh4hi, qXt1, #16 + + VMULL qXt0, dXi6lo, C ;// c*i6 + VMLSL qXt0, dXi4lo, S ;// -s*i4 + c*h6 + VMULL qXt1, dXi6hi, C + VMLSL qXt1, dXi4hi, S + VSHRN dXh6lo, qXt0, #16 ;// h6 + VSHRN dXh6hi, qXt1, #16 + + ;// IStage 2 + VSUB qXg6, qXh6, qXh7 + VSUB qXg5, qXh5, qXg6 + VSUB qXg4, qXh4, qXg5 + VHADD qXg1, qXh1, qXh2 ;// (h1+h2)/2 + VHSUB qXg2, qXh1, qXh2 ;// (h1-h2)/2 + VHADD qXg0, qXh0, qXh3 ;// (h0+h3)/2 + VHSUB qXg3, qXh0, 
qXh3 ;// (h0-h3)/2 + + ;// IStage 1 all rows + VADD qXf3, qXg3, qXg4 + VSUB qXf4, qXg3, qXg4 + VADD qXf2, qXg2, qXg5 + VSUB qXf5, qXg2, qXg5 + VADD qXf1, qXg1, qXg6 + VSUB qXf6, qXg1, qXg6 + VADD qXf0, qXg0, qXg7 + VSUB qXf7, qXg0, qXg7 + + ;// Transpose, store and loop +XTR0 EQU Src5 +XTR1 EQU Tmp +XTR2 EQU Src6 +XTR3 EQU Src7 +XTR4 EQU Src3 +XTR5 EQU Src0 +XTR6 EQU Src1 +XTR7 EQU Src2 +XTRt EQU Src4 + +qA0 QN XTR0.S32 ;// for XTRpose +qA1 QN XTR1.S32 +qA2 QN XTR2.S32 +qA3 QN XTR3.S32 +qA4 QN XTR4.S32 +qA5 QN XTR5.S32 +qA6 QN XTR6.S32 +qA7 QN XTR7.S32 + +dB0 DN XTR0*2+1 ;// for using VSWP +dB1 DN XTR1*2+1 +dB2 DN XTR2*2+1 +dB3 DN XTR3*2+1 +dB4 DN XTR4*2 +dB5 DN XTR5*2 +dB6 DN XTR6*2 +dB7 DN XTR7*2 + + + VTRN qXf0, qXf1 + VTRN qXf2, qXf3 + VTRN qXf4, qXf5 + VTRN qXf6, qXf7 + VTRN qA0, qA2 + VTRN qA1, qA3 + VTRN qA4, qA6 + VTRN qA5, qA7 + VSWP dB0, dB4 + VSWP dB1, dB5 + VSWP dB2, dB6 + VSWP dB3, dB7 + + +qYj0 QN qXf0 +qYj1 QN qXf1 +qYj2 QN qXf2 +qYj3 QN qXf3 +qYj4 QN qXf4 +qYj5 QN qXf5 +qYj6 QN qXf6 +qYj7 QN qXf7 +qYjt QN qXft + +dYj0lo DN (XTR0*2).S16 +dYj0hi DN (XTR0*2+1).S16 +dYj1lo DN (XTR1*2).S16 +dYj1hi DN (XTR1*2+1).S16 +dYj2lo DN (XTR2*2).S16 +dYj2hi DN (XTR2*2+1).S16 +dYj3lo DN (XTR3*2).S16 +dYj3hi DN (XTR3*2+1).S16 +dYj4lo DN (XTR4*2).S16 +dYj4hi DN (XTR4*2+1).S16 +dYj5lo DN (XTR5*2).S16 +dYj5hi DN (XTR5*2+1).S16 +dYj6lo DN (XTR6*2).S16 +dYj6hi DN (XTR6*2+1).S16 +dYj7lo DN (XTR7*2).S16 +dYj7hi DN (XTR7*2+1).S16 +dYjtlo DN (XTRt*2).S16 +dYjthi DN (XTRt*2+1).S16 + +qYi0 QN qYj0 +qYi1 QN qYj4 +qYi2 QN qYj2 +qYi3 QN qYj7 +qYi4 QN qYj5 +qYi5 QN qYjt +qYi6 QN qYj1 +qYi7 QN qYj6 +qYit QN qYj3 + +dYi0lo DN dYj0lo +dYi0hi DN dYj0hi +dYi1lo DN dYj4lo +dYi1hi DN dYj4hi +dYi2lo DN dYj2lo +dYi2hi DN dYj2hi +dYi3lo DN dYj7lo +dYi3hi DN dYj7hi +dYi4lo DN dYj5lo +dYi4hi DN dYj5hi +dYi5lo DN dYjtlo +dYi5hi DN dYjthi +dYi6lo DN dYj1lo +dYi6hi DN dYj1hi +dYi7lo DN dYj6lo +dYi7hi DN dYj6hi +dYitlo DN dYj3lo +dYithi DN dYj3hi + +qYh0 QN qYit +qYh1 QN qYi0 +qYh2 QN qYi2 +qYh3 
QN qYi3 +qYh4 QN qYi7 +qYh5 QN qYi5 +qYh6 QN qYi4 +qYh7 QN qYi1 +qYht QN qYi6 + +dYh0lo DN dYitlo +dYh0hi DN dYithi +dYh1lo DN dYi0lo +dYh1hi DN dYi0hi +dYh2lo DN dYi2lo +dYh2hi DN dYi2hi +dYh3lo DN dYi3lo +dYh3hi DN dYi3hi +dYh4lo DN dYi7lo +dYh4hi DN dYi7hi +dYh5lo DN dYi5lo +dYh5hi DN dYi5hi +dYh6lo DN dYi4lo +dYh6hi DN dYi4hi +dYh7lo DN dYi1lo +dYh7hi DN dYi1hi +dYhtlo DN dYi6lo +dYhthi DN dYi6hi + +qYg0 QN qYh2 +qYg1 QN qYht +qYg2 QN qYh1 +qYg3 QN qYh0 +qYg4 QN qYh4 +qYg5 QN qYh5 +qYg6 QN qYh6 +qYg7 QN qYh7 +qYgt QN qYh3 + +qYf0 QN qYg6 +qYf1 QN qYg5 +qYf2 QN qYg4 +qYf3 QN qYgt +qYf4 QN qYg3 +qYf5 QN qYg2 +qYf6 QN qYg1 +qYf7 QN qYg0 +qYft QN qYg7 + + VRSHR qYj7, qYj7, #2 + VRSHR qYj6, qYj6, #1 + + VHADD qYi5, qYj1, qYj7 ;// i5 = (j1+j7)/2 + VSUB qYi6, qYj1, qYj7 ;// i6 = j1-j7 + VHADD qYi3, qYj2, qYj6 ;// i3 = (j2+j6)/2 + VSUB qYi2, qYj2, qYj6 ;// i2 = j2-j6 + VHADD qYi7, qYj5, qYj3 ;// i7 = (j5+j3)/2 + VSUB qYi4, qYj5, qYj3 ;// i4 = j5-j3 + + VQDMULH qYi2, qYi2, InvSqrt2 ;// i2/sqrt(2) + ;// IStage 4,3 rows 0to1 x 1/2 + + MOV pTemp, #0x4 ;// ensure correct round + VDUP qScale1, pTemp ;// of DC result + VADD qYi0, qYi0, qScale1 + + VHADD qYh0, qYi0, qYi1 ;// (i0+i1)/2 + VHSUB qYh1, qYi0, qYi1 ;// (i0-i1)/2 + + VHADD qYh7, qYi5, qYi7 ;// (i5+i7)/4 + VSUB qYh5, qYi5, qYi7 ;// (i5-i7)/2 + VSUB qYh2, qYi2, qYi3 ;// h2, h3 + VQDMULH qYh5, qYh5, InvSqrt2 ;// h5/sqrt(2) + + VMULL qXt0, dYi4lo, C ;// c*i4 + VMLAL qXt0, dYi6lo, S ;// c*i4+s*i6 + VMULL qXt1, dYi4hi, C + VMLAL qXt1, dYi6hi, S + VSHRN dYh4lo, qXt0, #16 ;// h4 + VSHRN dYh4hi, qXt1, #16 + + VMULL qXt0, dYi6lo, C ;// c*i6 + VMLSL qXt0, dYi4lo, S ;// -s*i4 + c*h6 + VMULL qXt1, dYi6hi, C + VMLSL qXt1, dYi4hi, S + VSHRN dYh6lo, qXt0, #16 ;// h6 + VSHRN dYh6hi, qXt1, #16 + + VSUB qYg6, qYh6, qYh7 + VSUB qYg5, qYh5, qYg6 + VSUB qYg4, qYh4, qYg5 + + ;// IStage 2 rows 0to3 x 1/2 + VHADD qYg1, qYh1, qYh2 ;// (h1+h2)/2 + VHSUB qYg2, qYh1, qYh2 ;// (h1-h2)/2 + VHADD qYg0, qYh0, qYh3 ;// (h0+h3)/2 + VHSUB qYg3, qYh0, 
qYh3 ;// (h0-h3)/2 + + + ;// IStage 1 all rows + VHADD qYf3, qYg3, qYg4 + VHSUB qYf4, qYg3, qYg4 + VHADD qYf2, qYg2, qYg5 + VHSUB qYf5, qYg2, qYg5 + VHADD qYf1, qYg1, qYg6 + VHSUB qYf6, qYg1, qYg6 + VHADD qYf0, qYg0, qYg7 + VHSUB qYf7, qYg0, qYg7 + +YTR0 EQU Src0 +YTR1 EQU Src4 +YTR2 EQU Src1 +YTR3 EQU Src2 +YTR4 EQU Src7 +YTR5 EQU Src5 +YTR6 EQU Tmp +YTR7 EQU Src6 +YTRt EQU Src3 + +qC0 QN YTR0.S32 ;// for YTRpose +qC1 QN YTR1.S32 +qC2 QN YTR2.S32 +qC3 QN YTR3.S32 +qC4 QN YTR4.S32 +qC5 QN YTR5.S32 +qC6 QN YTR6.S32 +qC7 QN YTR7.S32 + +dD0 DN YTR0*2+1 ;// for using VSWP +dD1 DN YTR1*2+1 +dD2 DN YTR2*2+1 +dD3 DN YTR3*2+1 +dD4 DN YTR4*2 +dD5 DN YTR5*2 +dD6 DN YTR6*2 +dD7 DN YTR7*2 + + VTRN qYf0, qYf1 + VTRN qYf2, qYf3 + VTRN qYf4, qYf5 + VTRN qYf6, qYf7 + VTRN qC0, qC2 + VTRN qC1, qC3 + VTRN qC4, qC6 + VTRN qC5, qC7 + VSWP dD0, dD4 + VSWP dD1, dD5 + VSWP dD2, dD6 + VSWP dD3, dD7 + + +dYf0U8 DN YTR0*2.U8 +dYf1U8 DN YTR1*2.U8 +dYf2U8 DN YTR2*2.U8 +dYf3U8 DN YTR3*2.U8 +dYf4U8 DN YTR4*2.U8 +dYf5U8 DN YTR5*2.U8 +dYf6U8 DN YTR6*2.U8 +dYf7U8 DN YTR7*2.U8 + + ;// + ;// Do saturation if outsize is other than S16 + ;// + + IF ("$outsize"="u8") + ;// Output range [0-255] + VQMOVN dYf0U8, qYf0 + VQMOVN dYf1U8, qYf1 + VQMOVN dYf2U8, qYf2 + VQMOVN dYf3U8, qYf3 + VQMOVN dYf4U8, qYf4 + VQMOVN dYf5U8, qYf5 + VQMOVN dYf6U8, qYf6 + VQMOVN dYf7U8, qYf7 + ENDIF + + IF ("$outsize"="s9") + ;// Output range [-256 to +255] + VQSHL qYf0, qYf0, #16-9 + VQSHL qYf1, qYf1, #16-9 + VQSHL qYf2, qYf2, #16-9 + VQSHL qYf3, qYf3, #16-9 + VQSHL qYf4, qYf4, #16-9 + VQSHL qYf5, qYf5, #16-9 + VQSHL qYf6, qYf6, #16-9 + VQSHL qYf7, qYf7, #16-9 + + VSHR qYf0, qYf0, #16-9 + VSHR qYf1, qYf1, #16-9 + VSHR qYf2, qYf2, #16-9 + VSHR qYf3, qYf3, #16-9 + VSHR qYf4, qYf4, #16-9 + VSHR qYf5, qYf5, #16-9 + VSHR qYf6, qYf6, #16-9 + VSHR qYf7, qYf7, #16-9 + ENDIF + + ;// Store output depending on the Stride size + IF "$stride"="s" + VST1 qYf0, [pDest @64], Stride + VST1 qYf1, [pDest @64], Stride + VST1 qYf2, [pDest @64], 
Stride + VST1 qYf3, [pDest @64], Stride + VST1 qYf4, [pDest @64], Stride + VST1 qYf5, [pDest @64], Stride + VST1 qYf6, [pDest @64], Stride + VST1 qYf7, [pDest @64] + ELSE + IF ("$outsize"="u8") + VST1 dYf0U8, [pDest @64], #8 + VST1 dYf1U8, [pDest @64], #8 + VST1 dYf2U8, [pDest @64], #8 + VST1 dYf3U8, [pDest @64], #8 + VST1 dYf4U8, [pDest @64], #8 + VST1 dYf5U8, [pDest @64], #8 + VST1 dYf6U8, [pDest @64], #8 + VST1 dYf7U8, [pDest @64] + ELSE + ;// ("$outsize"="s9") or ("$outsize"="s16") + VST1 qYf0, [pDest @64], #16 + VST1 qYf1, [pDest @64], #16 + VST1 qYf2, [pDest @64], #16 + VST1 qYf3, [pDest @64], #16 + VST1 qYf4, [pDest @64], #16 + VST1 qYf5, [pDest @64], #16 + VST1 qYf6, [pDest @64], #16 + VST1 qYf7, [pDest @64] + ENDIF + + ENDIF + + + + ENDIF ;// CortexA8 + + + + MEND + + ;// Scale TWO input rows with TWO rows of 16 bit scale values + ;// + ;// This macro is used by M_IDCT_PRESCALE16 to pre-scale one row + ;// input (Eight input values) with one row of scale values. Also + ;// Loads next scale values from pScale, if $LastRow flag is not set. 
+ ;// + ;// Input Registers: + ;// + ;// $dAlo - Input D register with first four S16 values of row n + ;// $dAhi - Input D register with next four S16 values of row n + ;// $dBlo - Input D register with first four S16 values of row n+1 + ;// $dBhi - Input D register with next four S16 values of row n+1 + ;// pScale - Pointer to next row of scale values + ;// qT0lo - Temporary scratch register + ;// qT0hi - Temporary scratch register + ;// qT1lo - Temporary scratch register + ;// qT1hi - Temporary scratch register + ;// dScale1lo - Scale value of row n + ;// dScale1hi - Scale value of row n + ;// dScale2lo - Scale value of row n+1 + ;// dScale2hi - Scale value of row n+1 + ;// + ;// Input Flag + ;// + ;// $LastRow - Flag to indicate whether the current row pair is the last one. + ;// The next two rows of scale values are loaded from pScale + ;// only when this flag is "0". + ;// + ;// Output Registers: + ;// + ;// $dAlo - Scaled output values (first four S16 of row n) + ;// $dAhi - Scaled output values (next four S16 of row n) + ;// $dBlo - Scaled output values (first four S16 of row n+1) + ;// $dBhi - Scaled output values (next four S16 of row n+1) + ;// qScale1 - Scale values for row n+2 (reloaded only when $LastRow is "0") + ;// qScale2 - Scale values for row n+3 (reloaded only when $LastRow is "0") + ;// pScale - Pointer to next row of scale values (advanced by 32 bytes when $LastRow is "0") + ;// + MACRO + M_IDCT_SCALE16 $dAlo, $dAhi, $dBlo, $dBhi, $LastRow + VMULL qT0lo, $dAlo, dScale1lo ;// Widening multiply: row n, first four values x scale + VMULL qT0hi, $dAhi, dScale1hi ;// Widening multiply: row n, next four values x scale + VMULL qT1lo, $dBlo, dScale2lo ;// Widening multiply: row n+1, first four values x scale + VMULL qT1hi, $dBhi, dScale2hi ;// Widening multiply: row n+1, next four values x scale + IF "$LastRow"="0" + VLD1 qScale1, [pScale], #16 ;// Load scale for row n+2 (used by the next invocation) + VLD1 qScale2, [pScale], #16 ;// Load scale for row n+3 (used by the next invocation) + ENDIF + VQRSHRN $dAlo, qT0lo, #12 ;// Rounding shift right by 12, saturate and narrow back to S16 + VQRSHRN $dAhi, qT0hi, #12 + VQRSHRN $dBlo, qT1lo, #12 + VQRSHRN $dBhi, qT1hi, #12 + MEND + + ;// Scale 8x8 block input values with 16 bit scale values + ;// + ;// This macro is used to pre-scale block of 8x8 input. + ;// This also does the 1st stage transformations of IDCT. 
+ ;// + ;// Input Registers: + ;// + ;// dXjnlo - n th input D register with first four S16 values + ;// dXjnhi - n th input D register with next four S16 values + ;// qXjn - n th input Q register with eight S16 values + ;// pScale - Pointer to scale values + ;// + ;// Output Registers: + ;// + ;// qXin - n th output Q register with eight S16 output values of 1st stage + ;// dCoefs - DCT inverse AAN constants loaded from armCOMM_IDCTCoef + ;// pSrc - Overwritten with the address of armCOMM_IDCTCoef + ;// + MACRO + M_IDCT_PRESCALE16 + VLD1 qScale1, [pScale], #16 ;// Load Pre scale for row 0 + VLD1 qScale2, [pScale], #16 ;// Load Pre scale for row 1 + M_IDCT_SCALE16 dXj0lo, dXj0hi, dXj1lo, dXj1hi, 0 ;// Pre scale row 0 & 1 + M_IDCT_SCALE16 dXj2lo, dXj2hi, dXj3lo, dXj3hi, 0 ;// Pre scale row 2 & 3 + M_IDCT_SCALE16 dXj4lo, dXj4hi, dXj5lo, dXj5hi, 0 ;// Pre scale row 4 & 5 + M_IDCT_SCALE16 dXj6lo, dXj6hi, dXj7lo, dXj7hi, 1 ;// Pre scale row 6 & 7 (last pair: no further scale load) + VHADD qXi5, qXj1, qXj7 ;// (j1+j7)/2 + VSUB qXi6, qXj1, qXj7 ;// j1-j7 + LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants (pSrc is reused as the table pointer) + VHADD qXi3, qXj2, qXj6 ;// (j2+j6)/2 + VSUB qXi2, qXj2, qXj6 ;// j2-j6 + VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants + VHADD qXi7, qXj5, qXj3 ;// (j5+j3)/2 + VSUB qXi4, qXj5, qXj3 ;// j5-j3 + MEND + + + ;// Scale 8x8 block input values with 32 bit scale values + ;// + ;// This macro is used to pre-scale block of 8x8 input. + ;// This also does the 1st stage transformations of IDCT. 
+ ;// + ;// Input Registers: + ;// + ;// dXjnlo - n th input D register with first four S16 values + ;// dXjnhi - n th input D register with next four S16 values + ;// qXjn - n th input Q register with eight S16 values + ;// pScale - Pointer to 32bit scale values in Q23 format + ;// + ;// Output Registers: + ;// + ;// dXinlo - n th output D register with first four S16 output values of 1st stage + ;// dXinhi - n th output D register with next four S16 output values of 1st stage + ;// + MACRO + M_IDCT_PRESCALE32 +qScale0lo QN 0.S32 +qScale0hi QN 1.S32 +qScale1lo QN 2.S32 +qScale1hi QN 3.S32 +qScale2lo QN qScale1lo +qScale2hi QN qScale1hi +qScale3lo QN qScale1lo +qScale3hi QN qScale1hi +qScale4lo QN qScale1lo +qScale4hi QN qScale1hi +qScale5lo QN qScale0lo +qScale5hi QN qScale0hi +qScale6lo QN qScale0lo +qScale6hi QN qScale0hi +qScale7lo QN qScale0lo +qScale7hi QN qScale0hi + +qSrc0lo QN 4.S32 +qSrc0hi QN 5.S32 +qSrc1lo QN 6.S32 +qSrc1hi QN Src4.S32 +qSrc2lo QN qSrc0lo +qSrc2hi QN qSrc0hi +qSrc3lo QN qSrc0lo +qSrc3hi QN qSrc0hi +qSrc4lo QN qSrc0lo +qSrc4hi QN qSrc0hi +qSrc5lo QN qSrc1lo +qSrc5hi QN qSrc1hi +qSrc6lo QN qSrc1lo +qSrc6hi QN qSrc1hi +qSrc7lo QN qSrc0lo +qSrc7hi QN qSrc0hi + +qRes17lo QN qScale0lo +qRes17hi QN qScale0hi +qRes26lo QN qScale0lo +qRes26hi QN qScale0hi +qRes53lo QN qScale0lo +qRes53hi QN qScale0hi + + ADD pTemp, pScale, #4*8*7 ;// Address of pScale[7] + + ;// Row 0 + VLD1 {qScale0lo, qScale0hi}, [pScale]! + VSHLL qSrc0lo, dXj0lo, #(12-1) + VSHLL qSrc0hi, dXj0hi, #(12-1) + VLD1 {qScale1lo, qScale1hi}, [pScale]! + VQRDMULH qSrc0lo, qScale0lo, qSrc0lo + VQRDMULH qSrc0hi, qScale0hi, qSrc0hi + VLD1 {qScale7lo, qScale7hi}, [pTemp]! 
+ VSHLL qSrc1lo, dXj1lo, #(12-1) + VSHLL qSrc1hi, dXj1hi, #(12-1) + VMOVN dXi0lo, qSrc0lo ;// Output i0 + VMOVN dXi0hi, qSrc0hi + VSHLL qSrc7lo, dXj7lo, #(12-1) + VSHLL qSrc7hi, dXj7hi, #(12-1) + SUB pTemp, pTemp, #((16*2)+(4*8*1)) + VQRDMULH qSrc1lo, qScale1lo, qSrc1lo + VQRDMULH qSrc1hi, qScale1hi, qSrc1hi + VQRDMULH qSrc7lo, qScale7lo, qSrc7lo + VQRDMULH qSrc7hi, qScale7hi, qSrc7hi + VLD1 {qScale2lo, qScale2hi}, [pScale]! + + ;// Row 1 & 7 + VHADD qRes17lo, qSrc1lo, qSrc7lo ;// (j1+j7)/2 + VHADD qRes17hi, qSrc1hi, qSrc7hi ;// (j1+j7)/2 + VMOVN dXi5lo, qRes17lo ;// Output i5 + VMOVN dXi5hi, qRes17hi + VSUB qRes17lo, qSrc1lo, qSrc7lo ;// j1-j7 + VSUB qRes17hi, qSrc1hi, qSrc7hi ;// j1-j7 + VMOVN dXi6lo, qRes17lo ;// Output i6 + VMOVN dXi6hi, qRes17hi + VSHLL qSrc2lo, dXj2lo, #(12-1) + VSHLL qSrc2hi, dXj2hi, #(12-1) + VLD1 {qScale6lo, qScale6hi}, [pTemp]! + VSHLL qSrc6lo, dXj6lo, #(12-1) + VSHLL qSrc6hi, dXj6hi, #(12-1) + SUB pTemp, pTemp, #((16*2)+(4*8*1)) + VQRDMULH qSrc2lo, qScale2lo, qSrc2lo + VQRDMULH qSrc2hi, qScale2hi, qSrc2hi + VQRDMULH qSrc6lo, qScale6lo, qSrc6lo + VQRDMULH qSrc6hi, qScale6hi, qSrc6hi + VLD1 {qScale3lo, qScale3hi}, [pScale]! + + ;// Row 2 & 6 + VHADD qRes26lo, qSrc2lo, qSrc6lo ;// (j2+j6)/2 + VHADD qRes26hi, qSrc2hi, qSrc6hi ;// (j2+j6)/2 + VMOVN dXi3lo, qRes26lo ;// Output i3 + VMOVN dXi3hi, qRes26hi + VSUB qRes26lo, qSrc2lo, qSrc6lo ;// j2-j6 + VSUB qRes26hi, qSrc2hi, qSrc6hi ;// j2-j6 + VMOVN dXi2lo, qRes26lo ;// Output i2 + VMOVN dXi2hi, qRes26hi + VSHLL qSrc3lo, dXj3lo, #(12-1) + VSHLL qSrc3hi, dXj3hi, #(12-1) + VLD1 {qScale5lo, qScale5hi}, [pTemp]! 
+ VSHLL qSrc5lo, dXj5lo, #(12-1) + VSHLL qSrc5hi, dXj5hi, #(12-1) + VQRDMULH qSrc3lo, qScale3lo, qSrc3lo + VQRDMULH qSrc3hi, qScale3hi, qSrc3hi + VQRDMULH qSrc5lo, qScale5lo, qSrc5lo + VQRDMULH qSrc5hi, qScale5hi, qSrc5hi + + ;// Row 3 & 5 + VHADD qRes53lo, qSrc5lo, qSrc3lo ;// (j5+j3)/2 + VHADD qRes53hi, qSrc5hi, qSrc3hi ;// (j5+j3)/2 + SUB pSrc, pSrc, #16*2*2 + VMOVN dXi7lo, qRes53lo ;// Output i7 + VMOVN dXi7hi, qRes53hi + VSUB qRes53lo, qSrc5lo, qSrc3lo ;// j5-j3 + VSUB qRes53hi, qSrc5hi, qSrc3hi ;// j5-j3 + VLD1 qXj4, [pSrc @64] + VMOVN dXi4lo, qRes53lo ;// Output i4 + VMOVN dXi4hi, qRes53hi + VSHLL qSrc4lo, dXj4lo, #(12-1) + VSHLL qSrc4hi, dXj4hi, #(12-1) + VLD1 {qScale4lo, qScale4hi}, [pScale] + LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants + VQRDMULH qSrc4lo, qScale4lo, qSrc4lo + VQRDMULH qSrc4hi, qScale4hi, qSrc4hi + VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants + ;// Row 4 + VMOVN dXi1lo, qSrc4lo ;// Output i1 + VMOVN dXi1hi, qSrc4hi + + MEND + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_MaskTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_MaskTable.h new file mode 100755 index 0000000000000000000000000000000000000000..51118fdb37d5cfcff9f0c917da93efafe8e8745c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_MaskTable.h @@ -0,0 +1,27 @@ +/** + * + * File Name: armCOMM_MaskTable.h + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Mask Table to mask the end of array + */ + + + +#ifndef _ARMCOMM_MASKTABLE_H_ +#define _ARMCOMM_MASKTABLE_H_ + +#define MaskTableSize 72 + +/* Mask table */ + +extern const OMX_U16 armCOMM_qMaskTable16[MaskTableSize]; +extern const OMX_U8 armCOMM_qMaskTable8[MaskTableSize]; + +#endif diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Version.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Version.h new file mode 100755 index 0000000000000000000000000000000000000000..41b3e1e3be4ea9fada1d8b61fc8627d4fa96eee9 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Version.h @@ -0,0 +1,43 @@ +/* Guard the header against multiple inclusion. */ +#ifndef __ARM_COMM_VERSION_H__ +#define __ARM_COMM_VERSION_H__ + + +/* The following line should be in omxtypes.h but hasn't been approved by OpenMAX yet */ +#define OMX_VERSION 102 + +/* We need to define these macros in order to convert a #define number into a #define string. */ +#define ARM_QUOTE(a) #a +#define ARM_INDIRECT(A) ARM_QUOTE(A) + +/* Convert the OMX_VERSION number into a string that can be used, for example, to print it out. */ +#define ARM_VERSION_STRING ARM_INDIRECT(OMX_VERSION) + + +/* Define this in order to turn on ARM version/release/build strings in each domain */ +#define ARM_INCLUDE_VERSION_DESCRIPTIONS + +#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS + extern const char * const omxAC_VersionDescription; + extern const char * const omxIC_VersionDescription; + extern const char * const omxIP_VersionDescription; + extern const char * const omxSP_VersionDescription; + extern const char * const omxVC_VersionDescription; +#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */ + + +/* The following entries should be automatically updated by the release script */ +/* They are used in the ARM version strings defined for each domain. */ + +/* The release tag associated with this release of the library. 
- used for source and object releases */ +#define OMX_ARM_RELEASE_TAG "r1p0-00bet0" + +/* The ARM architecture used to build any objects or executables in this release. */ +#define OMX_ARM_BUILD_ARCHITECTURE "ARM Architecture V7 with NEON" + +/* The ARM Toolchain used to build any objects or executables in this release. */ +#define OMX_ARM_BUILD_TOOLCHAIN "ARM RVCT 3.1" + + +#endif /* __ARM_COMM_VERSION_H__ */ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_s.h new file mode 100755 index 0000000000000000000000000000000000000000..0956bd16bf439be19f82195d780e6618fe6e7267 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_s.h @@ -0,0 +1,1157 @@ +;// +;// +;// File Name: armCOMM_s.h +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// ARM optimized OpenMAX common header file +;// + +;// Protect against multiple inclusion + IF :LNOT::DEF:ARMCOMM_S_H + GBLL ARMCOMM_S_H + + REQUIRE8 ;// Requires 8-byte stack alignment + PRESERVE8 ;// Preserves 8-byte stack alignment + + GBLL ARM_ERRORCHECK +ARM_ERRORCHECK SETL {FALSE} + +;// Globals + + GBLS _RRegList ;// R saved register list + GBLS _DRegList ;// D saved register list + GBLS _Variant ;// Selected processor variant + GBLS _CPU ;// CPU name + GBLS _Struct ;// Structure name + + GBLL _InFunc ;// Inside function assembly flag + GBLL _SwLong ;// Long switch flag + + GBLA _RBytes ;// Number of register bytes on stack + GBLA _SBytes ;// Number of scratch bytes on stack + GBLA _ABytes ;// Stack offset of next argument + GBLA _Workspace ;// Stack offset of scratch workspace + GBLA _F ;// Function number + GBLA _StOff ;// Struct offset + GBLA _SwNum ;// Switch number + GBLS _32 ;// Suffix for 32 byte alignment + GBLS _16 ;// Suffix for 16 byte alignment + +_InFunc SETL {FALSE} 
+_SBytes SETA 0 +_F SETA 0 +_SwNum SETA 0 +_32 SETS "ALIGN32" +_16 SETS "ALIGN16" + +;///////////////////////////////////////////////////////// +;// Override the tools settings of the CPU if the #define +;// OVERRIDECPU is set, otherwise use the CPU defined by the +;// assembler settings. +;///////////////////////////////////////////////////////// + + IF :DEF: OVERRIDECPU +_CPU SETS OVERRIDECPU + ELSE +_CPU SETS {CPU} + ENDIF + + + +;///////////////////////////////////////////////////////// +;// Work out which code to build +;///////////////////////////////////////////////////////// + + IF :DEF:ARM1136JS:LOR::DEF:CortexA8:LOR::DEF:ARM_GENERIC + INFO 1,"Please switch to using M_VARIANTS" + ENDIF + + ;// Define and reset all officially recognised variants + MACRO + _M_DEF_VARIANTS + _M_DEF_VARIANT ARM926EJS + _M_DEF_VARIANT ARM1136JS + _M_DEF_VARIANT ARM1136JS_U + _M_DEF_VARIANT CortexA8 + _M_DEF_VARIANT ARM7TDMI + MEND + + MACRO + _M_DEF_VARIANT $var + GBLL $var + GBLL _ok$var +$var SETL {FALSE} + MEND + + + ;// Variant declaration + ;// + ;// Define a list of code variants supported by this + ;// source file. This macro then chooses the most + ;// appropriate variant to build for the currently configured + ;// core. 
+ ;// + MACRO + M_VARIANTS $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7 + ;// Set to TRUE variants that are supported + _M_DEF_VARIANTS + _M_VARIANT $v0 + _M_VARIANT $v1 + _M_VARIANT $v2 + _M_VARIANT $v3 + _M_VARIANT $v4 + _M_VARIANT $v5 + _M_VARIANT $v6 + _M_VARIANT $v7 + + ;// Look for first available variant to match a CPU + ;// _M_TRY cpu, variant fall back list +_Variant SETS "" + _M_TRY ARM926EJ-S, ARM926EJS + _M_TRY ARM1176JZ-S, ARM1136JS + _M_TRY ARM1176JZF-S, ARM1136JS + _M_TRY ARM1156T2-S, ARM1136JS + _M_TRY ARM1156T2F-S, ARM1136JS + _M_TRY ARM1136J-S, ARM1136JS + _M_TRY ARM1136JF-S, ARM1136JS + _M_TRY MPCore, ARM1136JS + _M_TRY falcon-vfp, ARM1136JS + _M_TRY falcon-full-neon, CortexA8 + _M_TRY Cortex-A8NoNeon, ARM1136JS + _M_TRY Cortex-A8, CortexA8, ARM1136JS + _M_TRY Cortex-R4, ARM1136JS + _M_TRY ARM7TDMI + + ;// Select the correct variant + _M_DEF_VARIANTS + IF _Variant="" + INFO 1, "No match found for CPU '$_CPU'" + ELSE +$_Variant SETL {TRUE} + ENDIF + MEND + + ;// Register a variant as available + MACRO + _M_VARIANT $var + IF "$var"="" + MEXIT + ENDIF + IF :LNOT::DEF:_ok$var + INFO 1, "Unrecognized variant '$var'" + ENDIF +$var SETL {TRUE} + MEND + + ;// For a given CPU, see if any of the variants supporting + ;// this CPU are available. 
The first available variant is + ;// chosen + MACRO + _M_TRY $cpu, $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7 + IF "$cpu"<>_CPU + MEXIT + ENDIF + _M_TRY1 $v0 + _M_TRY1 $v1 + _M_TRY1 $v2 + _M_TRY1 $v3 + _M_TRY1 $v4 + _M_TRY1 $v5 + _M_TRY1 $v6 + _M_TRY1 $v7 + ;// Check a match was found + IF _Variant="" + INFO 1, "No variant match found for CPU '$_CPU'" + ENDIF + MEND + + MACRO + _M_TRY1 $var + IF "$var"="" + MEXIT + ENDIF + IF (_Variant=""):LAND:$var +_Variant SETS "$var" + ENDIF + MEND + +;//////////////////////////////////////////////////////// +;// Structure definition +;//////////////////////////////////////////////////////// + + ;// Declare a structure of given name + MACRO + M_STRUCT $sname +_Struct SETS "$sname" +_StOff SETA 0 + MEND + + ;// Declare a structure field + ;// The field is called $sname_$fname + ;// $size = the size of each entry, must be power of 2 + ;// $number = (if provided) the number of entries for an array + MACRO + M_FIELD $fname, $size, $number + IF (_StOff:AND:($size-1))!=0 +_StOff SETA _StOff + ($size - (_StOff:AND:($size-1))) + ENDIF +$_Struct._$fname EQU _StOff + IF "$number"<>"" +_StOff SETA _StOff + $size*$number + ELSE +_StOff SETA _StOff + $size + ENDIF + MEND + + + MACRO + M_ENDSTRUCT +sizeof_$_Struct EQU _StOff +_Struct SETS "" + MEND + +;////////////////////////////////////////////////////////// +;// Switch and table macros +;////////////////////////////////////////////////////////// + + ;// Start a relative switch table with register to switch on + ;// + ;// $v = the register to switch on + ;// $s = if specified must be "L" to indicate long + ;// this allows a greater range to the case code + MACRO + M_SWITCH $v, $s + ASSERT "$s"="":LOR:"$s"="L" +_SwLong SETL {FALSE} + IF "$s"="L" +_SwLong SETL {TRUE} + ENDIF +_SwNum SETA _SwNum+1 + IF {CONFIG}=16 + ;// Thumb + IF _SwLong + TBH [pc, $v, LSL#1] + ELSE + TBB [pc, $v] + ENDIF +_Switch$_SwNum + ELSE + ;// ARM + ADD pc, pc, $v, LSL #2 + NOP + ENDIF + MEND + + ;// Add a case to the switch 
statement + MACRO + M_CASE $label + IF {CONFIG}=16 + ;// Thumb + IF _SwLong + DCW ($label - _Switch$_SwNum)/2 + ELSE + DCB ($label - _Switch$_SwNum)/2 + ENDIF + ELSE + ;// ARM + B $label + ENDIF + MEND + + ;// End of switch statement + MACRO + M_ENDSWITCH + ALIGN 2 + MEND + + +;//////////////////////////////////////////////////////// +;// Data area allocation +;//////////////////////////////////////////////////////// + + ;// Constant table allocator macro + ;// + ;// Places each constant table in a read-only data area (one area name per alignment value) + ;// $name is the symbol through which the table can be accessed. + ;// $align is the optional alignment of the table, log2 of + ;// the byte alignment - $align=4 is 16 byte aligned + MACRO + M_TABLE $name, $align + ASSERT :LNOT:_InFunc + IF "$align"="" + AREA |.constdata|, READONLY, DATA + ELSE + ;// AREAs inherit the alignment of the first declaration. + ;// Therefore for each alignment size we must have an area + ;// of a different name. + AREA constdata_a$align, READONLY, DATA, ALIGN=$align + + ;// We also force alignment in case we are tagging onto + ;// an already started area. + ALIGN (1<<$align) + ENDIF +$name + MEND + +;///////////////////////////////////////////////////// +;// Macros to allocate space on the stack +;// +;// These all assume that the stack is 8-byte aligned +;// at entry to the function, which means that the +;// 32-byte alignment macro needs to work in a +;// bit more of a special way... +;///////////////////////////////////////////////////// + + + + + ;// Allocate 1-byte aligned area of name + ;// $name size $size bytes. + MACRO + M_ALLOC1 $name, $size + ASSERT :LNOT:_InFunc +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + ;// Allocate 2-byte aligned area of name + ;// $name size $size bytes. 
+ MACRO + M_ALLOC2 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:1)!=0 +_SBytes SETA _SBytes + (2 - (_SBytes:AND:1)) + ENDIF +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + ;// Allocate 4-byte aligned area of name + ;// $name size $size bytes. + MACRO + M_ALLOC4 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:3)!=0 +_SBytes SETA _SBytes + (4 - (_SBytes:AND:3)) + ENDIF +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + ;// Allocate 8-byte aligned area of name + ;// $name size $size bytes. + MACRO + M_ALLOC8 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:7)!=0 +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) + ENDIF +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + + ;// Allocate 8-byte aligned area of name + ;// $name size ($size+8) bytes. + ;// The extra 8 bytes are later used to align the pointer to 16 bytes + + MACRO + M_ALLOC16 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:7)!=0 +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) + ENDIF +$name$_F$_16 EQU (_SBytes + 8) +_SBytes SETA _SBytes + ($size) + 8 + MEND + + ;// Allocate 8-byte aligned area of name + ;// $name size ($size+24) bytes. 
+ ;// The extra 24 bytes are later used to align the pointer to 32 bytes + + MACRO + M_ALLOC32 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:7)!=0 +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) + ENDIF +$name$_F$_32 EQU (_SBytes + 24) +_SBytes SETA _SBytes + ($size) + 24 + MEND + + + + + ;// Argument Declaration Macro + ;// + ;// Allocate an argument name $name + ;// size $size bytes + MACRO + M_ARG $name, $size + ASSERT _InFunc +$name$_F EQU _ABytes +_ABytes SETA _ABytes + ($size) + MEND + +;/////////////////////////////////////////////// +;// Macros to access stacked variables +;/////////////////////////////////////////////// + + ;// Macro to perform a data processing operation + ;// with a constant second operand + MACRO + _M_OPC $op,$rd,$rn,$const + LCLA _sh + LCLA _cst +_sh SETA 0 +_cst SETA $const + IF _cst=0 + $op $rd, $rn, #_cst + MEXIT + ENDIF + WHILE (_cst:AND:3)=0 +_cst SETA _cst>>2 +_sh SETA _sh+2 + WEND + $op $rd, $rn, #(_cst:AND:0x000000FF)<<_sh + IF _cst>=256 + $op $rd, $rd, #(_cst:AND:0xFFFFFF00)<<_sh + ENDIF + MEND + + ;// Macro to perform a data access operation + ;// Such as LDR or STR + ;// The addressing mode is modified such that + ;// 1. If no address is given then the name is taken + ;// as a stack offset + ;// 2. If the addressing mode is not available for the + ;// state being assembled for (eg Thumb) then a suitable + ;// addressing mode is substituted. + ;// + ;// On Entry: + ;// $i = Instruction to perform (eg "LDRB") + ;// $a = Required byte alignment + ;// $r = Register(s) to transfer (eg "r1") + ;// $a0,$a1,$a2. Addressing mode and condition. 
One of: + ;// label {,cc} + ;// [base] {,,,cc} + ;// [base, offset]{!} {,,cc} + ;// [base, offset, shift]{!} {,cc} + ;// [base], offset {,,cc} + ;// [base], offset, shift {,cc} + MACRO + _M_DATA $i,$a,$r,$a0,$a1,$a2,$a3 + IF "$a0":LEFT:1="[" + IF "$a1"="" + $i$a3 $r, $a0 + ELSE + IF "$a0":RIGHT:1="]" + IF "$a2"="" + _M_POSTIND $i$a3, "$r", $a0, $a1 + ELSE + _M_POSTIND $i$a3, "$r", $a0, "$a1,$a2" + ENDIF + ELSE + IF "$a2"="" + _M_PREIND $i$a3, "$r", $a0, $a1 + ELSE + _M_PREIND $i$a3, "$r", $a0, "$a1,$a2" + ENDIF + ENDIF + ENDIF + ELSE + LCLA _Offset +_Offset SETA _Workspace + $a0$_F + ASSERT (_Offset:AND:($a-1))=0 + $i$a1 $r, [sp, #_Offset] + ENDIF + MEND + + ;// Handle post indexed load/stores + ;// op reg, [base], offset + MACRO + _M_POSTIND $i,$r,$a0,$a1 + LCLS _base + LCLS _offset + IF {CONFIG}=16 ;// Thumb +_base SETS ("$a0":LEFT:(:LEN:"$a0"-1)):RIGHT:(:LEN:"$a0"-2) ;// remove [] +_offset SETS "$a1" + IF _offset:LEFT:1="+" +_offset SETS _offset:RIGHT:(:LEN:_offset-1) + ENDIF + $i $r, $a0 + IF _offset:LEFT:1="-" +_offset SETS _offset:RIGHT:(:LEN:_offset-1) + SUB $_base, $_base, $_offset + ELSE + ADD $_base, $_base, $_offset + ENDIF + ELSE ;// ARM + $i $r, $a0, $a1 + ENDIF + MEND + + ;// Handle pre indexed load/store + ;// op reg, [base, offset]{!} + MACRO + _M_PREIND $i,$r,$a0,$a1 + LCLS _base + LCLS _offset + IF ({CONFIG}=16):LAND:(("$a1":RIGHT:2)="]!") +_base SETS "$a0":RIGHT:(:LEN:("$a0")-1) +_offset SETS "$a1":LEFT:(:LEN:("$a1")-2) + $i $r, [$_base, $_offset] + ADD $_base, $_base, $_offset + ELSE + $i $r, $a0, $a1 + ENDIF + MEND + + ;// Load unsigned byte from stack + MACRO + M_LDRB $r,$a0,$a1,$a2,$a3 + _M_DATA "LDRB",1,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load signed byte from stack + MACRO + M_LDRSB $r,$a0,$a1,$a2,$a3 + _M_DATA "LDRSB",1,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Store byte to stack + MACRO + M_STRB $r,$a0,$a1,$a2,$a3 + _M_DATA "STRB",1,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load unsigned half word from stack + MACRO + M_LDRH $r,$a0,$a1,$a2,$a3 + _M_DATA 
"LDRH",2,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load signed half word from stack + MACRO + M_LDRSH $r,$a0,$a1,$a2,$a3 + _M_DATA "LDRSH",2,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Store half word to stack + MACRO + M_STRH $r,$a0,$a1,$a2,$a3 + _M_DATA "STRH",2,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load word from stack + MACRO + M_LDR $r,$a0,$a1,$a2,$a3 + _M_DATA "LDR",4,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Store word to stack + MACRO + M_STR $r,$a0,$a1,$a2,$a3 + _M_DATA "STR",4,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load double word from stack + MACRO + M_LDRD $r0,$r1,$a0,$a1,$a2,$a3 + _M_DATA "LDRD",8,"$r0,$r1",$a0,$a1,$a2,$a3 + MEND + + ;// Store double word to stack + MACRO + M_STRD $r0,$r1,$a0,$a1,$a2,$a3 + _M_DATA "STRD",8,"$r0,$r1",$a0,$a1,$a2,$a3 + MEND + + ;// Get absolute address of stack allocated location + MACRO + M_ADR $a, $b, $cc + _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F) + MEND + + ;// Get absolute address of stack allocated location and align the address to 16 bytes + MACRO + M_ADR16 $a, $b, $cc + _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F$_16) + + ;// Now align $a to 16 bytes + BIC$cc $a,$a,#0x0F + MEND + + ;// Get absolute address of stack allocated location and align the address to 32 bytes + MACRO + M_ADR32 $a, $b, $cc + _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F$_32) + + ;// Now align $a to 32 bytes + BIC$cc $a,$a,#0x1F + MEND + +;////////////////////////////////////////////////////////// +;// Function header and footer macros +;////////////////////////////////////////////////////////// + + ;// Function Header Macro + ;// Generates the function prologue + ;// Note that functions should all be "stack-moves-once" + ;// The FNSTART and FNEND macros should be the only places + ;// where the stack moves. 
+ ;// + ;// $name = function name + ;// $rreg = "" don't stack any registers + ;// "lr" stack "r4,lr" (same as "r4") + ;// "rN" stack registers "r4-rN,lr" (odd N is rounded up to N+1) + ;// $dreg = "" don't stack any D registers + ;// "dN" stack registers "d8-dN" + ;// + ;// Note: ARM Architecture procedure call standard AAPCS + ;// states that r4-r11, sp, d8-d15 must be preserved by + ;// a compliant function. + MACRO + M_START $name, $rreg, $dreg + ASSERT :LNOT:_InFunc + ASSERT "$name"!="" +_InFunc SETL {TRUE} +_RBytes SETA 0 +_Workspace SETA 0 + + ;// Create an area for the function + AREA |.text|, CODE + EXPORT $name +$name FUNCTION + + ;// Save R registers + _M_GETRREGLIST $rreg + IF _RRegList<>"" + STMFD sp!, {$_RRegList, lr} + ENDIF + + ;// Save D registers + _M_GETDREGLIST $dreg + IF _DRegList<>"" + VSTMFD sp!, {$_DRegList} + ENDIF + + + ;// Ensure size claimed on stack is 8-byte aligned + IF ((_SBytes:AND:7)!=0) +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) + ENDIF + + IF (_SBytes!=0) + _M_OPC SUB, sp, sp, _SBytes + ENDIF + + +_ABytes SETA _SBytes + _RBytes - _Workspace + + + ;// Print function name if debug enabled + M_PRINTF "$name\n", + MEND + + ;// Work out a list of R saved registers + MACRO + _M_GETRREGLIST $rreg + IF "$rreg"="" +_RRegList SETS "" + MEXIT + ENDIF + IF "$rreg"="lr":LOR:"$rreg"="r4" +_RRegList SETS "r4" +_RBytes SETA _RBytes+8 + MEXIT + ENDIF + IF "$rreg"="r5":LOR:"$rreg"="r6" +_RRegList SETS "r4-r6" +_RBytes SETA _RBytes+16 + MEXIT + ENDIF + IF "$rreg"="r7":LOR:"$rreg"="r8" +_RRegList SETS "r4-r8" +_RBytes SETA _RBytes+24 + MEXIT + ENDIF + IF "$rreg"="r9":LOR:"$rreg"="r10" +_RRegList SETS "r4-r10" +_RBytes SETA _RBytes+32 + MEXIT + ENDIF + IF "$rreg"="r11":LOR:"$rreg"="r12" +_RRegList SETS "r4-r12" +_RBytes SETA _RBytes+40 + MEXIT + ENDIF + INFO 1, "Unrecognized saved r register limit '$rreg'" + MEND + + ;// Work out a list of D saved registers + MACRO + _M_GETDREGLIST $dreg + IF "$dreg"="" +_DRegList SETS "" + MEXIT + ENDIF + IF "$dreg"="d8" +_DRegList SETS "d8" +_RBytes SETA 
_RBytes+8 + MEXIT + ENDIF + IF "$dreg"="d9" +_DRegList SETS "d8-d9" +_RBytes SETA _RBytes+16 + MEXIT + ENDIF + IF "$dreg"="d10" +_DRegList SETS "d8-d10" +_RBytes SETA _RBytes+24 + MEXIT + ENDIF + IF "$dreg"="d11" +_DRegList SETS "d8-d11" +_RBytes SETA _RBytes+32 + MEXIT + ENDIF + IF "$dreg"="d12" +_DRegList SETS "d8-d12" +_RBytes SETA _RBytes+40 + MEXIT + ENDIF + IF "$dreg"="d13" +_DRegList SETS "d8-d13" +_RBytes SETA _RBytes+48 + MEXIT + ENDIF + IF "$dreg"="d14" +_DRegList SETS "d8-d14" +_RBytes SETA _RBytes+56 + MEXIT + ENDIF + IF "$dreg"="d15" +_DRegList SETS "d8-d15" +_RBytes SETA _RBytes+64 + MEXIT + ENDIF + INFO 1, "Unrecognized saved d register limit '$dreg'" + MEND + + ;// Produce function return instructions + MACRO + _M_RET $cc + IF _DRegList<>"" + VPOP$cc {$_DRegList} + ENDIF + IF _RRegList="" + BX$cc lr + ELSE + LDM$cc.FD sp!, {$_RRegList, pc} + ENDIF + MEND + + ;// Early Function Exit Macro + ;// $cc = condition to exit with + ;// (Example: M_EXIT EQ) + MACRO + M_EXIT $cc + ASSERT _InFunc + IF _SBytes!=0 + ;// Restore stack frame and exit + B$cc _End$_F + ELSE + ;// Can return directly + _M_RET $cc + ENDIF + MEND + + ;// Function Footer Macro + ;// Generates the function epilogue + MACRO + M_END + ASSERT _InFunc +_InFunc SETL {FALSE} +_End$_F + + ;// Restore the stack pointer to its original value on function entry + IF _SBytes!=0 + _M_OPC ADD, sp, sp, _SBytes + ENDIF + _M_RET + ENDFUNC + + ;// Reset the global stack tracking variables back to their + ;// initial values, and increment the function count +_SBytes SETA 0 +_F SETA _F+1 + MEND + + +;//========================================================================== +;// Debug Macros +;//========================================================================== + + GBLL DEBUG_ON +DEBUG_ON SETL {FALSE} + GBLL DEBUG_STALLS_ON +DEBUG_STALLS_ON SETL {FALSE} + + ;//========================================================================== + ;// Debug call to printf + ;// M_PRINTF $format, $val0, $val1, 
$val2 + ;// + ;// Examples: + ;// M_PRINTF "x=%08x\n", r0 + ;// + ;// This macro preserves the value of all registers including the + ;// flags. + ;//========================================================================== + + MACRO + M_PRINTF $format, $val0, $val1, $val2 + IF DEBUG_ON + + IMPORT printf + LCLA nArgs +nArgs SETA 0 + + ;// save registers so we don't corrupt them + STMFD sp!, {r0-r12, lr} + + ;// Drop stack to give us some workspace + SUB sp, sp, #16 + + ;// Save registers we need to print to the stack + IF "$val2" <> "" + ASSERT "$val1" <> "" + STR $val2, [sp, #8] +nArgs SETA nArgs+1 + ENDIF + IF "$val1" <> "" + ASSERT "$val0" <> "" + STR $val1, [sp, #4] +nArgs SETA nArgs+1 + ENDIF + IF "$val0"<>"" + STR $val0, [sp] +nArgs SETA nArgs+1 + ENDIF + + ;// Now we are safe to corrupt registers + ADR r0, %FT00 + IF nArgs=1 + LDR r1, [sp] + ENDIF + IF nArgs=2 + LDMIA sp, {r1,r2} + ENDIF + IF nArgs=3 + LDMIA sp, {r1,r2,r3} + ENDIF + + ;// print the values + MRS r4, cpsr ;// preserve flags + BL printf + MSR cpsr_f, r4 ;// restore flags + B %FT01 +00 ;// string to print + DCB "$format", 0 + ALIGN +01 ;// Finished + ADD sp, sp, #16 + ;// Restore registers + LDMFD sp!, {r0-r12,lr} + + ENDIF ;// DEBUG_ON + MEND + + + ;// Stall Simulation Macro + ;// Inserts a given number of NOPs for the currently + ;// defined platform + MACRO + M_STALL $plat1stall, $plat2stall, $plat3stall, $plat4stall, $plat5stall, $plat6stall + IF DEBUG_STALLS_ON + _M_STALL_SUB $plat1stall + _M_STALL_SUB $plat2stall + _M_STALL_SUB $plat3stall + _M_STALL_SUB $plat4stall + _M_STALL_SUB $plat5stall + _M_STALL_SUB $plat6stall + ENDIF + MEND + + MACRO + _M_STALL_SUB $platstall + IF "$platstall"!="" + LCLA _pllen + LCLS _pl + LCLL _pllog +_pllen SETA :LEN:"$platstall" +_pl SETS "$platstall":LEFT:(_pllen - 2) + IF :DEF:$_pl + IF $_pl + LCLS _st + LCLA _stnum +_st SETS "$platstall":RIGHT:1 +_stnum SETA $_st + WHILE _stnum>0 + MOV sp, sp +_stnum SETA _stnum - 1 + WEND + ENDIF + ENDIF + ENDIF + MEND + 
+ + +;//========================================================================== +;// Endian Invariance Macros +;// +;// The idea behind these macros is that if an array is +;// loaded as words then the SMUL00 macro will multiply +;// array elements 0 regardless of the endianness of the +;// system. For little endian SMUL00=SMULBB, for big +;// endian SMUL00=SMULTT and similarly for other packed operations. +;// +;//========================================================================== + + MACRO + LIBI4 $comli, $combi, $a, $b, $c, $d, $cc + IF {ENDIAN}="big" + $combi.$cc $a, $b, $c, $d + ELSE + $comli.$cc $a, $b, $c, $d + ENDIF + MEND + + MACRO + LIBI3 $comli, $combi, $a, $b, $c, $cc + IF {ENDIAN}="big" + $combi.$cc $a, $b, $c + ELSE + $comli.$cc $a, $b, $c + ENDIF + MEND + + ;// SMLAxy macros + + MACRO + SMLA00 $a, $b, $c, $d, $cc + LIBI4 SMLABB, SMLATT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA01 $a, $b, $c, $d, $cc + LIBI4 SMLABT, SMLATB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA0B $a, $b, $c, $d, $cc + LIBI4 SMLABB, SMLATB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA0T $a, $b, $c, $d, $cc + LIBI4 SMLABT, SMLATT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA10 $a, $b, $c, $d, $cc + LIBI4 SMLATB, SMLABT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA11 $a, $b, $c, $d, $cc + LIBI4 SMLATT, SMLABB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA1B $a, $b, $c, $d, $cc + LIBI4 SMLATB, SMLABB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA1T $a, $b, $c, $d, $cc + LIBI4 SMLATT, SMLABT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAB0 $a, $b, $c, $d, $cc + LIBI4 SMLABB, SMLABT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAB1 $a, $b, $c, $d, $cc + LIBI4 SMLABT, SMLABB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAT0 $a, $b, $c, $d, $cc + LIBI4 SMLATB, SMLATT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAT1 $a, $b, $c, $d, $cc + LIBI4 SMLATT, SMLATB, $a, $b, $c, $d, $cc + MEND + + ;// SMULxy macros + + MACRO + SMUL00 $a, $b, $c, $cc + LIBI3 SMULBB, SMULTT, $a, $b, $c, $cc + MEND + + 
MACRO + SMUL01 $a, $b, $c, $cc + LIBI3 SMULBT, SMULTB, $a, $b, $c, $cc + MEND + + MACRO + SMUL0B $a, $b, $c, $cc + LIBI3 SMULBB, SMULTB, $a, $b, $c, $cc + MEND + + MACRO + SMUL0T $a, $b, $c, $cc + LIBI3 SMULBT, SMULTT, $a, $b, $c, $cc + MEND + + MACRO + SMUL10 $a, $b, $c, $cc + LIBI3 SMULTB, SMULBT, $a, $b, $c, $cc + MEND + + MACRO + SMUL11 $a, $b, $c, $cc + LIBI3 SMULTT, SMULBB, $a, $b, $c, $cc + MEND + + MACRO + SMUL1B $a, $b, $c, $cc + LIBI3 SMULTB, SMULBB, $a, $b, $c, $cc + MEND + + MACRO + SMUL1T $a, $b, $c, $cc + LIBI3 SMULTT, SMULBT, $a, $b, $c, $cc + MEND + + MACRO + SMULB0 $a, $b, $c, $cc + LIBI3 SMULBB, SMULBT, $a, $b, $c, $cc + MEND + + MACRO + SMULB1 $a, $b, $c, $cc + LIBI3 SMULBT, SMULBB, $a, $b, $c, $cc + MEND + + MACRO + SMULT0 $a, $b, $c, $cc + LIBI3 SMULTB, SMULTT, $a, $b, $c, $cc + MEND + + MACRO + SMULT1 $a, $b, $c, $cc + LIBI3 SMULTT, SMULTB, $a, $b, $c, $cc + MEND + + ;// SMLAWx, SMULWx macros + + MACRO + SMLAW0 $a, $b, $c, $d, $cc + LIBI4 SMLAWB, SMLAWT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAW1 $a, $b, $c, $d, $cc + LIBI4 SMLAWT, SMLAWB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMULW0 $a, $b, $c, $cc + LIBI3 SMULWB, SMULWT, $a, $b, $c, $cc + MEND + + MACRO + SMULW1 $a, $b, $c, $cc + LIBI3 SMULWT, SMULWB, $a, $b, $c, $cc + MEND + + ;// SMLALxy macros + + + MACRO + SMLAL00 $a, $b, $c, $d, $cc + LIBI4 SMLALBB, SMLALTT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL01 $a, $b, $c, $d, $cc + LIBI4 SMLALBT, SMLALTB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL0B $a, $b, $c, $d, $cc + LIBI4 SMLALBB, SMLALTB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL0T $a, $b, $c, $d, $cc + LIBI4 SMLALBT, SMLALTT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL10 $a, $b, $c, $d, $cc + LIBI4 SMLALTB, SMLALBT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL11 $a, $b, $c, $d, $cc + LIBI4 SMLALTT, SMLALBB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL1B $a, $b, $c, $d, $cc + LIBI4 SMLALTB, SMLALBB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL1T $a, $b, $c, $d, $cc + LIBI4 
SMLALTT, SMLALBT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALB0 $a, $b, $c, $d, $cc + LIBI4 SMLALBB, SMLALBT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALB1 $a, $b, $c, $d, $cc + LIBI4 SMLALBT, SMLALBB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALT0 $a, $b, $c, $d, $cc + LIBI4 SMLALTB, SMLALTT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALT1 $a, $b, $c, $d, $cc + LIBI4 SMLALTT, SMLALTB, $a, $b, $c, $d, $cc + MEND + + ENDIF ;// ARMCOMM_S_H + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armOMX.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armOMX.h new file mode 100755 index 0000000000000000000000000000000000000000..7a68d14f4d01ae73e2262a843c9024815a3b4e33 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armOMX.h @@ -0,0 +1,274 @@ +/* + * + * File Name: armOMX.h + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * This file allows a version of the OMX DL libraries to be built where some or + * all of the function names can be given a user-specified suffix. + * + * You might want to use it where: + * + * - you want to rename a function "out of the way" so that you could replace + * a function with a different version (the original version would still be + * in the library just with a different name - so you could debug the new + * version by comparing it to the output of the old) + * + * - you want to rename all the functions to versions with a suffix so that + * you can include two versions of the library and choose between functions + * at runtime. + * + * e.g. 
omxIPBM_Copy_U8_C1R could be renamed omxIPBM_Copy_U8_C1R_CortexA8 + * + */ + + +#ifndef _armOMX_H_ +#define _armOMX_H_ + + +/* We need to define these two macros in order to expand and concatenate the names */ +#define OMXCAT2BAR(A, B) omx ## A ## B +#define OMXCATBAR(A, B) OMXCAT2BAR(A, B) + +/* Define the suffix to add to all functions - the default is no suffix */ +#define BARE_SUFFIX + + + +/* Define what happens to the bare suffix-less functions, down to the sub-domain accuracy */ +#define OMXACAAC_SUFFIX BARE_SUFFIX +#define OMXACMP3_SUFFIX BARE_SUFFIX +#define OMXICJP_SUFFIX BARE_SUFFIX +#define OMXIPBM_SUFFIX BARE_SUFFIX +#define OMXIPCS_SUFFIX BARE_SUFFIX +#define OMXIPPP_SUFFIX BARE_SUFFIX +#define OMXSP_SUFFIX BARE_SUFFIX +#define OMXVCCOMM_SUFFIX BARE_SUFFIX +#define OMXVCM4P10_SUFFIX BARE_SUFFIX +#define OMXVCM4P2_SUFFIX BARE_SUFFIX + + + + +/* Define what the each bare, un-suffixed OpenMAX API function names is to be renamed */ +#define omxACAAC_DecodeChanPairElt OMXCATBAR(ACAAC_DecodeChanPairElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeDatStrElt OMXCATBAR(ACAAC_DecodeDatStrElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeFillElt OMXCATBAR(ACAAC_DecodeFillElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeIsStereo_S32 OMXCATBAR(ACAAC_DecodeIsStereo_S32, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeMsPNS_S32_I OMXCATBAR(ACAAC_DecodeMsPNS_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeMsStereo_S32_I OMXCATBAR(ACAAC_DecodeMsStereo_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_DecodePrgCfgElt OMXCATBAR(ACAAC_DecodePrgCfgElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeTNS_S32_I OMXCATBAR(ACAAC_DecodeTNS_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_DeinterleaveSpectrum_S32 OMXCATBAR(ACAAC_DeinterleaveSpectrum_S32, OMXACAAC_SUFFIX) +#define omxACAAC_EncodeTNS_S32_I OMXCATBAR(ACAAC_EncodeTNS_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_LongTermPredict_S32 OMXCATBAR(ACAAC_LongTermPredict_S32, OMXACAAC_SUFFIX) +#define omxACAAC_LongTermReconstruct_S32_I 
OMXCATBAR(ACAAC_LongTermReconstruct_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_MDCTFwd_S32 OMXCATBAR(ACAAC_MDCTFwd_S32, OMXACAAC_SUFFIX) +#define omxACAAC_MDCTInv_S32_S16 OMXCATBAR(ACAAC_MDCTInv_S32_S16, OMXACAAC_SUFFIX) +#define omxACAAC_NoiselessDecode OMXCATBAR(ACAAC_NoiselessDecode, OMXACAAC_SUFFIX) +#define omxACAAC_QuantInv_S32_I OMXCATBAR(ACAAC_QuantInv_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_UnpackADIFHeader OMXCATBAR(ACAAC_UnpackADIFHeader, OMXACAAC_SUFFIX) +#define omxACAAC_UnpackADTSFrameHeader OMXCATBAR(ACAAC_UnpackADTSFrameHeader, OMXACAAC_SUFFIX) + + +#define omxACMP3_HuffmanDecode_S32 OMXCATBAR(ACMP3_HuffmanDecode_S32, OMXACMP3_SUFFIX) +#define omxACMP3_HuffmanDecodeSfb_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfb_S32, OMXACMP3_SUFFIX) +#define omxACMP3_HuffmanDecodeSfbMbp_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfbMbp_S32, OMXACMP3_SUFFIX) +#define omxACMP3_MDCTInv_S32 OMXCATBAR(ACMP3_MDCTInv_S32, OMXACMP3_SUFFIX) +#define omxACMP3_ReQuantize_S32_I OMXCATBAR(ACMP3_ReQuantize_S32_I, OMXACMP3_SUFFIX) +#define omxACMP3_ReQuantizeSfb_S32_I OMXCATBAR(ACMP3_ReQuantizeSfb_S32_I, OMXACMP3_SUFFIX) +#define omxACMP3_SynthPQMF_S32_S16 OMXCATBAR(ACMP3_SynthPQMF_S32_S16, OMXACMP3_SUFFIX) +#define omxACMP3_UnpackFrameHeader OMXCATBAR(ACMP3_UnpackFrameHeader, OMXACMP3_SUFFIX) +#define omxACMP3_UnpackScaleFactors_S8 OMXCATBAR(ACMP3_UnpackScaleFactors_S8, OMXACMP3_SUFFIX) +#define omxACMP3_UnpackSideInfo OMXCATBAR(ACMP3_UnpackSideInfo, OMXACMP3_SUFFIX) + +#define omxICJP_CopyExpand_U8_C3 OMXCATBAR(ICJP_CopyExpand_U8_C3, OMXICJP_SUFFIX) +#define omxICJP_DCTFwd_S16 OMXCATBAR(ICJP_DCTFwd_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTFwd_S16_I OMXCATBAR(ICJP_DCTFwd_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTInv_S16 OMXCATBAR(ICJP_DCTInv_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTInv_S16_I OMXCATBAR(ICJP_DCTInv_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwd_Multiple_S16 OMXCATBAR(ICJP_DCTQuantFwd_Multiple_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwd_S16 
OMXCATBAR(ICJP_DCTQuantFwd_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwd_S16_I OMXCATBAR(ICJP_DCTQuantFwd_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwdTableInit OMXCATBAR(ICJP_DCTQuantFwdTableInit, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInv_Multiple_S16 OMXCATBAR(ICJP_DCTQuantInv_Multiple_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInv_S16 OMXCATBAR(ICJP_DCTQuantInv_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInv_S16_I OMXCATBAR(ICJP_DCTQuantInv_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInvTableInit OMXCATBAR(ICJP_DCTQuantInvTableInit, OMXICJP_SUFFIX) +#define omxICJP_DecodeHuffman8x8_Direct_S16_C1 OMXCATBAR(ICJP_DecodeHuffman8x8_Direct_S16_C1, OMXICJP_SUFFIX) +#define omxICJP_DecodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX) +#define omxICJP_DecodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecInit_U8, OMXICJP_SUFFIX) +#define omxICJP_EncodeHuffman8x8_Direct_S16_U1_C1 OMXCATBAR(ICJP_EncodeHuffman8x8_Direct_S16_U1_C1, OMXICJP_SUFFIX) +#define omxICJP_EncodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX) +#define omxICJP_EncodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecInit_U8, OMXICJP_SUFFIX) + +#define omxIPBM_AddC_U8_C1R_Sfs OMXCATBAR(IPBM_AddC_U8_C1R_Sfs, OMXIPBM_SUFFIX) +#define omxIPBM_Copy_U8_C1R OMXCATBAR(IPBM_Copy_U8_C1R, OMXIPBM_SUFFIX) +#define omxIPBM_Copy_U8_C3R OMXCATBAR(IPBM_Copy_U8_C3R, OMXIPBM_SUFFIX) +#define omxIPBM_Mirror_U8_C1R OMXCATBAR(IPBM_Mirror_U8_C1R, OMXIPBM_SUFFIX) +#define omxIPBM_MulC_U8_C1R_Sfs OMXCATBAR(IPBM_MulC_U8_C1R_Sfs, OMXIPBM_SUFFIX) + +#define omxIPCS_ColorTwistQ14_U8_C3R OMXCATBAR(IPCS_ColorTwistQ14_U8_C3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX) +#define 
omxIPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr420RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr420RszRot_U8_P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr422RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R OMXCATBAR(IPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr422RszRot_U8_P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbYCr422ToBGR565_U8_U16_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR565_U8_U16_C2C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbYCr422ToBGR888_U8_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR888_U8_C2C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R 
OMXCATBAR(IPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToYCbCr420Rotate_U8_P3R OMXCATBAR(IPCS_YCbCr422ToYCbCr420Rotate_U8_P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR565_U8_U16_C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR888_U8_C3R OMXCATBAR(IPCS_YCbCr444ToBGR888_U8_C3R, OMXIPCS_SUFFIX) + +#define omxIPPP_Deblock_HorEdge_U8_I OMXCATBAR(IPPP_Deblock_HorEdge_U8_I, OMXIPPP_SUFFIX) +#define omxIPPP_Deblock_VerEdge_U8_I OMXCATBAR(IPPP_Deblock_VerEdge_U8_I, OMXIPPP_SUFFIX) +#define omxIPPP_FilterFIR_U8_C1R OMXCATBAR(IPPP_FilterFIR_U8_C1R, OMXIPPP_SUFFIX) +#define omxIPPP_FilterMedian_U8_C1R OMXCATBAR(IPPP_FilterMedian_U8_C1R, OMXIPPP_SUFFIX) +#define omxIPPP_GetCentralMoment_S64 OMXCATBAR(IPPP_GetCentralMoment_S64, OMXIPPP_SUFFIX) +#define omxIPPP_GetSpatialMoment_S64 OMXCATBAR(IPPP_GetSpatialMoment_S64, OMXIPPP_SUFFIX) +#define omxIPPP_MomentGetStateSize OMXCATBAR(IPPP_MomentGetStateSize, OMXIPPP_SUFFIX) +#define omxIPPP_MomentInit OMXCATBAR(IPPP_MomentInit, OMXIPPP_SUFFIX) +#define omxIPPP_Moments_U8_C1R OMXCATBAR(IPPP_Moments_U8_C1R, OMXIPPP_SUFFIX) +#define omxIPPP_Moments_U8_C3R OMXCATBAR(IPPP_Moments_U8_C3R, OMXIPPP_SUFFIX) + +#define omxSP_BlockExp_S16 OMXCATBAR(SP_BlockExp_S16, OMXSP_SUFFIX) +#define omxSP_BlockExp_S32 OMXCATBAR(SP_BlockExp_S32, OMXSP_SUFFIX) +#define omxSP_Copy_S16 OMXCATBAR(SP_Copy_S16, OMXSP_SUFFIX) +#define omxSP_DotProd_S16 OMXCATBAR(SP_DotProd_S16, OMXSP_SUFFIX) +#define omxSP_DotProd_S16_Sfs OMXCATBAR(SP_DotProd_S16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_CToC_SC16_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_CToC_SC32_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC32_Sfs, OMXSP_SUFFIX) +#define 
omxSP_FFTFwd_RToCCS_S16S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S16S32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_RToCCS_S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_C_SC16 OMXCATBAR(SP_FFTGetBufSize_C_SC16, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_C_SC32 OMXCATBAR(SP_FFTGetBufSize_C_SC32, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_R_S16S32 OMXCATBAR(SP_FFTGetBufSize_R_S16S32, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_R_S32 OMXCATBAR(SP_FFTGetBufSize_R_S32, OMXSP_SUFFIX) +#define omxSP_FFTInit_C_SC16 OMXCATBAR(SP_FFTInit_C_SC16, OMXSP_SUFFIX) +#define omxSP_FFTInit_C_SC32 OMXCATBAR(SP_FFTInit_C_SC32, OMXSP_SUFFIX) +#define omxSP_FFTInit_R_S16S32 OMXCATBAR(SP_FFTInit_R_S16S32, OMXSP_SUFFIX) +#define omxSP_FFTInit_R_S32 OMXCATBAR(SP_FFTInit_R_S32, OMXSP_SUFFIX) +#define omxSP_FFTInv_CCSToR_S32_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTInv_CCSToR_S32S16_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32S16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTInv_CToC_SC16_Sfs OMXCATBAR(SP_FFTInv_CToC_SC16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTInv_CToC_SC32_Sfs OMXCATBAR(SP_FFTInv_CToC_SC32_Sfs, OMXSP_SUFFIX) +#define omxSP_FilterMedian_S32 OMXCATBAR(SP_FilterMedian_S32, OMXSP_SUFFIX) +#define omxSP_FilterMedian_S32_I OMXCATBAR(SP_FilterMedian_S32_I, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16 OMXCATBAR(SP_FIR_Direct_S16, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16_I OMXCATBAR(SP_FIR_Direct_S16_I, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16_ISfs OMXCATBAR(SP_FIR_Direct_S16_ISfs, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16_Sfs OMXCATBAR(SP_FIR_Direct_S16_Sfs, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16 OMXCATBAR(SP_FIROne_Direct_S16, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16_I OMXCATBAR(SP_FIROne_Direct_S16_I, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16_ISfs OMXCATBAR(SP_FIROne_Direct_S16_ISfs, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16_Sfs OMXCATBAR(SP_FIROne_Direct_S16_Sfs, OMXSP_SUFFIX) +#define 
omxSP_IIR_BiQuadDirect_S16 OMXCATBAR(SP_IIR_BiQuadDirect_S16, OMXSP_SUFFIX) +#define omxSP_IIR_BiQuadDirect_S16_I OMXCATBAR(SP_IIR_BiQuadDirect_S16_I, OMXSP_SUFFIX) +#define omxSP_IIR_Direct_S16 OMXCATBAR(SP_IIR_Direct_S16, OMXSP_SUFFIX) +#define omxSP_IIR_Direct_S16_I OMXCATBAR(SP_IIR_Direct_S16_I, OMXSP_SUFFIX) +#define omxSP_IIROne_BiQuadDirect_S16 OMXCATBAR(SP_IIROne_BiQuadDirect_S16, OMXSP_SUFFIX) +#define omxSP_IIROne_BiQuadDirect_S16_I OMXCATBAR(SP_IIROne_BiQuadDirect_S16_I, OMXSP_SUFFIX) +#define omxSP_IIROne_Direct_S16 OMXCATBAR(SP_IIROne_Direct_S16, OMXSP_SUFFIX) +#define omxSP_IIROne_Direct_S16_I OMXCATBAR(SP_IIROne_Direct_S16_I, OMXSP_SUFFIX) + +#define omxVCCOMM_Average_16x OMXCATBAR(VCCOMM_Average_16x, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_Average_8x OMXCATBAR(VCCOMM_Average_8x, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_ComputeTextureErrorBlock OMXCATBAR(VCCOMM_ComputeTextureErrorBlock, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_ComputeTextureErrorBlock_SAD OMXCATBAR(VCCOMM_ComputeTextureErrorBlock_SAD, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_Copy16x16 OMXCATBAR(VCCOMM_Copy16x16, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_Copy8x8 OMXCATBAR(VCCOMM_Copy8x8, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_ExpandFrame_I OMXCATBAR(VCCOMM_ExpandFrame_I, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_LimitMVToRect OMXCATBAR(VCCOMM_LimitMVToRect, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_SAD_16x OMXCATBAR(VCCOMM_SAD_16x, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_SAD_8x OMXCATBAR(VCCOMM_SAD_8x, OMXVCCOMM_SUFFIX) + +#define omxVCM4P10_Average_4x OMXCATBAR(VCM4P10_Average_4x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_BlockMatch_Half OMXCATBAR(VCM4P10_BlockMatch_Half, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_BlockMatch_Integer OMXCATBAR(VCM4P10_BlockMatch_Integer, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_BlockMatch_Quarter OMXCATBAR(VCM4P10_BlockMatch_Quarter, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DeblockChroma_I OMXCATBAR(VCM4P10_DeblockChroma_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DeblockLuma_I 
OMXCATBAR(VCM4P10_DeblockLuma_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeChromaDcCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DecodeCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DequantTransformResidualFromPairAndAdd OMXCATBAR(VCM4P10_DequantTransformResidualFromPairAndAdd, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingChroma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_HorEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingChroma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_VerEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingLuma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_HorEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingLuma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_VerEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_GetVLCInfo OMXCATBAR(VCM4P10_GetVLCInfo, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateChroma OMXCATBAR(VCM4P10_InterpolateChroma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateHalfHor_Luma OMXCATBAR(VCM4P10_InterpolateHalfHor_Luma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateHalfVer_Luma OMXCATBAR(VCM4P10_InterpolateHalfVer_Luma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateLuma OMXCATBAR(VCM4P10_InterpolateLuma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InvTransformDequant_ChromaDC OMXCATBAR(VCM4P10_InvTransformDequant_ChromaDC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InvTransformDequant_LumaDC OMXCATBAR(VCM4P10_InvTransformDequant_LumaDC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InvTransformResidualAndAdd OMXCATBAR(VCM4P10_InvTransformResidualAndAdd, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_MEGetBufSize OMXCATBAR(VCM4P10_MEGetBufSize, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_MEInit OMXCATBAR(VCM4P10_MEInit, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_MotionEstimationMB OMXCATBAR(VCM4P10_MotionEstimationMB, OMXVCM4P10_SUFFIX) +#define 
omxVCM4P10_PredictIntra_16x16 OMXCATBAR(VCM4P10_PredictIntra_16x16, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_PredictIntra_4x4 OMXCATBAR(VCM4P10_PredictIntra_4x4, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_PredictIntraChroma_8x8 OMXCATBAR(VCM4P10_PredictIntraChroma_8x8, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SAD_4x OMXCATBAR(VCM4P10_SAD_4x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SADQuar_16x OMXCATBAR(VCM4P10_SADQuar_16x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SADQuar_4x OMXCATBAR(VCM4P10_SADQuar_4x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SADQuar_8x OMXCATBAR(VCM4P10_SADQuar_8x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SATD_4x4 OMXCATBAR(VCM4P10_SATD_4x4, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SubAndTransformQDQResidual OMXCATBAR(VCM4P10_SubAndTransformQDQResidual, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformDequantChromaDCFromPair OMXCATBAR(VCM4P10_TransformDequantChromaDCFromPair, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformDequantLumaDCFromPair OMXCATBAR(VCM4P10_TransformDequantLumaDCFromPair, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformQuant_ChromaDC OMXCATBAR(VCM4P10_TransformQuant_ChromaDC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformQuant_LumaDC OMXCATBAR(VCM4P10_TransformQuant_LumaDC, OMXVCM4P10_SUFFIX) + +#define omxVCM4P2_BlockMatch_Half_16x16 OMXCATBAR(VCM4P2_BlockMatch_Half_16x16, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_BlockMatch_Half_8x8 OMXCATBAR(VCM4P2_BlockMatch_Half_8x8, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_BlockMatch_Integer_16x16 OMXCATBAR(VCM4P2_BlockMatch_Integer_16x16, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_BlockMatch_Integer_8x8 OMXCATBAR(VCM4P2_BlockMatch_Integer_8x8, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DCT8x8blk OMXCATBAR(VCM4P2_DCT8x8blk, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeBlockCoef_Inter OMXCATBAR(VCM4P2_DecodeBlockCoef_Inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeBlockCoef_Intra OMXCATBAR(VCM4P2_DecodeBlockCoef_Intra, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodePadMV_PVOP OMXCATBAR(VCM4P2_DecodePadMV_PVOP, 
OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeVLCZigzag_Inter OMXCATBAR(VCM4P2_DecodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeMV OMXCATBAR(VCM4P2_EncodeMV, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeVLCZigzag_Inter OMXCATBAR(VCM4P2_EncodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_FindMVpred OMXCATBAR(VCM4P2_FindMVpred, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_IDCT8x8blk OMXCATBAR(VCM4P2_IDCT8x8blk, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MCReconBlock OMXCATBAR(VCM4P2_MCReconBlock, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MEGetBufSize OMXCATBAR(VCM4P2_MEGetBufSize, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MEInit OMXCATBAR(VCM4P2_MEInit, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MotionEstimationMB OMXCATBAR(VCM4P2_MotionEstimationMB, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_PredictReconCoefIntra OMXCATBAR(VCM4P2_PredictReconCoefIntra, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantInter_I OMXCATBAR(VCM4P2_QuantInter_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantIntra_I OMXCATBAR(VCM4P2_QuantIntra_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantInvInter_I OMXCATBAR(VCM4P2_QuantInvInter_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantInvIntra_I OMXCATBAR(VCM4P2_QuantInvIntra_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_TransRecBlockCoef_inter OMXCATBAR(VCM4P2_TransRecBlockCoef_inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_TransRecBlockCoef_intra OMXCATBAR(VCM4P2_TransRecBlockCoef_intra, OMXVCM4P2_SUFFIX) + + +#endif /* _armOMX_h_ */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes.h 
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes.h new file mode 100755 index 0000000000000000000000000000000000000000..8b295a6feee35b4c7cca52b5ef61b36bb41e0c63 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes.h @@ -0,0 +1,252 @@ +/** + * File: omxtypes.h + * Brief: Defines basic Data types used in OpenMAX v1.0.2 header files. + * + * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved. + * + * These materials are protected by copyright laws and contain material + * proprietary to the Khronos Group, Inc. You may use these materials + * for implementing Khronos specifications, without altering or removing + * any trademark, copyright or other notice from the specification. + * + * Khronos Group makes no, and expressly disclaims any, representations + * or warranties, express or implied, regarding these materials, including, + * without limitation, any implied warranties of merchantability or fitness + * for a particular purpose or non-infringement of any intellectual property. + * Khronos Group makes no, and expressly disclaims any, warranties, express + * or implied, regarding the correctness, accuracy, completeness, timeliness, + * and reliability of these materials. + * + * Under no circumstances will the Khronos Group, or any of its Promoters, + * Contributors or Members or their respective partners, officers, directors, + * employees, agents or representatives be liable for any damages, whether + * direct, indirect, special or consequential damages for lost revenues, + * lost profits, or otherwise, arising from or in connection with these + * materials. + * + * Khronos and OpenMAX are trademarks of the Khronos Group Inc. 
+ * + */ + +#ifndef _OMXTYPES_H_ +#define _OMXTYPES_H_ + +#include + +#define OMX_IN +#define OMX_OUT +#define OMX_INOUT + + +typedef enum { + + /* Mandatory return codes - use cases are explicitly described for each function */ + OMX_Sts_NoErr = 0, /* No error, the function completed successfully */ + OMX_Sts_Err = -2, /* Unknown/unspecified error */ + OMX_Sts_InvalidBitstreamValErr = -182, /* Invalid value detected during bitstream processing */ + OMX_Sts_MemAllocErr = -9, /* Not enough memory allocated for the operation */ + OMX_StsACAAC_GainCtrErr = -159, /* AAC: Unsupported gain control data detected */ + OMX_StsACAAC_PrgNumErr = -167, /* AAC: Invalid number of elements for one program */ + OMX_StsACAAC_CoefValErr = -163, /* AAC: Invalid quantized coefficient value */ + OMX_StsACAAC_MaxSfbErr = -162, /* AAC: Invalid maxSfb value in relation to numSwb */ + OMX_StsACAAC_PlsDataErr = -160, /* AAC: pulse escape sequence data error */ + + /* Optional return codes - use cases are explicitly described for each function*/ + OMX_Sts_BadArgErr = -5, /* Bad Arguments */ + + OMX_StsACAAC_TnsNumFiltErr = -157, /* AAC: Invalid number of TNS filters */ + OMX_StsACAAC_TnsLenErr = -156, /* AAC: Invalid TNS region length */ + OMX_StsACAAC_TnsOrderErr = -155, /* AAC: Invalid order of TNS filter */ + OMX_StsACAAC_TnsCoefResErr = -154, /* AAC: Invalid bit-resolution for TNS filter coefficients */ + OMX_StsACAAC_TnsCoefErr = -153, /* AAC: Invalid TNS filter coefficients */ + OMX_StsACAAC_TnsDirectErr = -152, /* AAC: Invalid TNS filter direction */ + + OMX_StsICJP_JPEGMarkerErr = -183, /* JPEG marker encountered within an entropy-coded block; */ + /* Huffman decoding operation terminated early. */ + OMX_StsICJP_JPEGMarker = -181, /* JPEG marker encountered; Huffman decoding */ + /* operation terminated early. 
*/ + OMX_StsIPPP_ContextMatchErr = -17, /* Context parameter doesn't match to the operation */ + + OMX_StsSP_EvenMedianMaskSizeErr = -180, /* Even size of the Median Filter mask was replaced by the odd one */ + + OMX_Sts_MaximumEnumeration = INT_MAX /*Placeholder, forces enum of size OMX_INT*/ + + } OMXResult; /** Return value or error value returned from a function. Identical to OMX_INT */ + + +/* OMX_U8 */ +#if UCHAR_MAX == 0xff +typedef unsigned char OMX_U8; +#elif USHRT_MAX == 0xff +typedef unsigned short int OMX_U8; +#else +#error OMX_U8 undefined +#endif + + +/* OMX_S8 */ +#if SCHAR_MAX == 0x7f +typedef signed char OMX_S8; +#elif SHRT_MAX == 0x7f +typedef signed short int OMX_S8; +#else +#error OMX_S8 undefined +#endif + + +/* OMX_U16 */ +#if USHRT_MAX == 0xffff +typedef unsigned short int OMX_U16; +#elif UINT_MAX == 0xffff +typedef unsigned int OMX_U16; +#else +#error OMX_U16 undefined +#endif + + +/* OMX_S16 */ +#if SHRT_MAX == 0x7fff +typedef signed short int OMX_S16; +#elif INT_MAX == 0x7fff +typedef signed int OMX_S16; +#else +#error OMX_S16 undefined +#endif + + +/* OMX_U32 */ +#if UINT_MAX == 0xffffffff +typedef unsigned int OMX_U32; +#elif LONG_MAX == 0xffffffff +typedef unsigned long int OMX_U32; +#else +#error OMX_U32 undefined +#endif + + +/* OMX_S32 */ +#if INT_MAX == 0x7fffffff +typedef signed int OMX_S32; +#elif LONG_MAX == 0x7fffffff +typedef long signed int OMX_S32; +#else +#error OMX_S32 undefined +#endif + + +/* OMX_U64 & OMX_S64 */ +#if defined( _WIN32 ) || defined ( _WIN64 ) + typedef __int64 OMX_S64; /** Signed 64-bit integer */ + typedef unsigned __int64 OMX_U64; /** Unsigned 64-bit integer */ + #define OMX_MIN_S64 (0x8000000000000000i64) + #define OMX_MIN_U64 (0x0000000000000000i64) + #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFi64) + #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFi64) +#else + typedef long long OMX_S64; /** Signed 64-bit integer */ + typedef unsigned long long OMX_U64; /** Unsigned 64-bit integer */ + #define OMX_MIN_S64 
(0x8000000000000000LL) + #define OMX_MIN_U64 (0x0000000000000000LL) + #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFLL) + #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFLL) +#endif + + +/* OMX_SC8 */ +typedef struct +{ + OMX_S8 Re; /** Real part */ + OMX_S8 Im; /** Imaginary part */ + +} OMX_SC8; /** Signed 8-bit complex number */ + + +/* OMX_SC16 */ +typedef struct +{ + OMX_S16 Re; /** Real part */ + OMX_S16 Im; /** Imaginary part */ + +} OMX_SC16; /** Signed 16-bit complex number */ + + +/* OMX_SC32 */ +typedef struct +{ + OMX_S32 Re; /** Real part */ + OMX_S32 Im; /** Imaginary part */ + +} OMX_SC32; /** Signed 32-bit complex number */ + + +/* OMX_SC64 */ +typedef struct +{ + OMX_S64 Re; /** Real part */ + OMX_S64 Im; /** Imaginary part */ + +} OMX_SC64; /** Signed 64-bit complex number */ + + +/* OMX_F32 */ +typedef float OMX_F32; /** Single precision floating point,IEEE 754 */ + + +/* OMX_F64 */ +typedef double OMX_F64; /** Double precision floating point,IEEE 754 */ + + +/* OMX_INT */ +typedef int OMX_INT; /** signed integer corresponding to machine word length, has maximum signed value INT_MAX*/ + + +#define OMX_MIN_S8 (-128) +#define OMX_MIN_U8 0 +#define OMX_MIN_S16 (-32768) +#define OMX_MIN_U16 0 +#define OMX_MIN_S32 (-2147483647-1) +#define OMX_MIN_U32 0 + +#define OMX_MAX_S8 (127) +#define OMX_MAX_U8 (255) +#define OMX_MAX_S16 (32767) +#define OMX_MAX_U16 (0xFFFF) +#define OMX_MAX_S32 (2147483647) +#define OMX_MAX_U32 (0xFFFFFFFF) + +typedef void OMXVoid; + +#ifndef NULL +#define NULL ((void*)0) +#endif + +/** Defines the geometric position and size of a rectangle, + * where x,y defines the coordinates of the top left corner + * of the rectangle, with dimensions width in the x-direction + * and height in the y-direction */ +typedef struct { + OMX_INT x; /** x-coordinate of top left corner of rectangle */ + OMX_INT y; /** y-coordinate of top left corner of rectangle */ + OMX_INT width; /** Width in the x-direction. */ + OMX_INT height; /** Height in the y-direction. 
*/ +}OMXRect; + + +/** Defines the geometric position of a point, */ +typedef struct +{ + OMX_INT x; /** x-coordinate */ + OMX_INT y; /** y-coordinate */ + +} OMXPoint; + + +/** Defines the dimensions of a rectangle, or region of interest in an image */ +typedef struct +{ + OMX_INT width; /** Width of the rectangle, in the x-direction */ + OMX_INT height; /** Height of the rectangle, in the y-direction */ + +} OMXSize; + +#endif /* _OMXTYPES_H_ */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes_s.h new file mode 100755 index 0000000000000000000000000000000000000000..48703d1c0f171dfcc4aa1e1524a4d8dcd3fefb52 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes_s.h @@ -0,0 +1,77 @@ +;// +;// +;// File Name: omxtypes_s.h +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+;// +;// +;// + +;// Mandatory return codes - use cases are explicitly described for each function +OMX_Sts_NoErr EQU 0 ;// No error the function completed successfully +OMX_Sts_Err EQU -2 ;// Unknown/unspecified error +OMX_Sts_InvalidBitstreamValErr EQU -182 ;// Invalid value detected during bitstream processing +OMX_Sts_MemAllocErr EQU -9 ;// Not enough memory allocated for the operation +OMX_StsACAAC_GainCtrErr EQU -159 ;// AAC: Unsupported gain control data detected +OMX_StsACAAC_PrgNumErr EQU -167 ;// AAC: Invalid number of elements for one program +OMX_StsACAAC_CoefValErr EQU -163 ;// AAC: Invalid quantized coefficient value +OMX_StsACAAC_MaxSfbErr EQU -162 ;// AAC: Invalid maxSfb value in relation to numSwb +OMX_StsACAAC_PlsDataErr EQU -160 ;// AAC: pulse escape sequence data error + +;// Optional return codes - use cases are explicitly described for each function +OMX_Sts_BadArgErr EQU -5 ;// Bad Arguments + +OMX_StsACAAC_TnsNumFiltErr EQU -157 ;// AAC: Invalid number of TNS filters +OMX_StsACAAC_TnsLenErr EQU -156 ;// AAC: Invalid TNS region length +OMX_StsACAAC_TnsOrderErr EQU -155 ;// AAC: Invalid order of TNS filter +OMX_StsACAAC_TnsCoefResErr EQU -154 ;// AAC: Invalid bit-resolution for TNS filter coefficients +OMX_StsACAAC_TnsCoefErr EQU -153 ;// AAC: Invalid TNS filter coefficients +OMX_StsACAAC_TnsDirectErr EQU -152 ;// AAC: Invalid TNS filter direction + +OMX_StsICJP_JPEGMarkerErr EQU -183 ;// JPEG marker encountered within an entropy-coded block; + ;// Huffman decoding operation terminated early. +OMX_StsICJP_JPEGMarker EQU -181 ;// JPEG marker encountered; Huffman decoding + ;// operation terminated early. 
+OMX_StsIPPP_ContextMatchErr EQU -17 ;// Context parameter doesn't match to the operation + +OMX_StsSP_EvenMedianMaskSizeErr EQU -180 ;// Even size of the Median Filter mask was replaced by the odd one + +OMX_Sts_MaximumEnumeration EQU 0x7FFFFFFF + + + +OMX_MIN_S8 EQU (-128) +OMX_MIN_U8 EQU 0 +OMX_MIN_S16 EQU (-32768) +OMX_MIN_U16 EQU 0 + + +OMX_MIN_S32 EQU (-2147483647-1) +OMX_MIN_U32 EQU 0 + +OMX_MAX_S8 EQU (127) +OMX_MAX_U8 EQU (255) +OMX_MAX_S16 EQU (32767) +OMX_MAX_U16 EQU (0xFFFF) +OMX_MAX_S32 EQU (2147483647) +OMX_MAX_U32 EQU (0xFFFFFFFF) + +OMX_VC_UPPER EQU 0x1 ;// Used by the PredictIntra functions +OMX_VC_LEFT EQU 0x2 ;// Used by the PredictIntra functions +OMX_VC_UPPER_RIGHT EQU 0x40 ;// Used by the PredictIntra functions + +NULL EQU 0 + +;// Structures + + INCLUDE armCOMM_s.h + + M_STRUCT OMXPoint + M_FIELD x, 4 + M_FIELD y, 4 + M_ENDSTRUCT + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/build_vc.pl b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/build_vc.pl new file mode 100755 index 0000000000000000000000000000000000000000..649e74c21f7d1df804409afb8c8342123d9ba957 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/build_vc.pl @@ -0,0 +1,113 @@ +#!/usr/bin/perl +# +# +# File Name: build_vc.pl +# OpenMAX DL: v1.0.2 +# Revision: 12290 +# Date: Wednesday, April 9, 2008 +# +# (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +# +# +# +# This file builds the OpenMAX DL vc domain library omxVC.o. 
+# + +use File::Spec; +use strict; + +my ($CC, $CC_OPTS, $AS, $AS_OPTS, $LIB, $LIB_OPTS, $LIB_TYPE); + +$CC = 'armcc'; +$CC_OPTS = '--no_unaligned_access --cpu Cortex-A8 -c'; +$AS = 'armasm'; +$AS_OPTS = '--no_unaligned_access --cpu Cortex-A8'; +# $LIB = 'armlink'; +# $LIB_OPTS = '--partial -o'; +# $LIB_TYPE = '.o'; +$LIB = 'armar'; +$LIB_OPTS = '--create -r'; +$LIB_TYPE = '.a'; + +#------------------------ + +my (@headerlist, @filelist, $hd, $file, $ofile, $command, $objlist, $libfile, $h); + +# Define the list of directories containing included header files. +@headerlist = qw(api vc/api vc/m4p2/api vc/m4p10/api); + +# Define the list of source files to compile (one path per line in filelist_vc.txt). +open(FILES, '<filelist_vc.txt') or die("Can't open source file list\n"); +@filelist = <FILES>; +close(FILES); + +# Fix the file separators in the header paths +foreach $h (@headerlist) +{ + $h = File::Spec->canonpath($h); +} + +# Create the include path to be passed to the compiler +$hd = '-I' . join(' -I', @headerlist); + +# Create the build directories "/lib/" and "/obj/" (if they are not there already) +mkdir "obj", 0777 if (! -d "obj"); +mkdir "lib", 0777 if (! -d "lib"); + +$objlist = ''; + +# Compile each file +foreach $file (@filelist) +{ + my $f; + my $base; + my $ext; + my $objfile; + + chomp($file); + $file = File::Spec->canonpath($file); + + (undef, undef, $f) = File::Spec->splitpath($file); + $f=~s/[\n\f\r]//g; # Remove any end-of-line characters + + if(($base, $ext) = $f =~ /(.+)\.(\w)$/) + { + $objfile = File::Spec->catfile('obj', $base.'.o'); + + if($ext eq 'c') + { + $objlist .= "$objfile "; + $command = $CC.' '.$CC_OPTS.' '.$hd.' -o '.$objfile.' '.$file; + print "$command\n"; + system($command); + } + elsif($ext eq 's') + { + $objlist .= "$objfile "; + $command = $AS.' '.$AS_OPTS.' '.$hd.' -o '.$objfile.' '.$file; + print "$command\n"; + system($command); + } + else + { + print "Ignoring file: $f\n"; + } + } + else + { + die "No file extension found: $f\n"; + } +} + +# Do the final link stage to create the libraries. 
+$libfile = File::Spec->catfile('lib', 'omxVC'.$LIB_TYPE); +$command = $LIB.' '.$LIB_OPTS.' '.$libfile.' '.$objlist; +print "$command\n"; +(system($command) == 0) and print "Build successful\n"; + + + + + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/filelist_vc.txt b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/filelist_vc.txt new file mode 100755 index 0000000000000000000000000000000000000000..8db8eeba144134ad8ec85d3e23a8f765111b108d --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/filelist_vc.txt @@ -0,0 +1,75 @@ +./api/armCOMM.h +./api/armCOMM_BitDec_s.h +./api/armCOMM_Bitstream.h +./api/armCOMM_IDCT_s.h +./api/armCOMM_IDCTTable.h +./api/armCOMM_MaskTable.h +./api/armCOMM_s.h +./api/armCOMM_Version.h +./api/armOMX_ReleaseVersion.h +./api/omxtypes.h +./api/omxtypes_s.h +./src/armCOMM_IDCTTable.c +./src/armCOMM_MaskTable.c +./vc/api/armVC.h +./vc/api/armVCCOMM_s.h +./vc/api/omxVC.h +./vc/api/omxVC_s.h +./vc/comm/src/omxVCCOMM_Copy16x16_s.s +./vc/comm/src/omxVCCOMM_Copy8x8_s.s +./vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s +./vc/m4p10/api/armVCM4P10_CAVLCTables.h +./vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s +./vc/m4p10/src/armVCM4P10_CAVLCTables.c +./vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s +./vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s +./vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s +./vc/m4p10/src/armVCM4P10_DequantTables_s.s +./vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s +./vc/m4p10/src/armVCM4P10_QuantTables_s.s 
+./vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s +./vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s +./vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c +./vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c +./vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c +./vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c +./vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s +./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s +./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s +./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s +./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s +./vc/m4p10/src/omxVCM4P10_InterpolateChroma.c +./vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s +./vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s +./vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s +./vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s +./vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s +./vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s +./vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h +./vc/m4p2/api/armVCM4P2_ZigZag_Tables.h +./vc/m4p2/src/armVCM4P2_Clip8_s.s +./vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s +./vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c +./vc/m4p2/src/armVCM4P2_Lookup_Tables.c +./vc/m4p2/src/armVCM4P2_SetPredDir_s.s +./vc/m4p2/src/armVCM4P2_Zigzag_Tables.c +./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c +./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c +./vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s +./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s +./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s +./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s +./vc/m4p2/src/omxVCM4P2_FindMVpred_s.s +./vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s +./vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s +./vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s +./vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s +./vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s +./vc/src/armVC_Version.c \ No newline at end of file diff --git 
a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM.c new file mode 100755 index 0000000000000000000000000000000000000000..e572a896754dd46c166e31ae827eab526d62a645 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM.c @@ -0,0 +1,936 @@ +/** + * + * File Name: armCOMM.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Defines Common APIs used across OpenMAX API's + */ + +#include "omxtypes.h" +#include "armCOMM.h" + +/***********************************************************************/ + /* Miscellaneous Arithmetic operations */ + +/** + * Function: armRoundFloatToS16 + * + * Description: + * Converts a double precision value into a short int after rounding + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16 format + * + */ + +OMX_S16 armRoundFloatToS16 (OMX_F64 Value) +{ + if (Value > 0) + { + return (OMX_S16)(Value + .5); + } + else + { + return (OMX_S16)(Value - .5); + } +} + +/** + * Function: armRoundFloatToS32 + * + * Description: + * Converts a double precision value into a int after rounding + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S32 format + * + */ + +OMX_S32 armRoundFloatToS32 (OMX_F64 Value) +{ + if (Value > 0) + { + return (OMX_S32)(Value + .5); + } + else + { + return (OMX_S32)(Value - .5); + } +} +/** + * Function: armSatRoundFloatToS16 + * + * Description: + * Converts a double precision value into a short int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16 format + * + */ + +OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value) +{ + if (Value > 0) + { + Value += 0.5; + + 
if(Value > (OMX_S16)OMX_MAX_S16 ) + { + return (OMX_S16)OMX_MAX_S16; + } + else + { + return (OMX_S16)Value; + } + } + else + { + Value -= 0.5; + + if(Value < (OMX_S16)OMX_MIN_S16 ) + { + return (OMX_S16)OMX_MIN_S16; + } + else + { + return (OMX_S16)Value; + } + } +} + +/** + * Function: armSatRoundFloatToS32 + * + * Description: + * Converts a double precision value into a int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S32 format + * + */ + +OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value) +{ + if (Value > 0) + { + Value += 0.5; + + if(Value > (OMX_S32)OMX_MAX_S32 ) + { + return (OMX_S32)OMX_MAX_S32; + } + else + { + return (OMX_S32)Value; + } + } + else + { + Value -= 0.5; + + if(Value < (OMX_S32)OMX_MIN_S32 ) + { + return (OMX_S32)OMX_MIN_S32; + } + else + { + return (OMX_S32)Value; + } + } +} + +/** + * Function: armSatRoundFloatToU16 + * + * Description: + * Converts a double precision value into a unsigned short int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_U16 format + * + */ + +OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value) +{ + Value += 0.5; + + if(Value > (OMX_U16)OMX_MAX_U16 ) + { + return (OMX_U16)OMX_MAX_U16; + } + else + { + return (OMX_U16)Value; + } +} + +/** + * Function: armSatRoundFloatToU32 + * + * Description: + * Converts a double precision value into a unsigned int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_U32 format + * + */ + +OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value) +{ + Value += 0.5; + + if(Value > (OMX_U32)OMX_MAX_U32 ) + { + return (OMX_U32)OMX_MAX_U32; + } + else + { + return (OMX_U32)Value; + } +} + +/** + * Function: armRoundFloatToS64 + * + * Description: + * Converts a double precision value into a 64 bit int after 
rounding + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S64 format + * + */ + +OMX_S64 armRoundFloatToS64 (OMX_F64 Value) +{ + if (Value > 0) + { + return (OMX_S64)(Value + .5); + } + else + { + return (OMX_S64)(Value - .5); + } +} + +/** + * Function: armSignCheck + * + * Description: + * Checks the sign of a variable: + * returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + * + * Remarks: + * + * Parameters: + * [in] var Variable to be checked + * + * Return Value: + * OMX_INT -- returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + */ + +OMX_INT armSignCheck ( + OMX_S16 var +) + +{ + OMX_INT Sign; + + if (var < 0) + { + Sign = -1; + } + else if ( var > 0) + { + Sign = 1; + } + else + { + Sign = 0; + } + + return Sign; +} + +/** + * Function: armClip + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: + * + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to the clipped + * + * Return Value: + * OMX_S32 -- returns clipped value + */ + +OMX_S32 armClip ( + OMX_INT min, + OMX_INT max, + OMX_S32 src +) + +{ + if (src > max) + { + src = max; + } + else if (src < min) + { + src = min; + } + + return src; +} + +/** + * Function: armClip_F32 + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: + * + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to the clipped + * + * Return Value: + * OMX_F32 -- returns clipped value + */ + +OMX_F32 armClip_F32 ( + OMX_F32 min, + OMX_F32 max, + OMX_F32 src +) + +{ + if (src > max) + { + src = max; + } + else if (src < min) + { + src = min; + } + + return src; +} + +/** + * Function: armShiftSat_F32 + * + * Description: Divides a float value by 2^shift and + * saturates it for unsigned value range for satBits. 
+ * Second parameter is like "shifting" the corresponding + * integer value. Takes care of rounding while clipping the final + * value. + * + * Parameters: + * [in] v Number to be operated upon + * [in] shift Divides the input "v" by "2^shift" + * [in] satBits Final range is [0, 2^satBits) + * + * Return Value: + * OMX_S32 -- returns "shifted" saturated value + */ + +OMX_U32 armShiftSat_F32(OMX_F32 v, OMX_INT shift, OMX_INT satBits) +{ + OMX_U32 allOnes = (OMX_U32)(-1); + OMX_U32 maxV = allOnes >> (32-satBits); + OMX_F32 vShifted, vRounded, shiftDiv = (OMX_F32)(1 << shift); + OMX_U32 vInt; + OMX_U32 vIntSat; + + if(v <= 0) + return 0; + + vShifted = v / shiftDiv; + vRounded = (OMX_F32)(vShifted + 0.5); + vInt = (OMX_U32)vRounded; + vIntSat = vInt; + if(vIntSat > maxV) + vIntSat = maxV; + return vIntSat; +} + +/** + * Functions: armSwapElem + * + * Description: + * These function swaps two elements at the specified pointer locations. + * The size of each element could be anything as specified by + * + * Return Value: + * OMXResult -- Error status from the function + */ +OMXResult armSwapElem( + OMX_U8 *pBuf1, + OMX_U8 *pBuf2, + OMX_INT elemSize + ) +{ + OMX_INT i; + OMX_U8 temp; + armRetArgErrIf(!pBuf1 || !pBuf2, OMX_Sts_BadArgErr); + + for(i = 0; i < elemSize; i++) + { + temp = *(pBuf1 + i); + *(pBuf1 + i) = *(pBuf2 + i); + *(pBuf2 + i) = temp; + } + return OMX_Sts_NoErr; +} + +/** + * Function: armMedianOf3 + * + * Description: Finds the median of three numbers + * + * Remarks: + * + * Parameters: + * [in] fEntry First entry + * [in] sEntry second entry + * [in] tEntry Third entry + * + * Return Value: + * OMX_S32 -- returns the median value + */ + +OMX_S32 armMedianOf3 ( + OMX_S32 fEntry, + OMX_S32 sEntry, + OMX_S32 tEntry +) +{ + OMX_S32 a, b, c; + + a = armMin (fEntry, sEntry); + b = armMax (fEntry, sEntry); + c = armMin (b, tEntry); + return (armMax (a, c)); +} + +/** + * Function: armLogSize + * + * Description: Finds the size of a positive value and returns 
the same + * + * Remarks: + * + * Parameters: + * [in] value Positive value + * + * Return Value: + * OMX_U8 -- Returns the minimum number of bits required to represent the positive value. + This is the smallest k>=0 such that that value is less than (1< 0; value = value >> 1) + { + i++; + } + return i; +} + +/***********************************************************************/ + /* Saturating Arithmetic operations */ + +/** + * Function :armSatAdd_S32() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * Parametrs: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S32 armSatAdd_S32(OMX_S32 Value1,OMX_S32 Value2) +{ + OMX_S32 Result; + + Result = Value1 + Value2; + + if( (Value1^Value2) >= 0) + { + /*Same sign*/ + if( (Result^Value1) >= 0) + { + /*Result has not saturated*/ + return Result; + } + else + { + if(Value1 >= 0) + { + /*Result has saturated in positive side*/ + return OMX_MAX_S32; + } + else + { + /*Result has saturated in negative side*/ + return OMX_MIN_S32; + } + + } + + } + else + { + return Result; + } + +} + +/** + * Function :armSatAdd_S64() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * Parametrs: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S64 armSatAdd_S64(OMX_S64 Value1,OMX_S64 Value2) +{ + OMX_S64 Result; + + Result = Value1 + Value2; + + if( (Value1^Value2) >= 0) + { + /*Same sign*/ + if( (Result^Value1) >= 0) + { + /*Result has not saturated*/ + return Result; + } + else + { + if(Value1 >= 0) + { + /*Result has saturated in positive side*/ + Result = OMX_MAX_S64; + return Result; + } + else + { + /*Result has saturated in negative side*/ + return OMX_MIN_S64; + } + + } + + } + else + { + return Result; + } + +} + +/** Function :armSatSub_S32() + * + * Description 
: + * Returns the result of saturated substraction of the two inputs Value1, Value2 + * + * Parametrs: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatSub_S32(OMX_S32 Value1,OMX_S32 Value2) +{ + OMX_S32 Result; + + Result = Value1 - Value2; + + if( (Value1^Value2) < 0) + { + /*Opposite sign*/ + if( (Result^Value1) >= 0) + { + /*Result has not saturated*/ + return Result; + } + else + { + if(Value1 >= 0) + { + /*Result has saturated in positive side*/ + return OMX_MAX_S32; + } + else + { + /*Result has saturated in negative side*/ + return OMX_MIN_S32; + } + + } + + } + else + { + return Result; + } + +} + +/** + * Function :armSatMac_S32() + * + * Description : + * Returns the result of Multiplication of Value1 and Value2 and subesquent saturated + * accumulation with Mac + * + * Parametrs: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * [in] Mac Accumulator + * + * Return: + * [out] Result of operation + **/ + +OMX_S32 armSatMac_S32(OMX_S32 Mac,OMX_S16 Value1,OMX_S16 Value2) +{ + OMX_S32 Result; + + Result = (OMX_S32)(Value1*Value2); + Result = armSatAdd_S32( Mac , Result ); + + return Result; +} + +/** + * Function :armSatMac_S16S32_S32 + * + * Description : + * Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac + * + * mac = mac + Saturate_in_32Bits(delayElem * filTap) + * + * Parametrs: + * [in] delayElem First 32 bit Operand + * [in] filTap Second 16 bit Operand + * [in] mac Result of MAC operation + * + * Return: + * [out] mac Result of operation + * + **/ + +OMX_S32 armSatMac_S16S32_S32(OMX_S32 mac, OMX_S32 delayElem, OMX_S16 filTap ) +{ + + OMX_S32 result; + + result = armSatMulS16S32_S32(filTap,delayElem); + + if ( result > OMX_MAX_S16 ) + { + result = OMX_MAX_S32; + } + else if( result < OMX_MIN_S16 ) + { + result = OMX_MIN_S32; + } + else + { + result = delayElem * filTap; + } + + mac = armSatAdd_S32(mac,result); + 
+ return mac; +} + + +/** + * Function :armSatRoundRightShift_S32_S16 + * + * Description : + * Returns the result of rounded right shift operation of input by the scalefactor + * + * output = Saturate_in_16Bits( ( Right/LeftShift( (Round(input) , shift ) ) + * + * Parametrs: + * [in] input The input to be operated on + * [in] shift The shift number + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S16 armSatRoundRightShift_S32_S16(OMX_S32 input, OMX_INT shift) +{ + input = armSatRoundLeftShift_S32(input,-shift); + + if ( input > OMX_MAX_S16 ) + { + return (OMX_S16)OMX_MAX_S16; + } + else if (input < OMX_MIN_S16) + { + return (OMX_S16)OMX_MIN_S16; + } + else + { + return (OMX_S16)input; + } + +} + +/** + * Function :armSatRoundLeftShift_S32() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. + * + * Parametrs: + * [in] Value Operand + * [in] Shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatRoundLeftShift_S32(OMX_S32 Value, OMX_INT Shift) +{ + OMX_INT i; + + if (Shift < 0) + { + Shift = -Shift; + Value = armSatAdd_S32(Value, (1 << (Shift - 1))); + Value = Value >> Shift; + } + else + { + for (i = 0; i < Shift; i++) + { + Value = armSatAdd_S32(Value, Value); + } + } + return Value; +} + +/** + * Function :armSatRoundLeftShift_S64() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. 
+ * + * Parametrs: + * [in] Value Operand + * [in] shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S64 armSatRoundLeftShift_S64(OMX_S64 Value, OMX_INT Shift) +{ + OMX_INT i; + + if (Shift < 0) + { + Shift = -Shift; + Value = armSatAdd_S64(Value, ((OMX_S64)1 << (Shift - 1))); + Value = Value >> Shift; + } + else + { + for (i = 0; i < Shift; i++) + { + Value = armSatAdd_S64(Value, Value); + } + } + return Value; +} + +/** + * Function :armSatMulS16S32_S32() + * + * Description : + * Returns the result of a S16 data type multiplied with an S32 data type + * in a S32 container + * + * Parametrs: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S32 armSatMulS16S32_S32(OMX_S16 input1,OMX_S32 input2) +{ + OMX_S16 hi2,lo1; + OMX_U16 lo2; + + OMX_S32 temp1,temp2; + OMX_S32 result; + + lo1 = input1; + + hi2 = ( input2 >> 16 ); + lo2 = ( (OMX_U32)( input2 << 16 ) >> 16 ); + + temp1 = hi2 * lo1; + temp2 = ( lo2* lo1 ) >> 16; + + result = armSatAdd_S32(temp1,temp2); + + return result; +} + +/** + * Function :armSatMulS32S32_S32() + * + * Description : + * Returns the result of a S32 data type multiplied with an S32 data type + * in a S32 container + * + * Parametrs: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatMulS32S32_S32(OMX_S32 input1,OMX_S32 input2) +{ + OMX_S16 hi1,hi2; + OMX_U16 lo1,lo2; + + OMX_S32 temp1,temp2,temp3; + OMX_S32 result; + + hi1 = ( input1 >> 16 ); + lo1 = ( (OMX_U32)( input1 << 16 ) >> 16 ); + + hi2 = ( input2 >> 16 ); + lo2 = ( (OMX_U32)( input2 << 16 ) >> 16 ); + + temp1 = hi1 * hi2; + temp2 = ( hi1* lo2 ) >> 16; + temp3 = ( hi2* lo1 ) >> 16; + + result = armSatAdd_S32(temp1,temp2); + result = armSatAdd_S32(result,temp3); + + return result; +} + +/** + * Function :armIntDivAwayFromZero() + * + * Description : Integer division with rounding to the nearest 
integer. + * Half-integer values are rounded away from zero + * unless otherwise specified. For example 3//2 is rounded + * to 2, and -3//2 is rounded to -2. + * + * Parametrs: + * [in] Num Operand 1 + * [in] Deno Operand 2 + * + * Return: + * [out] Result of operation input1//input2 + * + **/ + +OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno) +{ + OMX_F64 result; + + result = ((OMX_F64)Num)/((OMX_F64)Deno); + + if (result >= 0) + { + result += 0.5; + } + else + { + result -= 0.5; + } + + return (OMX_S32)(result); +} + + +/*End of File*/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_Bitstream.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_Bitstream.c new file mode 100755 index 0000000000000000000000000000000000000000..9ef9319d3056336525f7c5d4adcd77a4bfb9e1f3 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_Bitstream.c @@ -0,0 +1,329 @@ +/** + * + * File Name: armCOMM_Bitstream.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Defines bitstream encode and decode functions common to all codecs + */ + +#include "omxtypes.h" +#include "armCOMM.h" +#include "armCOMM_Bitstream.h" + +/*************************************** + * Fixed bit length Decode + ***************************************/ + +/** + * Function: armLookAheadBits() + * + * Description: + * Get the next N bits from the bitstream without advancing the bitstream pointer + * + * Parameters: + * [in] **ppBitStream + * [in] *pOffset + * [in] N=1...32 + * + * Returns Value + */ + +OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N) +{ + const OMX_U8 *pBitStream = *ppBitStream; + OMX_INT Offset = *pOffset; + OMX_U32 Value; + + armAssert(Offset>=0 && Offset<=7); + armAssert(N>=1 && N<=32); + + /* Read next 32 bits from stream */ + Value = (pBitStream[0] << 24 ) | ( pBitStream[1] << 16) | (pBitStream[2] << 8 ) | (pBitStream[3]) ; + Value = (Value << Offset ) | (pBitStream[4] >> (8-Offset)); + + /* Return N bits */ + return Value >> (32-N); +} + + +/** + * Function: armGetBits() + * + * Description: + * Read N bits from the bitstream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * [in] N=1..32 + * + * [out] *ppBitStream + * [out] *pOffset + * Returns Value + */ + + +OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N) +{ + const OMX_U8 *pBitStream = *ppBitStream; + OMX_INT Offset = *pOffset; + OMX_U32 Value; + + if(N == 0) + { + return 0; + } + + armAssert(Offset>=0 && Offset<=7); + armAssert(N>=1 && N<=32); + + /* Read next 32 bits from stream */ + Value = (pBitStream[0] << 24 ) | ( pBitStream[1] << 16) | (pBitStream[2] << 8 ) | (pBitStream[3]) ; + Value = (Value << Offset ) | (pBitStream[4] >> (8-Offset)); + + /* Advance bitstream pointer by N bits */ + Offset += N; + *ppBitStream = pBitStream + (Offset>>3); + *pOffset = Offset & 7; + + /* Return N bits */ + return Value >> (32-N); +} + +/** + * Function: armByteAlign() + * + * Description: + * Align 
the pointer *ppBitStream to the next byte boundary + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + +OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset) +{ + if(*pOffset > 0) + { + *ppBitStream += 1; + *pOffset = 0; + } +} + +/** + * Function: armSkipBits() + * + * Description: + * Skip N bits from the value at *ppBitStream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * [in] N + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + + +OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N) +{ + OMX_INT Offset = *pOffset; + const OMX_U8 *pBitStream = *ppBitStream; + + /* Advance bitstream pointer by N bits */ + Offset += N; + *ppBitStream = pBitStream + (Offset>>3); + *pOffset = Offset & 7; +} + +/*************************************** + * Variable bit length Decode + ***************************************/ + +/** + * Function: armUnPackVLC32() + * + * Description: + * Variable length decode of variable length symbol (max size 32 bits) read from + * the bit stream pointed by *ppBitStream at *pOffset by using the table + * pointed by pCodeBook + * + * Parameters: + * [in] *pBitStream + * [in] *pOffset + * [in] pCodeBook + * + * [out] *pBitStream + * [out] *pOffset + * + * Returns : Code Book Index if successfull. + * : ARM_NO_CODEBOOK_INDEX = -1 if search fails. 
+ **/ +#ifndef C_OPTIMIZED_IMPLEMENTATION + +OMX_U16 armUnPackVLC32( + const OMX_U8 **ppBitStream, + OMX_INT *pOffset, + const ARM_VLC32 *pCodeBook +) +{ + const OMX_U8 *pBitStream = *ppBitStream; + OMX_INT Offset = *pOffset; + OMX_U32 Value; + OMX_INT Index; + + armAssert(Offset>=0 && Offset<=7); + + /* Read next 32 bits from stream */ + Value = (pBitStream[0] << 24 ) | ( pBitStream[1] << 16) | (pBitStream[2] << 8 ) | (pBitStream[3]) ; + Value = (Value << Offset ) | (pBitStream[4] >> (8-Offset)); + + /* Search through the codebook */ + for (Index=0; pCodeBook->codeLen != 0; Index++) + { + if (pCodeBook->codeWord == (Value >> (32 - pCodeBook->codeLen))) + { + Offset = Offset + pCodeBook->codeLen; + *ppBitStream = pBitStream + (Offset >> 3) ; + *pOffset = Offset & 7; + + return Index; + } + pCodeBook++; + } + + /* No code match found */ + return ARM_NO_CODEBOOK_INDEX; +} + +#endif + +/*************************************** + * Fixed bit length Encode + ***************************************/ + +/** + * Function: armPackBits + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. + * [in] codeWord Code word that need to be inserted in to the + * bitstream + * [in] codeLength Length of the code word valid range 1...32 + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armPackBits ( + OMX_U8 **ppBitStream, + OMX_INT *pOffset, + OMX_U32 codeWord, + OMX_INT codeLength +) +{ + OMX_U8 *pBitStream = *ppBitStream; + OMX_INT Offset = *pOffset; + OMX_U32 Value; + + /* checking argument validity */ + armRetArgErrIf(Offset < 0, OMX_Sts_BadArgErr); + armRetArgErrIf(Offset > 7, OMX_Sts_BadArgErr); + armRetArgErrIf(codeLength < 1, OMX_Sts_BadArgErr); + armRetArgErrIf(codeLength > 32, OMX_Sts_BadArgErr); + + /* Prepare the first byte */ + codeWord = codeWord << (32-codeLength); + Value = (pBitStream[0] >> (8-Offset)) << (8-Offset); + Value = Value | (codeWord >> (24+Offset)); + + /* Write out whole bytes */ + while (8-Offset <= codeLength) + { + *pBitStream++ = (OMX_U8)Value; + codeWord = codeWord << (8-Offset); + codeLength = codeLength - (8-Offset); + Offset = 0; + Value = codeWord >> 24; + } + + /* Write out final partial byte */ + *pBitStream = (OMX_U8)Value; + *ppBitStream = pBitStream; + *pOffset = Offset + codeLength; + + return OMX_Sts_NoErr; +} + +/*************************************** + * Variable bit length Encode + ***************************************/ + +/** + * Function: armPackVLC32 + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pBitOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. + * [in] code VLC code word that need to be inserted in to the + * bitstream + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armPackVLC32 ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + ARM_VLC32 code +) +{ + return (armPackBits(ppBitStream, pBitOffset, code.codeWord, code.codeLen)); +} + +/*End of File*/ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_IDCTTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_IDCTTable.c new file mode 100755 index 0000000000000000000000000000000000000000..3f5e279c62bf646a0785240518d02555f6f9c4da --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_IDCTTable.c @@ -0,0 +1,60 @@ +/** + * + * File Name: armCOMM_IDCTTable.c + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armCOMM_IDCTTable.c + * Brief: Defines Tables used in IDCT computation + * + */ + +#include "armCOMM_IDCTTable.h" + + /* Table of s(u)*A(u)*A(v)/16 at Q15 + * s(u)=1.0 0 <= u <= 5 + * s(6)=2.0 + * s(7)=4.0 + * A(0) = 2*sqrt(2) + * A(u) = 4*cos(u*pi/16) for (u!=0) + */ + +__align(4) const OMX_U16 armCOMM_IDCTPreScale [64] = +{ + 0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1, + 0x58c5, 0x7b21, 0x73fc, 0x6862, 0x58c5, 0x45bf, 0x6016, 0x61f8, + 0x539f, 0x73fc, 0x6d41, 0x6254, 0x539f, 0x41b3, 0x5a82, 0x5c48, + 0x4b42, 0x6862, 0x6254, 0x587e, 0x4b42, 0x3b21, 0x5175, 0x530d, + 0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1, + 0x3249, 0x45bf, 0x41b3, 0x3b21, 0x3249, 0x2782, 0x366d, 0x377e, + 0x22a3, 0x300b, 0x2d41, 0x28ba, 0x22a3, 0x1b37, 0x257e, 0x263a, + 0x11a8, 0x187e, 0x1712, 0x14c3, 0x11a8, 0x0de0, 0x131d, 0x137d +}; + /* Above array armCOMM_IDCTPreScale, in Q23 format */ +const OMX_U32 armCOMM_IDCTPreScaleU32 [64] = +{ + 0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157, + 0x58c543, 0x7b20d8, 0x73fbfc, 0x686214, 0x58c543, 0x45bf1f, 0x6015a5, 0x61f78b, + 0x539eba, 0x73fbfc, 0x6d413d, 0x6253a6, 0x539eba, 
0x41b328, 0x5a827a, 0x5c4869, + 0x4b418c, 0x686214, 0x6253a6, 0x587de3, 0x4b418c, 0x3b20d8, 0x5174e0, 0x530d69, + 0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157, + 0x3248d4, 0x45bf1f, 0x41b328, 0x3b20d8, 0x3248d4, 0x27821d, 0x366d72, 0x377e6b, + 0x22a2f5, 0x300ad3, 0x2d413d, 0x28ba70, 0x22a2f5, 0x1b36b9, 0x257d86, 0x26398d, + 0x11a856, 0x187de3, 0x17121a, 0x14c35a, 0x11a856, 0x0ddf9b, 0x131cc7, 0x137ca2 +}; + +const OMX_U16 armCOMM_IDCTCoef [4] = +{ + 0x5a82, /* InvSqrt2 */ + 0x30fc, /* SinPIBy8 */ + 0x7642, /* CosPIBy8 */ + 0x0000 +}; + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_MaskTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_MaskTable.c new file mode 100755 index 0000000000000000000000000000000000000000..09f88c3ab071555f41e52034a1c2c44060819d59 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_MaskTable.c @@ -0,0 +1,45 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armCOMM_MaskTable.c + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Mask Table to mask the end of array. 
+ * + */ + +#include "omxtypes.h" + +#define MaskTableSize 72 + +const OMX_U16 armCOMM_qMaskTable16[MaskTableSize] = +{ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF +}; + +const OMX_U8 armCOMM_qMaskTable8[MaskTableSize] = +{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVC.h new file mode 100755 index 0000000000000000000000000000000000000000..35b510b07c0b5ac8deb7960fd3ec57f0d91e26aa --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVC.h @@ -0,0 +1,1153 @@ +/** + * + * File Name: armVC.h + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * File: armVideo.h + * Brief: Declares API's/Basic Data types used across the OpenMAX Video domain + * + */ + + +#ifndef _armVideo_H_ +#define _armVideo_H_ + +#include "omxVC.h" +#include "armCOMM_Bitstream.h" + +/** + * ARM specific state structure to hold Motion Estimation information. + */ + +struct m4p2_MESpec +{ + OMXVCM4P2MEParams MEParams; + OMXVCM4P2MEMode MEMode; +}; + +struct m4p10_MESpec +{ + OMXVCM4P10MEParams MEParams; + OMXVCM4P10MEMode MEMode; +}; + +typedef struct m4p2_MESpec ARMVCM4P2_MESpec; +typedef struct m4p10_MESpec ARMVCM4P10_MESpec; + +/** + * Function: armVCM4P2_CompareMV + * + * Description: + * Performs comparision of motion vectors and SAD's to decide the + * best MV and SAD + * + * Remarks: + * + * Parameters: + * [in] mvX x coordinate of the candidate motion vector + * [in] mvY y coordinate of the candidate motion vector + * [in] candSAD Candidate SAD + * [in] bestMVX x coordinate of the best motion vector + * [in] bestMVY y coordinate of the best motion vector + * [in] bestSAD best SAD + * + * Return Value: + * OMX_INT -- 1 to indicate that the current sad is the best + * 0 to indicate that it is NOT the best SAD + */ + +OMX_INT armVCM4P2_CompareMV ( + OMX_S16 mvX, + OMX_S16 mvY, + OMX_INT candSAD, + OMX_S16 bestMVX, + OMX_S16 bestMVY, + OMX_INT bestSAD); + +/** + * Function: armVCM4P2_ACDCPredict + * + * Description: + * Performs adaptive DC/AC coefficient prediction for an intra block. Prior + * to the function call, prediction direction (predDir) should be selected + * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2. + * + * Remarks: + * + * Parameters: + * [in] pSrcDst pointer to the coefficient buffer which contains + * the quantized coefficient residuals (PQF) of the + * current block + * [in] pPredBufRow pointer to the coefficient row buffer + * [in] pPredBufCol pointer to the coefficient column buffer + * [in] curQP quantization parameter of the current block. 
curQP + * may equal to predQP especially when the current + * block and the predictor block are in the same + * macroblock. + * [in] predQP quantization parameter of the predictor block + * [in] predDir indicates the prediction direction which takes one + * of the following values: + * OMX_VIDEO_HORIZONTAL predict horizontally + * OMX_VIDEO_VERTICAL predict vertically + * [in] ACPredFlag a flag indicating if AC prediction should be + * performed. It is equal to ac_pred_flag in the bit + * stream syntax of MPEG-4 + * [in] videoComp video component type (luminance, chrominance or + * alpha) of the current block + * [in] flag This flag defines the if one wants to use this functions to + * calculate PQF (set 1, prediction) or QF (set 0, reconstruction) + * [out] pPreACPredict pointer to the predicted coefficients buffer. + * Filled ONLY if it is not NULL + * [out] pSrcDst pointer to the coefficient buffer which contains + * the quantized coefficients (QF) of the current + * block + * [out] pPredBufRow pointer to the updated coefficient row buffer + * [out] pPredBufCol pointer to the updated coefficient column buffer + * [out] pSumErr pointer to the updated sum of the difference + * between predicted and unpredicted coefficients + * If this is NULL, do not update + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_ACDCPredict( + OMX_S16 * pSrcDst, + OMX_S16 * pPreACPredict, + OMX_S16 * pPredBufRow, + OMX_S16 * pPredBufCol, + OMX_INT curQP, + OMX_INT predQP, + OMX_INT predDir, + OMX_INT ACPredFlag, + OMXVCM4P2VideoComponent videoComp, + OMX_U8 flag, + OMX_INT *pSumErr +); + +/** + * Function: armVCM4P2_SetPredDir + * + * Description: + * Performs detecting the prediction direction + * + * Remarks: + * + * Parameters: + * [in] blockIndex block index indicating the component type and + * position as defined in subclause 6.1.3.8, of ISO/IEC + * 14496-2. 
Furthermore, indexes 6 to 9 indicate the + * alpha blocks spatially corresponding to luminance + * blocks 0 to 3 in the same macroblock. + * [in] pCoefBufRow pointer to the coefficient row buffer + * [in] pQpBuf pointer to the quantization parameter buffer + * [out] predQP quantization parameter of the predictor block + * [out] predDir indicates the prediction direction which takes one + * of the following values: + * OMX_VIDEO_HORIZONTAL predict horizontally + * OMX_VIDEO_VERTICAL predict vertically + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_SetPredDir( + OMX_INT blockIndex, + OMX_S16 *pCoefBufRow, + OMX_S16 *pCoefBufCol, + OMX_INT *predDir, + OMX_INT *predQP, + const OMX_U8 *pQpBuf +); + +/** + * Function: armVCM4P2_EncodeVLCZigzag_Intra + * + * Description: + * Performs zigzag scanning and VLC encoding for one intra block. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7. + * [in] pQDctBlkCoef pointer to the quantized DCT coefficient + * [in] predDir AC prediction direction, which is used to decide + * the zigzag scan pattern. This takes one of the + * following values: + * OMX_VIDEO_NONE AC prediction not used. + * Performs classical zigzag + * scan. + * OMX_VIDEO_HORIZONTAL Horizontal prediction. + * Performs alternate-vertical + * zigzag scan. + * OMX_VIDEO_VERTICAL Vertical prediction. + * Performs alternate-horizontal + * zigzag scan. + * [in] pattern block pattern which is used to decide whether + * this block is encoded + * [in] start start indicates whether the encoding begins with 0th element + * or 1st. + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. 
+ * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_EncodeVLCZigzag_Intra( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 predDir, + OMX_U8 pattern, + OMX_INT shortVideoHeader, + OMX_U8 start +); + +/** + * Function: armVCM4P2_DecodeVLCZigzag_Intra + * + * Description: + * Performs VLC decoding and inverse zigzag scan for one intra coded block. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bitstream buffer + * [in] pBitOffset pointer to the bit position in the byte pointed + * to by *ppBitStream. *pBitOffset is valid within + * [0-7]. + * [in] predDir AC prediction direction which is used to decide + * the zigzag scan pattern. It takes one of the + * following values: + * OMX_VIDEO_NONE AC prediction not used; + * perform classical zigzag scan; + * OMX_VIDEO_HORIZONTAL Horizontal prediction; + * perform alternate-vertical + * zigzag scan; + * OMX_VIDEO_VERTICAL Vertical prediction; + * thus perform + * alternate-horizontal + * zigzag scan. + * [in] videoComp video component type (luminance, chrominance or + * alpha) of the current block + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] start start indicates whether the encoding begins with 0th element + * or 1st. + * [out] ppBitStream *ppBitStream is updated after the block is + * decoded, so that it points to the current byte + * in the bit stream buffer + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream + * [out] pDst pointer to the coefficient buffer of current + * block. 
Should be 32-bit aligned + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_DecodeVLCZigzag_Intra( + const OMX_U8 ** ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_U8 predDir, + OMX_INT shortVideoHeader, + OMX_U8 start +); + +/** + * Function: armVCM4P2_FillVLDBuffer + * + * Description: + * Performs filling of the coefficient buffer according to the run, level + * and sign, also updates the index + * + * Parameters: + * [in] storeRun Stored Run value (count of zeros) + * [in] storeLevel Stored Level value (non-zero value) + * [in] sign Flag indicating the sign of level + * [in] last status of the last flag + * [in] pIndex pointer to coefficient index in 8x8 matrix + * [out] pIndex pointer to updated coefficient index in 8x8 + * matrix + * [in] pZigzagTable pointer to the zigzag tables + * [out] pDst pointer to the coefficient buffer of current + * block. Should be 32-bit aligned + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_FillVLDBuffer( + OMX_U32 storeRun, + OMX_S16 * pDst, + OMX_S16 storeLevel, + OMX_U8 sign, + OMX_U8 last, + OMX_U8 * index, + const OMX_U8 * pZigzagTable +); + +/** + * Function: armVCM4P2_GetVLCBits + * + * Description: + * Performs escape mode decision based on the run, run+, level, level+ and + * last combinations. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7 + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] start start indicates whether the encoding begins with + * 0th element or 1st. 
+ * [in/out] pLast pointer to last status flag + * [in] runBeginSingleLevelEntriesL0 The run value from which level + * will be equal to 1: last == 0 + * [in] IndexBeginSingleLevelEntriesL0 Array index in the VLC table + * pointing to the + * runBeginSingleLevelEntriesL0 + * [in] runBeginSingleLevelEntriesL1 The run value from which level + * will be equal to 1: last == 1 + * [in] IndexBeginSingleLevelEntriesL1 Array index in the VLC table + * pointing to the + * runBeginSingleLevelEntriesL1 + * [in] pRunIndexTableL0 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pVlcTableL0 VLC table for last == 0 + * [in] pRunIndexTableL1 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pVlcTableL1 VLC table for last == 1 + * [in] pLMAXTableL0 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pLMAXTableL1 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pRMAXTableL0 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pRMAXTableL1 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [out]pDst pointer to the coefficient buffer of current + * block. Should be 32-bit aligned + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes.
+ * + */ + +OMXResult armVCM4P2_GetVLCBits ( + const OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_INT shortVideoHeader, + OMX_U8 start, + OMX_U8 * pLast, + OMX_U8 runBeginSingleLevelEntriesL0, + OMX_U8 maxIndexForMultipleEntriesL0, + OMX_U8 maxRunForMultipleEntriesL1, + OMX_U8 maxIndexForMultipleEntriesL1, + const OMX_U8 * pRunIndexTableL0, + const ARM_VLC32 *pVlcTableL0, + const OMX_U8 * pRunIndexTableL1, + const ARM_VLC32 *pVlcTableL1, + const OMX_U8 * pLMAXTableL0, + const OMX_U8 * pLMAXTableL1, + const OMX_U8 * pRMAXTableL0, + const OMX_U8 * pRMAXTableL1, + const OMX_U8 * pZigzagTable +); + +/** + * Function: armVCM4P2_PutVLCBits + * + * Description: + * Checks the type of Escape Mode and put encoded bits for + * quantized DCT coefficients. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7 + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] start start indicates whether the encoding begins with + * 0th element or 1st. 
+ * [in] maxStoreRunL0 Max store possible (considering last and inter/intra) + * for last = 0 + * [in] maxStoreRunL1 Max store possible (considering last and inter/intra) + * for last = 1 + * [in] maxRunForMultipleEntriesL0 + * The run value after which level + * will be equal to 1: + * (considering last and inter/intra status) for last = 0 + * [in] maxRunForMultipleEntriesL1 + * The run value after which level + * will be equal to 1: + * (considering last and inter/intra status) for last = 1 + * [in] pRunIndexTableL0 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pVlcTableL0 VLC table for last == 0 + * [in] pRunIndexTableL1 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pVlcTableL1 VLC table for last == 1 + * [in] pLMAXTableL0 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pLMAXTableL1 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pRMAXTableL0 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pRMAXTableL1 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [out] pQDctBlkCoef pointer to the quantized DCT coefficient + * [out] ppBitStream *ppBitStream is updated after the block is encoded + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + */ + + +OMXResult armVCM4P2_PutVLCBits ( + OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_INT shortVideoHeader, + OMX_U8 start, + OMX_U8 maxStoreRunL0, + OMX_U8 maxStoreRunL1, + OMX_U8 maxRunForMultipleEntriesL0, + OMX_U8 maxRunForMultipleEntriesL1, + const OMX_U8 * pRunIndexTableL0, + const ARM_VLC32 *pVlcTableL0, + const OMX_U8 * pRunIndexTableL1, + const ARM_VLC32 *pVlcTableL1, + const OMX_U8 * pLMAXTableL0, + const OMX_U8 * pLMAXTableL1, + const OMX_U8 * pRMAXTableL0, + const OMX_U8 * pRMAXTableL1, + const OMX_U8 * pZigzagTable +); +/** + * Function: armVCM4P2_FillVLCBuffer + * + * Description: + * Performs calculating the VLC bits depending on the escape type and insert + * the same in the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7 + * [in] run Run value (count of zeros) to be encoded + * [in] level Level value (non-zero value) to be encoded + * [in] runPlus Calculated as runPlus = run - (RMAX + 1) + * [in] levelPlus Calculated as + * levelPlus = sign(level)*[abs(level) - LMAX] + * [in] fMode Flag indicating the escape modes + * [in] last status of the last flag + * [in] maxRunForMultipleEntries + * The run value after which level will be equal to 1: + * (considering last and inter/intra status) + * [in] pRunIndexTable Run Index table defined in + * armVCM4P2_Huff_tables_VLC.h + * [in] pVlcTable VLC table defined in armVCM4P2_Huff_tables_VLC.h + * [out] ppBitStream *ppBitStream is updated after the block is encoded + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armVCM4P2_FillVLCBuffer ( + OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + OMX_U32 run, + OMX_S16 level, + OMX_U32 runPlus, + OMX_S16 levelPlus, + OMX_U8 fMode, + OMX_U8 last, + OMX_U8 maxRunForMultipleEntries, + const OMX_U8 *pRunIndexTable, + const ARM_VLC32 *pVlcTable +); + +/** + * Function: armVCM4P2_CheckVLCEscapeMode + * + * Description: + * Performs escape mode decision based on the run, run+, level, level+ and + * last combinations. + * + * Remarks: + * + * Parameters: + * [in] run Run value (count of zeros) to be encoded + * [in] level Level value (non-zero value) to be encoded + * [in] runPlus Calculated as runPlus = run - (RMAX + 1) + * [in] levelPlus Calculated as + * levelPlus = sign(level)*[abs(level) - LMAX] + * [in] maxStoreRun Max store possible (considering last and inter/intra) + * [in] maxRunForMultipleEntries + * The run value after which level + * will be equal to 1: + * (considering last and inter/intra status) + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] pRunIndexTable Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c + * (considering last and inter/intra status) + * + * + * Return Value: + * Returns an Escape mode which can take values from 0 to 3 + * 0 --> no escape mode, 1 --> escape type 1, + * 2 --> escape type 2, 3 --> escape type 3, check section 7.4.1.3 + * in the MPEG ISO standard. + * + */ + +OMX_U8 armVCM4P2_CheckVLCEscapeMode( + OMX_U32 run, + OMX_U32 runPlus, + OMX_S16 level, + OMX_S16 levelPlus, + OMX_U8 maxStoreRun, + OMX_U8 maxRunForMultipleEntries, + OMX_INT shortVideoHeader, + const OMX_U8 *pRunIndexTable +); + + +/** + * Function: armVCM4P2_BlockMatch_Integer + * + * Description: + * Performs a 16x16 block search; estimates motion vector and associated minimum SAD.
+ * Both the input and output motion vectors are represented using half-pixel units, and + * therefore a shift left or right by 1 bit may be required, respectively, to match the + * input or output MVs with other functions that either generate output MVs or expect + * input MVs represented using integer pixel units. + * + * Remarks: + * + * Parameters: + * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB that + * corresponds to the location of the current macroblock in the current + * plane. + * [in] refWidth width of the reference plane + * [in] pRefRect pointer to the valid rectangle in the reference plane. Relative to image origin. + * It's not limited to the image boundary, but depends on the padding. For example, + * if you pad 4 pixels outside the image border, then the value for left border + * can be -4 + * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane (linear array, + * 256 entries); must be aligned on an 8-byte boundary. + * [in] pCurrPointPos position of the current macroblock in the current plane + * [in] pSrcPreMV pointer to predicted motion vector; NULL indicates no predicted MV + * [in] pSrcPreSAD pointer to SAD associated with the predicted MV (referenced by pSrcPreMV) + * [in] searchRange search range for 16X16 integer block, in units of full pixels; the search range + * is the same in all directions. It is inclusive of the boundary and specified in + * terms of integer pixel units. + * [in] pMESpec vendor-specific motion estimation specification structure; must have been allocated + * and then initialized using omxVCM4P2_MEInit prior to calling the block matching + * function. + * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8. + * [out] pDstMV pointer to estimated MV + * [out] pDstSAD pointer to minimum SAD + * + * Return Value: + * OMX_Sts_NoErr - no error.
+ * OMX_Sts_BadArgErr - bad arguments + * + */ + +OMXResult armVCM4P2_BlockMatch_Integer( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCMotionVector *pSrcPreMV, + const OMX_INT *pSrcPreSAD, + void *pMESpec, + OMXVCMotionVector *pDstMV, + OMX_INT *pDstSAD, + OMX_U8 BlockSize +); + +/** + * Function: armVCM4P2_BlockMatch_Half + * + * Description: + * Performs a 16x16 block match with half-pixel resolution. Returns the estimated + * motion vector and associated minimum SAD. This function estimates the half-pixel + * motion vector by interpolating the integer resolution motion vector referenced + * by the input parameter pSrcDstMV, i.e., the initial integer MV is generated + * externally. The input parameters pSrcRefBuf and pSearchPointRefPos should be + * shifted by the winning MV of 16x16 integer search prior to calling BlockMatch_Half_16x16. + * The function BlockMatch_Integer_16x16 may be used for integer motion estimation. + * + * Remarks: + * + * Parameters: + * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB + * that corresponds to the location of the current macroblock in + * the current plane. + * [in] refWidth width of the reference plane + * [in] pRefRect reference plane valid region rectangle + * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane + * (linear array, 256 entries); must be aligned on an 8-byte boundary. + * [in] pSearchPointRefPos position of the starting point for half pixel search (specified + * in terms of integer pixel units) in the reference plane. + * [in] rndVal rounding control bit for half pixel motion estimation; + * 0=rounding control disabled; 1=rounding control enabled + * [in] pSrcDstMV pointer to the initial MV estimate; typically generated during a prior + * 16X16 integer search and its unit is half pixel.
+ * [in] BlockSize MacroBlock Size i.e. either 16x16 or 8x8. + * [out]pSrcDstMV pointer to estimated MV + * [out]pDstSAD pointer to minimum SAD + * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * + */ + +OMXResult armVCM4P2_BlockMatch_Half( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pSearchPointRefPos, + OMX_INT rndVal, + OMXVCMotionVector *pSrcDstMV, + OMX_INT *pDstSAD, + OMX_U8 BlockSize +); +/** + * Function: armVCM4P2_PadMV + * + * Description: + * Performs motion vector padding for a macroblock. + * + * Remarks: + * + * Parameters: + * [in] pSrcDstMV pointer to motion vector buffer of the current + * macroblock + * [in] pTransp pointer to transparent status buffer of the + * current macroblock + * [out] pSrcDstMV pointer to motion vector buffer in which the + * motion vectors have been padded + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_PadMV( + OMXVCMotionVector * pSrcDstMV, + OMX_U8 * pTransp +); + +/* + * H.264 Specific Declarations + */ +/* Defines */ +#define ARM_M4P10_Q_OFFSET (15) + + +/* Dequant tables */ + +extern const OMX_U8 armVCM4P10_PosToVCol4x4[16]; +extern const OMX_U8 armVCM4P10_PosToVCol2x2[4]; +extern const OMX_U8 armVCM4P10_VMatrix[6][3]; +extern const OMX_U32 armVCM4P10_MFMatrix[6][3]; + + +/* + * Description: + * This function performs the work required by the OpenMAX + * DecodeCoeffsToPair function and DecodeChromaDCCoeffsToPair. + * Since most of the code is common we share it here.
+ * + * Parameters: + * [in] ppBitStream Double pointer to current byte in bit stream buffer + * [in] pOffset Pointer to current bit position in the byte pointed + * to by *ppBitStream + * [in] sMaxNumCoeff Maximum number of non-zero coefficients in current + * block (4,15 or 16) + * [in] nTable Table number (0 to 4) according to the five columns + * of Table 9-5 in the H.264 spec + * [out] ppBitStream *ppBitStream is updated after each block is decoded + * [out] pOffset *pOffset is updated after each block is decoded + * [out] pNumCoeff Pointer to the number of nonzero coefficients in + * this block + * [out] ppPosCoefbuf Double pointer to destination residual + * coefficient-position pair buffer + * Return Value: + * Standard omxError result. See enumeration for possible result codes. + + */ + +OMXResult armVCM4P10_DecodeCoeffsToPair( + const OMX_U8** ppBitStream, + OMX_S32* pOffset, + OMX_U8* pNumCoeff, + OMX_U8**ppPosCoefbuf, + OMX_INT nTable, + OMX_INT sMaxNumCoeff + ); + +/* + * Description: + * Perform DC style intra prediction, averaging upper and left block + * + * Parameters: + * [in] pSrcLeft Pointer to the buffer of 16 left coefficients: + * p[x, y] (x = -1, y = 0..3) + * [in] pSrcAbove Pointer to the buffer of 16 above coefficients: + * p[x,y] (x = 0..3, y = -1) + * [in] leftStep Step of left coefficient buffer + * [in] dstStep Step of the destination buffer + * [in] availability Neighboring 16x16 MB availability flag + * [out] pDst Pointer to the destination buffer + * + * Return Value: + * None + */ + +void armVCM4P10_PredictIntraDC4x4( + const OMX_U8* pSrcLeft, + const OMX_U8 *pSrcAbove, + OMX_U8* pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMX_S32 availability +); + +/* + * Description + * Unpack a 4x4 block of coefficient-residual pair values + * + * Parameters: + * [in] ppSrc Double pointer to residual coefficient-position pair + * buffer output by CAVLC decoding + * [out] ppSrc *ppSrc is updated to the start of next non empty block + * [out]
pDst Pointer to unpacked 4x4 block + */ + +void armVCM4P10_UnpackBlock4x4( + const OMX_U8 **ppSrc, + OMX_S16* pDst +); + +/* + * Description + * Unpack a 2x2 block of coefficient-residual pair values + * + * Parameters: + * [in] ppSrc Double pointer to residual coefficient-position pair + * buffer output by CAVLC decoding + * [out] ppSrc *ppSrc is updated to the start of next non empty block + * [out] pDst Pointer to unpacked 2x2 block + */ + +void armVCM4P10_UnpackBlock2x2( + const OMX_U8 **ppSrc, + OMX_S16* pDst +); + +/* + * Description + * Deblock one boundary pixel + * + * Parameters: + * [in] pQ0 Pointer to pixel q0 + * [in] Step Step between pixels q0 and q1 + * [in] tC0 Edge threshold value + * [in] alpha alpha threshold value + * [in] beta beta threshold value + * [in] bS deblocking strength + * [in] ChromaFlag True for chroma blocks + * [out] pQ0 Deblocked pixels + * + */ + +void armVCM4P10_DeBlockPixel( + OMX_U8 *pQ0, /* pointer to the pixel q0 */ + int Step, /* step between pixels q0 and q1 */ + int tC0, /* edge threshold value */ + int alpha, /* alpha */ + int beta, /* beta */ + int bS, /* deblocking strength */ + int ChromaFlag +); + +/** + * Function: armVCM4P10_InterpolateHalfHor_Luma + * + * Description: + * This function performs interpolation for horizontal 1/2-pel positions + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to interpolate + * in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer of the 1/2-pel + * + * Return Value: + * Standard OMXResult value.
+ * + */ + +OMXResult armVCM4P10_InterpolateHalfHor_Luma( + const OMX_U8* pSrc, + OMX_U32 iSrcStep, + OMX_U8* pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/** + * Function: armVCM4P10_InterpolateHalfVer_Luma + * + * Description: + * This function performs interpolation for vertical 1/2-pel positions + * around a full-pel position. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to interpolate + * in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer of the 1/2-pel + * + * Return Value: + * Standard OMXResult value. + * + */ + +OMXResult armVCM4P10_InterpolateHalfVer_Luma( + const OMX_U8* pSrc, + OMX_U32 iSrcStep, + OMX_U8* pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/** + * Function: armVCM4P10_InterpolateHalfDiag_Luma + * + * Description: + * This function performs interpolation for (1/2, 1/2) positions + * around a full-pel position. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to interpolate + * in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer of the (1/2,1/2)-pel + * + * Return Value: + * Standard OMXResult value. 
+ * + */ + +OMXResult armVCM4P10_InterpolateHalfDiag_Luma( + const OMX_U8* pSrc, + OMX_U32 iSrcStep, + OMX_U8* pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/* + * Description: + * Transform Residual 4x4 Coefficients + * + * Parameters: + * [in] pSrc Source 4x4 block + * [out] pDst Destination 4x4 block + * + */ + +void armVCM4P10_TransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc); + +/* + * Description: + * Forward Transform Residual 4x4 Coefficients + * + * Parameters: + * [in] pSrc Source 4x4 block + * [out] pDst Destination 4x4 block + * + */ + +void armVCM4P10_FwdTransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc); + +OMX_INT armVCM4P10_CompareMotionCostToMV ( + OMX_S16 mvX, + OMX_S16 mvY, + OMXVCMotionVector diffMV, + OMX_INT candSAD, + OMXVCMotionVector *bestMV, + OMX_U32 nLamda, + OMX_S32 *pBestCost); + +/** + * Function: armVCCOMM_SAD + * + * Description: + * This function calculates the SAD for NxM blocks. + * + * Remarks: + * + * [in] pSrcOrg Pointer to the original block + * [in] iStepOrg Step of the original block buffer + * [in] pSrcRef Pointer to the reference block + * [in] iStepRef Step of the reference block buffer + * [in] iHeight Height of the block + * [in] iWidth Width of the block + * [out] pDstSAD Pointer to the resulting SAD + * + * Return Value: + * Standard OMXResult value. + * + */ +OMXResult armVCCOMM_SAD( + const OMX_U8* pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8* pSrcRef, + OMX_U32 iStepRef, + OMX_S32* pDstSAD, + OMX_U32 iHeight, + OMX_U32 iWidth); + +/** + * Function: armVCCOMM_Average + * + * Description: + * This function calculates the average of two blocks and stores the result.
+ * + * Remarks: + * + * [in] pPred0 Pointer to the top-left corner of reference block 0 + * [in] pPred1 Pointer to the top-left corner of reference block 1 + * [in] iPredStep0 Step of reference block 0 + * [in] iPredStep1 Step of reference block 1 + * [in] iDstStep Step of the destination buffer + * [in] iWidth Width of the blocks + * [in] iHeight Height of the blocks + * [out] pDstPred Pointer to the destination buffer + * + * Return Value: + * Standard OMXResult value. + * + */ + OMXResult armVCCOMM_Average ( + const OMX_U8* pPred0, + const OMX_U8* pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8* pDstPred, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/** + * Function: armVCM4P10_SADQuar + * + * Description: + * This function calculates the SAD between one block (pSrc) and the + * average of the other two (pSrcRef0 and pSrcRef1) + * + * Remarks: + * + * [in] pSrc Pointer to the original block + * [in] pSrcRef0 Pointer to reference block 0 + * [in] pSrcRef1 Pointer to reference block 1 + * [in] iSrcStep Step of the original block buffer + * [in] iRefStep0 Step of reference block 0 + * [in] iRefStep1 Step of reference block 1 + * [in] iHeight Height of the block + * [in] iWidth Width of the block + * [out] pDstSAD Pointer of result SAD + * + * Return Value: + * Standard OMXResult value. + * + */ +OMXResult armVCM4P10_SADQuar( + const OMX_U8* pSrc, + const OMX_U8* pSrcRef0, + const OMX_U8* pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32* pDstSAD, + OMX_U32 iHeight, + OMX_U32 iWidth +); + +/** + * Function: armVCM4P10_Interpolate_Chroma + * + * Description: + * This function performs interpolation for chroma components. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to + * interpolate in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. 
+ * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [in] dx Fractional part of horizontal motion vector + * component in 1/8 pixel unit (0~7) + * [in] dy Fractional part of vertical motion vector + * component in 1/8 pixel unit (0~7) + * [out] pDst Pointer to the interpolation buffer + * + * Return Value: + * Standard OMXResult value. + * + */ + OMXResult armVCM4P10_Interpolate_Chroma( + OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight, + OMX_U32 dx, + OMX_U32 dy +); + +/** + * Function: armVCM4P10_Interpolate_Luma + * + * Description: + * This function performs interpolation for luma components. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to + * interpolate in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [in] dx Fractional part of horizontal motion vector + * component in 1/4 pixel unit (0~3) + * [in] dy Fractional part of vertical motion vector + * component in 1/4 pixel unit (0~3) + * [out] pDst Pointer to the interpolation buffer + * + * Return Value: + * Standard OMXResult value. + * + */ + + OMXResult armVCM4P10_Interpolate_Luma( + const OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight, + OMX_U32 dx, + OMX_U32 dy +); + +/** + * Function: omxVCH264_DequantTransformACFromPair_U8_S16_C1_DLx + * + * Description: + * Reconstruct the 4x4 residual block from coefficient-position pair buffer, + * perform dequantisation and integer inverse transformation for 4x4 block of + * residuals and update the pair buffer pointer to next non-empty block. 
+ * + * Remarks: + * + * Parameters: + * [in] ppSrc Double pointer to residual coefficient-position + * pair buffer output by CAVLC decoding + * [in] pDC Pointer to the DC coefficient of this block, NULL + * if it doesn't exist + * [in] QP Quantization parameter + * [in] AC Flag indicating if at least one non-zero coefficient exists + * [out] pDst pointer to the reconstructed 4x4 block data + * + * Return Value: + * Standard omxError result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx( + OMX_U8 **ppSrc, + OMX_S16 *pDst, + OMX_INT QP, + OMX_S16* pDC, + int AC +); + +#endif /*_armVideo_H_*/ + +/*End of File*/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVCCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVCCOMM_s.h new file mode 100755 index 0000000000000000000000000000000000000000..32a0166d7f25475fc9ba572609a051aaefc642c1 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVCCOMM_s.h @@ -0,0 +1,72 @@ +;// +;// +;// File Name: armVCCOMM_s.h +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// ARM optimized OpenMAX AC header file +;// +;// Formula used: +;// MACRO for calculating median for three values. + + + + IF :LNOT::DEF:ARMVCCOMM_S_H + INCLUDE armCOMM_s.h + M_VARIANTS CortexA8, ARM1136JS + + IF ARM1136JS :LOR: CortexA8 + + ;///* + ;// * Macro: M_MEDIAN3 + ;// * + ;// * Description: Finds the median of three numbers + ;// * + ;// * Remarks: + ;// * + ;// * Parameters: + ;// * [in] x First entry for the list of three numbers. + ;// * [in] y Second entry for the list of three numbers. + ;// * Input value may be corrupted at the end of + ;// * the execution of this macro. + ;// * [in] z Third entry of the list of three numbers.
+ ;// * Input value corrupted at the end of the + ;// * execution of this macro. + ;// * [in] t Temporary scratch register. + ;// * [out]z Median of the three numbers. + ;// */ + + MACRO + + M_MEDIAN3 $x, $y, $z, $t + + SUBS $t, $y, $z; // if (y < z) + ADDLT $z, $z, $t; // swap y and z + SUBLT $y, $y, $t; + + ;// Now z' <= y', so there are three cases for the + ;// median value, depending on x. + + ;// 1) x <= z' <= y' : median value is z' + ;// 2) z' <= x <= y' : median value is x + ;// 3) z' <= y' <= x : median value is y' + + CMP $z, $x; // if ( x > min(y,z) ) + MOVLT $z, $x; // ans = x + + CMP $x, $y; // if ( x > max(y,z) ) + MOVGT $z, $y; // ans = max(y,z) + + MEND + ENDIF + + + + ENDIF ;// ARMVCCOMM_S_H + + END \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC.h new file mode 100755 index 0000000000000000000000000000000000000000..7b3cc7289554a10744eacffc0d0af5ef39d61e8c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC.h @@ -0,0 +1,4381 @@ +/** + * File: omxVC.h + * Brief: OpenMAX DL v1.0.2 - Video Coding library + * + * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved. + * + * These materials are protected by copyright laws and contain material + * proprietary to the Khronos Group, Inc. You may use these materials + * for implementing Khronos specifications, without altering or removing + * any trademark, copyright or other notice from the specification. + * + * Khronos Group makes no, and expressly disclaims any, representations + * or warranties, express or implied, regarding these materials, including, + * without limitation, any implied warranties of merchantability or fitness + * for a particular purpose or non-infringement of any intellectual property.
+ * Khronos Group makes no, and expressly disclaims any, warranties, express + * or implied, regarding the correctness, accuracy, completeness, timeliness, + * and reliability of these materials. + * + * Under no circumstances will the Khronos Group, or any of its Promoters, + * Contributors or Members or their respective partners, officers, directors, + * employees, agents or representatives be liable for any damages, whether + * direct, indirect, special or consequential damages for lost revenues, + * lost profits, or otherwise, arising from or in connection with these + * materials. + * + * Khronos and OpenMAX are trademarks of the Khronos Group Inc. + * + */ + +/* *****************************************************************************************/ + +#ifndef _OMXVC_H_ +#define _OMXVC_H_ + +#include "omxtypes.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/* 6.1.1.1 Motion Vectors */ +/* In omxVC, motion vectors are represented as follows: */ + +typedef struct { + OMX_S16 dx; + OMX_S16 dy; +} OMXVCMotionVector; + + + +/** + * Function: omxVCCOMM_Average_8x (6.1.3.1.1) + * + * Description: + * This function calculates the average of two 8x4, 8x8, or 8x16 blocks. The + * result is rounded according to (a+b+1)/2. The block average function can + * be used in conjunction with half-pixel interpolation to obtain quarter + * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. + * + * Input Arguments: + * + * pPred0 - Pointer to the top-left corner of reference block 0 + * pPred1 - Pointer to the top-left corner of reference block 1 + * iPredStep0 - Step of reference block 0 + * iPredStep1 - Step of reference block 1 + * iDstStep - Step of the destination buffer. + * iHeight - Height of the blocks + * + * Output Arguments: + * + * pDstPred - Pointer to the destination buffer. 8-byte aligned. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pPred0, pPred1, or + * pDstPred. + * - pDstPred is not aligned on an 8-byte boundary. + * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 8. + * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 8. + * - iDstStep <= 0 or iDstStep is not a multiple of 8. + * - iHeight is not 4, 8, or 16. + * + */ +OMXResult omxVCCOMM_Average_8x ( + const OMX_U8 *pPred0, + const OMX_U8 *pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8 *pDstPred, + OMX_U32 iDstStep, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCCOMM_Average_16x (6.1.3.1.2) + * + * Description: + * This function calculates the average of two 16x16 or 16x8 blocks. The + * result is rounded according to (a+b+1)/2. The block average function can + * be used in conjunction with half-pixel interpolation to obtain quarter + * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. + * + * Input Arguments: + * + * pPred0 - Pointer to the top-left corner of reference block 0 + * pPred1 - Pointer to the top-left corner of reference block 1 + * iPredStep0 - Step of reference block 0 + * iPredStep1 - Step of reference block 1 + * iDstStep - Step of the destination buffer + * iHeight - Height of the blocks + * + * Output Arguments: + * + * pDstPred - Pointer to the destination buffer. 16-byte aligned. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pPred0, pPred1, or + * pDstPred. + * - pDstPred is not aligned on a 16-byte boundary. + * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 16. + * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 16. + * - iDstStep <= 0 or iDstStep is not a multiple of 16. + * - iHeight is not 8 or 16. 
+ * + */ +OMXResult omxVCCOMM_Average_16x ( + const OMX_U8 *pPred0, + const OMX_U8 *pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8 *pDstPred, + OMX_U32 iDstStep, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCCOMM_ExpandFrame_I (6.1.3.2.1) + * + * Description: + * This function expands a reconstructed frame in-place. The unexpanded + * source frame should be stored in a plane buffer with sufficient space + * pre-allocated for edge expansion, and the input frame should be located in + * the plane buffer center. This function executes the pixel expansion by + * replicating source frame edge pixel intensities in the empty pixel + * locations (expansion region) between the source frame edge and the plane + * buffer edge. The width/height of the expansion regions on the + * horizontal/vertical edges is controlled by the parameter iExpandPels. + * + * Input Arguments: + * + * pSrcDstPlane - pointer to the top-left corner of the frame to be + * expanded; must be aligned on an 8-byte boundary. + * iFrameWidth - frame width; must be a multiple of 8. + * iFrameHeight -frame height; must be a multiple of 8. + * iExpandPels - number of pixels to be expanded in the horizontal and + * vertical directions; must be a multiple of 8. + * iPlaneStep - distance, in bytes, between the start of consecutive lines + * in the plane buffer; must be larger than or equal to + * (iFrameWidth + 2 * iExpandPels). + * + * Output Arguments: + * + * pSrcDstPlane -Pointer to the top-left corner of the frame (NOT the + * top-left corner of the plane); must be aligned on an 8-byte + * boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - pSrcDstPlane is NULL. + * - pSrcDstPlane is not aligned on an 8-byte boundary. + * - one of the following parameters is either equal to zero or is a + * non-multiple of 8: iFrameHeight, iFrameWidth, iPlaneStep, or + * iExpandPels. 
+ * - iPlaneStep < (iFrameWidth + 2 * iExpandPels). + * + */ +OMXResult omxVCCOMM_ExpandFrame_I ( + OMX_U8 *pSrcDstPlane, + OMX_U32 iFrameWidth, + OMX_U32 iFrameHeight, + OMX_U32 iExpandPels, + OMX_U32 iPlaneStep +); + + + +/** + * Function: omxVCCOMM_Copy8x8 (6.1.3.3.1) + * + * Description: + * Copies the reference 8x8 block to the current block. + * + * Input Arguments: + * + * pSrc - pointer to the reference block in the source frame; must be + * aligned on an 8-byte boundary. + * step - distance between the starts of consecutive lines in the reference + * frame, in bytes; must be a multiple of 8 and must be larger than + * or equal to 8. + * + * Output Arguments: + * + * pDst - pointer to the destination block; must be aligned on an 8-byte + * boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pSrc, pDst + * - one or more of the following pointers is not aligned on an 8-byte + * boundary: pSrc, pDst + * - step <8 or step is not a multiple of 8. + * + */ +OMXResult omxVCCOMM_Copy8x8 ( + const OMX_U8 *pSrc, + OMX_U8 *pDst, + OMX_INT step +); + + + +/** + * Function: omxVCCOMM_Copy16x16 (6.1.3.3.2) + * + * Description: + * Copies the reference 16x16 macroblock to the current macroblock. + * + * Input Arguments: + * + * pSrc - pointer to the reference macroblock in the source frame; must be + * aligned on a 16-byte boundary. + * step - distance between the starts of consecutive lines in the reference + * frame, in bytes; must be a multiple of 16 and must be larger + * than or equal to 16. + * + * Output Arguments: + * + * pDst - pointer to the destination macroblock; must be aligned on a + * 16-byte boundary. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pSrc, pDst + * - one or more of the following pointers is not aligned on a 16-byte + * boundary: pSrc, pDst + * - step <16 or step is not a multiple of 16. + * + */ +OMXResult omxVCCOMM_Copy16x16 ( + const OMX_U8 *pSrc, + OMX_U8 *pDst, + OMX_INT step +); + + + +/** + * Function: omxVCCOMM_ComputeTextureErrorBlock_SAD (6.1.4.1.1) + * + * Description: + * Computes texture error of the block; also returns SAD. + * + * Input Arguments: + * + * pSrc - pointer to the source plane; must be aligned on an 8-byte + * boundary. + * srcStep - step of the source plane + * pSrcRef - pointer to the reference buffer, an 8x8 block; must be aligned + * on an 8-byte boundary. + * + * Output Arguments: + * + * pDst - pointer to the destination buffer, an 8x8 block; must be aligned + * on an 8-byte boundary. + * pDstSAD - pointer to the Sum of Absolute Differences (SAD) value + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following + * pointers is NULL: pSrc, pSrcRef, pDst and pDstSAD. + * - pSrc is not 8-byte aligned. + * - SrcStep <= 0 or srcStep is not a multiple of 8. + * - pSrcRef is not 8-byte aligned. + * - pDst is not 8-byte aligned. + * + */ +OMXResult omxVCCOMM_ComputeTextureErrorBlock_SAD ( + const OMX_U8 *pSrc, + OMX_INT srcStep, + const OMX_U8 *pSrcRef, + OMX_S16 *pDst, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCCOMM_ComputeTextureErrorBlock (6.1.4.1.2) + * + * Description: + * Computes the texture error of the block. + * + * Input Arguments: + * + * pSrc - pointer to the source plane. This should be aligned on an 8-byte + * boundary. + * srcStep - step of the source plane + * pSrcRef - pointer to the reference buffer, an 8x8 block. This should be + * aligned on an 8-byte boundary. 
+ * + * Output Arguments: + * + * pDst - pointer to the destination buffer, an 8x8 block. This should be + * aligned on an 8-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is NULL: + * pSrc, pSrcRef, pDst. + * - pSrc is not 8-byte aligned. + * - SrcStep <= 0 or srcStep is not a multiple of 8. + * - pSrcRef is not 8-byte aligned. + * - pDst is not 8-byte aligned + * + */ +OMXResult omxVCCOMM_ComputeTextureErrorBlock ( + const OMX_U8 *pSrc, + OMX_INT srcStep, + const OMX_U8 *pSrcRef, + OMX_S16 *pDst +); + + + +/** + * Function: omxVCCOMM_LimitMVToRect (6.1.4.1.3) + * + * Description: + * Limits the motion vector associated with the current block/macroblock to + * prevent the motion compensated block/macroblock from moving outside a + * bounding rectangle as shown in Figure 6-1. + * + * Input Arguments: + * + * pSrcMV - pointer to the motion vector associated with the current block + * or macroblock + * pRectVOPRef - pointer to the bounding rectangle + * Xcoord, Ycoord - coordinates of the current block or macroblock + * size - size of the current block or macroblock; must be equal to 8 or + * 16. + * + * Output Arguments: + * + * pDstMV - pointer to the limited motion vector + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: + * pSrcMV, pDstMV, or pRectVOPRef. + * - size is not equal to either 8 or 16. + * - the width or height of the bounding rectangle is less than + * twice the block size. 
+ */ +OMXResult omxVCCOMM_LimitMVToRect ( + const OMXVCMotionVector *pSrcMV, + OMXVCMotionVector *pDstMV, + const OMXRect *pRectVOPRef, + OMX_INT Xcoord, + OMX_INT Ycoord, + OMX_INT size +); + + + +/** + * Function: omxVCCOMM_SAD_16x (6.1.4.1.4) + * + * Description: + * This function calculates the SAD for 16x16 and 16x8 blocks. + * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 16-byte + * boundary. + * iStepOrg - Step of the original block buffer + * pSrcRef - Pointer to the reference block + * iStepRef - Step of the reference block buffer + * iHeight - Height of the block + * + * Output Arguments: + * + * pDstSAD - Pointer of result SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pDstSAD, or pSrcRef + * - pSrcOrg is not 16-byte aligned. + * - iStepOrg <= 0 or iStepOrg is not a multiple of 16 + * - iStepRef <= 0 or iStepRef is not a multiple of 16 + * - iHeight is not 8 or 16 + * + */ +OMXResult omxVCCOMM_SAD_16x ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_S32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCCOMM_SAD_8x (6.1.4.1.5) + * + * Description: + * This function calculates the SAD for 8x16, 8x8, 8x4 blocks. + * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 8-byte + * boundary. + * iStepOrg - Step of the original block buffer + * pSrcRef - Pointer to the reference block + * iStepRef - Step of the reference block buffer + * iHeight - Height of the block + * + * Output Arguments: + * + * pDstSAD -Pointer of result SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. 
Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pDstSAD, or pSrcRef + * - pSrcOrg is not 8-byte aligned. + * - iStepOrg <= 0 or iStepOrg is not a multiple of 8 + * - iStepRef <= 0 or iStepRef is not a multiple of 8 + * - iHeight is not 4, 8 or 16 + * + */ +OMXResult omxVCCOMM_SAD_8x ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_S32*pDstSAD, + OMX_U32 iHeight +); + + + +/* 6.2.1.1 Direction */ +/* The direction enumerator is used with functions that perform AC/DC prediction and zig-zag scan. */ + +enum { + OMX_VC_NONE = 0, + OMX_VC_HORIZONTAL = 1, + OMX_VC_VERTICAL = 2 +}; + + + +/* 6.2.1.2 Bilinear Interpolation */ +/* The bilinear interpolation enumerator is used with motion estimation, motion compensation, and reconstruction functions. */ + +enum { + OMX_VC_INTEGER_PIXEL = 0, /* case a */ + OMX_VC_HALF_PIXEL_X = 1, /* case b */ + OMX_VC_HALF_PIXEL_Y = 2, /* case c */ + OMX_VC_HALF_PIXEL_XY = 3 /* case d */ +}; + + + +/* 6.2.1.3 Neighboring Macroblock Availability */ +/* Neighboring macroblock availability is indicated using the following flags: */ + +enum { + OMX_VC_UPPER = 1, /** above macroblock is available */ + OMX_VC_LEFT = 2, /** left macroblock is available */ + OMX_VC_CENTER = 4, + OMX_VC_RIGHT = 8, + OMX_VC_LOWER = 16, + OMX_VC_UPPER_LEFT = 32, /** above-left macroblock is available */ + OMX_VC_UPPER_RIGHT = 64, /** above-right macroblock is available */ + OMX_VC_LOWER_LEFT = 128, + OMX_VC_LOWER_RIGHT = 256 +}; + + + +/* 6.2.1.4 Video Components */ +/* A data type that enumerates video components is defined as follows: */ + +typedef enum { + OMX_VC_LUMINANCE, /** Luminance component */ + OMX_VC_CHROMINANCE /** chrominance component */ +} OMXVCM4P2VideoComponent; + + + +/* 6.2.1.5 MacroblockTypes */ +/* A data type that enumerates macroblock types is defined as follows: */ + +typedef enum { + OMX_VC_INTER = 0, /** P 
picture or P-VOP */ + OMX_VC_INTER_Q = 1, /** P picture or P-VOP */ + OMX_VC_INTER4V = 2, /** P picture or P-VOP */ + OMX_VC_INTRA = 3, /** I and P picture, I- and P-VOP */ + OMX_VC_INTRA_Q = 4, /** I and P picture, I- and P-VOP */ + OMX_VC_INTER4V_Q = 5 /** P picture or P-VOP (H.263)*/ +} OMXVCM4P2MacroblockType; + + + +/* 6.2.1.6 Coordinates */ +/* Coordinates are represented as follows: */ + +typedef struct { + OMX_INT x; + OMX_INT y; +} OMXVCM4P2Coordinate; + + + +/* 6.2.1.7 Motion Estimation Algorithms */ +/* A data type that enumerates motion estimation search methods is defined as follows: */ + +typedef enum { + OMX_VC_M4P2_FAST_SEARCH = 0, /** Fast motion search */ + OMX_VC_M4P2_FULL_SEARCH = 1 /** Full motion search */ +} OMXVCM4P2MEMode; + + + +/* 6.2.1.8 Motion Estimation Parameters */ +/* A data structure containing control parameters for + * motion estimation functions is defined as follows: + */ + +typedef struct { + OMX_INT searchEnable8x8; /** enables 8x8 search */ + OMX_INT halfPelSearchEnable; /** enables half-pel resolution */ + OMX_INT searchRange; /** search range */ + OMX_INT rndVal; /** rounding control; 0-disabled, 1-enabled*/ +} OMXVCM4P2MEParams; + + + +/* 6.2.1.9 Macroblock Information */ +/* A data structure containing macroblock parameters for + * motion estimation functions is defined as follows: + */ + +typedef struct { + OMX_S32 sliceId; /* slice number */ + OMXVCM4P2MacroblockType mbType; /* MB type: OMX_VC_INTRA, OMX_VC_INTER, or OMX_VC_INTER4V */ + OMX_S32 qp; /* quantization parameter*/ + OMX_U32 cbpy; /* CBP Luma */ + OMX_U32 cbpc; /* CBP Chroma */ + OMXVCMotionVector pMV0[2][2]; /* motion vector, represented using 1/2-pel units, + * pMV0[blocky][blockx] (blocky = 0~1, blockx =0~1) + */ + OMXVCMotionVector pMVPred[2][2]; /* motion vector prediction, represented using 1/2-pel units, + * pMVPred[blocky][blockx] (blocky = 0~1, blockx = 0~1) + */ + OMX_U8 pPredDir[2][2]; /* AC prediction direction: + * OMX_VC_NONE, OMX_VC_VERTICAL,
OMX_VC_HORIZONTAL + */ +} OMXVCM4P2MBInfo, *OMXVCM4P2MBInfoPtr; + + + +/** + * Function: omxVCM4P2_FindMVpred (6.2.3.1.1) + * + * Description: + * Predicts a motion vector for the current block using the procedure + * specified in [ISO14496-2], subclause 7.6.5. The resulting predicted MV is + * returned in pDstMVPred. If the parameter pDstMVPredME is not NULL then + * the set of three MV candidates used for prediction is also returned, + * otherwise pDstMVPredME is NULL upon return. + * + * Input Arguments: + * + * pSrcMVCurMB - pointer to the MV buffer associated with the current Y + * macroblock; a value of NULL indicates unavailability. + * pSrcCandMV1 - pointer to the MV buffer containing the 4 MVs associated + * with the MB located to the left of the current MB; set to NULL + * if there is no MB to the left. + * pSrcCandMV2 - pointer to the MV buffer containing the 4 MVs associated + * with the MB located above the current MB; set to NULL if there + * is no MB located above the current MB. + * pSrcCandMV3 - pointer to the MV buffer containing the 4 MVs associated + * with the MB located to the right and above the current MB; set + * to NULL if there is no MB located to the above-right. + * iBlk - the index of block in the current macroblock + * pDstMVPredME - MV candidate return buffer; if set to NULL then + * prediction candidate MVs are not returned and pDstMVPredME will + * be NULL upon function return; if pDstMVPredME is non-NULL then it + * must point to a buffer containing sufficient space for three + * return MVs. + * + * Output Arguments: + * + * pDstMVPred - pointer to the predicted motion vector + * pDstMVPredME - if non-NULL upon input then pDstMVPredME points upon + * return to a buffer containing the three motion vector candidates + * used for prediction as specified in [ISO14496-2], subclause + * 7.6.5, otherwise if NULL upon input then pDstMVPredME is NULL + * upon output.
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - the pointer pDstMVPred is NULL + * - the parameter iBlk does not fall into the range 0 <= iBlk<=3 + * + */ +OMXResult omxVCM4P2_FindMVpred ( + const OMXVCMotionVector *pSrcMVCurMB, + const OMXVCMotionVector *pSrcCandMV1, + const OMXVCMotionVector *pSrcCandMV2, + const OMXVCMotionVector *pSrcCandMV3, + OMXVCMotionVector *pDstMVPred, + OMXVCMotionVector *pDstMVPredME, + OMX_INT iBlk +); + + + +/** + * Function: omxVCM4P2_IDCT8x8blk (6.2.3.2.1) + * + * Description: + * Computes a 2D inverse DCT for a single 8x8 block, as defined in + * [ISO14496-2]. + * + * Input Arguments: + * + * pSrc - pointer to the start of the linearly arranged IDCT input buffer; + * must be aligned on a 16-byte boundary. According to + * [ISO14496-2], the input coefficient values should lie within the + * range [-2048, 2047]. + * + * Output Arguments: + * + * pDst - pointer to the start of the linearly arranged IDCT output buffer; + * must be aligned on a 16-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - pSrc or pDst is NULL. + * - pSrc or pDst is not 16-byte aligned. + * + */ +OMXResult omxVCM4P2_IDCT8x8blk ( + const OMX_S16 *pSrc, + OMX_S16 *pDst +); + + + +/** + * Function: omxVCM4P2_MEGetBufSize (6.2.4.1.1) + * + * Description: + * Computes the size, in bytes, of the vendor-specific specification + * structure for the following motion estimation functions: + * BlockMatch_Integer_8x8, BlockMatch_Integer_16x16, and MotionEstimationMB. 
+ * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P2MEMode + * pMEParams - motion estimation parameters + * + * Output Arguments: + * + * pSize - pointer to the number of bytes required for the specification + * structure + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - one or more of the following is true: + * - an invalid value was specified for the parameter MEmode + * - a negative or zero value was specified for the + * parameter pMEParams->searchRange + * + */ +OMXResult omxVCM4P2_MEGetBufSize ( + OMXVCM4P2MEMode MEmode, + const OMXVCM4P2MEParams *pMEParams, + OMX_U32 *pSize +); + + + +/** + * Function: omxVCM4P2_MEInit (6.2.4.1.2) + * + * Description: + * Initializes the vendor-specific specification structure required for the + * following motion estimation functions: BlockMatch_Integer_8x8, + * BlockMatch_Integer_16x16, and MotionEstimationMB. Memory for the + * specification structure *pMESpec must be allocated prior to calling the + * function, and should be aligned on a 4-byte boundary. Following + * initialization by this function, the vendor-specific structure *pMESpec + * should contain an implementation-specific representation of all motion + * estimation parameters received via the structure pMEParams, for example + * rndVal, searchRange, etc. The number of bytes required for the + * specification structure can be determined using the function + * omxVCM4P2_MEGetBufSize. 
+ * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P2MEMode + * pMEParams - motion estimation parameters + * pMESpec - pointer to the uninitialized ME specification structure + * + * Output Arguments: + * + * pMESpec - pointer to the initialized ME specification structure + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - one or more of the following is true: + * - an invalid value was specified for the parameter MEmode + * - a negative or zero value was specified for the + * parameter pMEParams->searchRange + * + */ +OMXResult omxVCM4P2_MEInit ( + OMXVCM4P2MEMode MEmode, + const OMXVCM4P2MEParams*pMEParams, + void *pMESpec +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Integer_16x16 (6.2.4.2.1) + * + * Description: + * Performs a 16x16 block search; estimates motion vector and associated + * minimum SAD. Both the input and output motion vectors are represented using + * half-pixel units, and therefore a shift left or right by 1 bit may be + * required, respectively, to match the input or output MVs with other + * functions that either generate output MVs or expect input MVs represented + * using integer pixel units. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * MB that corresponds to the location of the current macroblock in + * the current plane. + * refWidth - width of the reference plane + * pRefRect - pointer to the valid reference plane rectangle; coordinates + * are specified relative to the image origin. Rectangle + * boundaries may extend beyond image boundaries if the image has + * been padded. For example, if padding extends 4 pixels beyond + * frame border, then the value for the left border could be set to + * -4. 
+ * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 256 + * entries); must be aligned on a 16-byte boundary. The number of + * bytes between lines (step) is 16. + * pCurrPointPos - position of the current macroblock in the current plane + * pSrcPreMV - pointer to predicted motion vector; NULL indicates no + * predicted MV + * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced + * by pSrcPreMV); may be set to NULL if unavailable. + * pMESpec - vendor-specific motion estimation specification structure; + * must have been allocated and then initialized using + * omxVCM4P2_MEInit prior to calling the block matching function. + * + * Output Arguments: + * + * pDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: pSrcRefBuf, + * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or + * pMESpec, or + * - pSrcCurrBuf is not 16-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Integer_16x16 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCMotionVector*pSrcPreMV, + const OMX_INT *pSrcPreSAD, + void *pMESpec, + OMXVCMotionVector*pDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Integer_8x8 (6.2.4.2.2) + * + * Description: + * Performs an 8x8 block search; estimates motion vector and associated + * minimum SAD.
Both the input and output motion vectors are represented + * using half-pixel units, and therefore a shift left or right by 1 bit may be + * required, respectively, to match the input or output MVs with other + * functions that either generate output MVs or expect input MVs represented + * using integer pixel units. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * block that corresponds to the location of the current 8x8 block + * in the current plane. + * refWidth - width of the reference plane + * pRefRect - pointer to the valid reference plane rectangle; coordinates + * are specified relative to the image origin. Rectangle + * boundaries may extend beyond image boundaries if the image has + * been padded. + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 128 + * entries); must be aligned on an 8-byte boundary. The number of + * bytes between lines (step) is 16 bytes. + * pCurrPointPos - position of the current block in the current plane + * pSrcPreMV - pointer to predicted motion vector; NULL indicates no + * predicted MV + * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced + * by pSrcPreMV); may be set to NULL if unavailable. + * pMESpec - vendor-specific motion estimation specification structure; + * must have been allocated and then initialized using + * omxVCM4P2_MEInit prior to calling the block matching function. + * + * Output Arguments: + * + * pDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. 
Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: pSrcRefBuf, + * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or + * pMESpec, or + * - pSrcCurrBuf is not 8-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Integer_8x8 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCMotionVector *pSrcPreMV, + const OMX_INT *pSrcPreSAD, + void *pMESpec, + OMXVCMotionVector *pDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Half_16x16 (6.2.4.2.3) + * + * Description: + * Performs a 16x16 block match with half-pixel resolution. Returns the + * estimated motion vector and associated minimum SAD. This function + * estimates the half-pixel motion vector by interpolating the integer + * resolution motion vector referenced by the input parameter pSrcDstMV, i.e., + * the initial integer MV is generated externally. The input parameters + * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of + * 16x16 integer search prior to calling BlockMatch_Half_16x16. The function + * BlockMatch_Integer_16x16 may be used for integer motion estimation. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * macroblock that corresponds to the location of the current + * macroblock in the current plane. + * refWidth - width of the reference plane + * pRefRect - reference plane valid region rectangle + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 256 + * entries); must be aligned on a 16-byte boundary. The number of + * bytes between lines (step) is 16.
+ * pSearchPointRefPos - position of the starting point for half pixel + * search (specified in terms of integer pixel units) in the + * reference plane, i.e., the reference position pointed to by the + * predicted motion vector. + * rndVal - rounding control parameter: 0 - disabled; 1 - enabled. + * pSrcDstMV - pointer to the initial MV estimate; typically generated + * during a prior 16x16 integer search; specified in terms of + * half-pixel units. + * + * Output Arguments: + * + * pSrcDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: pSrcRefBuf, + * pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV. + * - pSrcCurrBuf is not 16-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Half_16x16 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pSearchPointRefPos, + OMX_INT rndVal, + OMXVCMotionVector *pSrcDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Half_8x8 (6.2.4.2.4) + * + * Description: + * Performs an 8x8 block match with half-pixel resolution. Returns the + * estimated motion vector and associated minimum SAD. This function + * estimates the half-pixel motion vector by interpolating the integer + * resolution motion vector referenced by the input parameter pSrcDstMV, i.e., + * the initial integer MV is generated externally. The input parameters + * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of + * 8x8 integer search prior to calling BlockMatch_Half_8x8. The function + * BlockMatch_Integer_8x8 may be used for integer motion estimation.
+ * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * block that corresponds to the location of the current 8x8 block + * in the current plane. + * refWidth - width of the reference plane + * pRefRect - reference plane valid region rectangle + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 128 + * entries); must be aligned on an 8-byte boundary. The number of + * bytes between lines (step) is 16. + * pSearchPointRefPos - position of the starting point for half pixel + * search (specified in terms of integer pixel units) in the + * reference plane. + * rndVal - rounding control parameter: 0 - disabled; 1 - enabled. + * pSrcDstMV - pointer to the initial MV estimate; typically generated + * during a prior 8x8 integer search, specified in terms of + * half-pixel units. + * + * Output Arguments: + * + * pSrcDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: + * pSrcRefBuf, pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV + * - pSrcCurrBuf is not 8-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Half_8x8 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pSearchPointRefPos, + OMX_INT rndVal, + OMXVCMotionVector *pSrcDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_MotionEstimationMB (6.2.4.3.1) + * + * Description: + * Performs motion search for a 16x16 macroblock. Selects best motion search + * strategy from among inter-1MV, inter-4MV, and intra modes. Supports + * integer and half pixel resolution. 
+ * + * Input Arguments: + * + * pSrcCurrBuf - pointer to the top-left corner of the current MB in the + * original picture plane; must be aligned on a 16-byte boundary. + * The function does not expect source data outside the region + * bounded by the MB to be available; for example it is not + * necessary for the caller to guarantee the availability of + * pSrcCurrBuf[-SrcCurrStep], i.e., the row of pixels above the MB + * to be processed. + * srcCurrStep - width of the original picture plane, in terms of full + * pixels; must be a multiple of 16. + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * plane location corresponding to the location of the current + * macroblock in the current plane; must be aligned on a 16-byte + * boundary. + * srcRefStep - width of the reference picture plane, in terms of full + * pixels; must be a multiple of 16. + * pRefRect - reference plane valid region rectangle, specified relative to + * the image origin + * pCurrPointPos - position of the current macroblock in the current plane + * pMESpec - pointer to the vendor-specific motion estimation specification + * structure; must be allocated and then initialized using + * omxVCM4P2_MEInit prior to calling this function. + * pMBInfo - array, of dimension four, containing pointers to information + * associated with four nearby MBs: + * - pMBInfo[0] - pointer to left MB information + * - pMBInfo[1] - pointer to top MB information + * - pMBInfo[2] - pointer to top-left MB information + * - pMBInfo[3] - pointer to top-right MB information + * Any pointer in the array may be set equal to NULL if the + * corresponding MB doesn't exist. 
For each MB, the following structure + * members are used: + * - mbType - macroblock type, either OMX_VC_INTRA, OMX_VC_INTER, or + * OMX_VC_INTER4V + * - pMV0[2][2] - estimated motion vectors; represented + * in 1/2 pixel units + * - sliceID - number of the slice to which the MB belongs + * pSrcDstMBCurr - pointer to information structure for the current MB. + * The following entries should be set prior to calling the + * function: sliceID - the number of the slice to which the + * current MB belongs. The structure elements cbpy and cbpc are + * ignored. + * + * Output Arguments: + * + * pSrcDstMBCurr - pointer to updated information structure for the current + * MB after MB-level motion estimation has been completed. The + * following structure members are updated by the ME function: + * - mbType - macroblock type: OMX_VC_INTRA, OMX_VC_INTER, or + * OMX_VC_INTER4V. + * - pMV0[2][2] - estimated motion vectors; represented in + * terms of 1/2 pel units. + * - pMVPred[2][2] - predicted motion vectors; represented + * in terms of 1/2 pel units. + * The structure members cbpy and cbpc are not updated by the function. + * pDstSAD - pointer to the minimum SAD for INTER1V, or sum of minimum SADs + * for INTER4V + * pDstBlockSAD - pointer to an array of SAD values for each of the four + * 8x8 luma blocks in the MB. The block SADs are in scan order for + * each MB. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: pSrcCurrBuf, + * pSrcRefBuf, pRefRect, pCurrPointPos, pMBInter, pMBIntra, + * pSrcDstMBCurr, or pDstSAD. 
+ * + */ +OMXResult omxVCM4P2_MotionEstimationMB ( + const OMX_U8 *pSrcCurrBuf, + OMX_S32 srcCurrStep, + const OMX_U8 *pSrcRefBuf, + OMX_S32 srcRefStep, + const OMXRect*pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + void *pMESpec, + const OMXVCM4P2MBInfoPtr *pMBInfo, + OMXVCM4P2MBInfo *pSrcDstMBCurr, + OMX_U16 *pDstSAD, + OMX_U16 *pDstBlockSAD +); + + + +/** + * Function: omxVCM4P2_DCT8x8blk (6.2.4.4.1) + * + * Description: + * Computes a 2D forward DCT for a single 8x8 block, as defined in + * [ISO14496-2]. + * + * Input Arguments: + * + * pSrc - pointer to the start of the linearly arranged input buffer; must + * be aligned on a 16-byte boundary. Input values (pixel + * intensities) are valid in the range [-255,255]. + * + * Output Arguments: + * + * pDst - pointer to the start of the linearly arranged output buffer; must + * be aligned on a 16-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, returned if: + * - pSrc or pDst is NULL. + * - pSrc or pDst is not 16-byte aligned. + * + */ +OMXResult omxVCM4P2_DCT8x8blk ( + const OMX_S16 *pSrc, + OMX_S16 *pDst +); + + + +/** + * Function: omxVCM4P2_QuantIntra_I (6.2.4.4.2) + * + * Description: + * Performs quantization on intra block coefficients. This function supports + * bits_per_pixel == 8. + * + * Input Arguments: + * + * pSrcDst - pointer to the input intra block coefficients; must be aligned + * on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale). + * blockIndex - block index indicating the component type and position, + * valid in the range 0 to 5, as defined in [ISO14496-2], subclause + * 6.1.3.8. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pSrcDst - pointer to the output (quantized) interblock coefficients. 
+ * When shortVideoHeader==1, AC coefficients are saturated on the + * interval [-127, 127], and DC coefficients are saturated on the + * interval [1, 254]. When shortVideoHeader==0, AC coefficients + * are saturated on the interval [-2047, 2047]. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - pSrcDst is NULL. + * - blockIndex < 0 or blockIndex >= 10 + * - QP <= 0 or QP >= 32. + * + */ +OMXResult omxVCM4P2_QuantIntra_I ( + OMX_S16 *pSrcDst, + OMX_U8 QP, + OMX_INT blockIndex, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_QuantInter_I (6.2.4.4.3) + * + * Description: + * Performs quantization on an inter coefficient block; supports + * bits_per_pixel == 8. + * + * Input Arguments: + * + * pSrcDst - pointer to the input inter block coefficients; must be aligned + * on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale) + * shortVideoHeader - binary flag indicating presence of short_video_header; + * shortVideoHeader==1 selects linear intra DC mode, and + * shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pSrcDst - pointer to the output (quantized) interblock coefficients. + * When shortVideoHeader==1, AC coefficients are saturated on the + * interval [-127, 127], and DC coefficients are saturated on the + * interval [1, 254]. When shortVideoHeader==0, AC coefficients + * are saturated on the interval [-2047, 2047]. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - pSrcDst is NULL. + * - QP <= 0 or QP >= 32. + * + */ +OMXResult omxVCM4P2_QuantInter_I ( + OMX_S16 *pSrcDst, + OMX_U8 QP, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_TransRecBlockCoef_intra (6.2.4.4.4) + * + * Description: + * Quantizes the DCT coefficients, implements intra block AC/DC coefficient + * prediction, and reconstructs the current intra block texture for prediction + * on the next frame. 
Quantized row and column coefficients are returned in + * the updated coefficient buffers. + * + * Input Arguments: + * + * pSrc - pointer to the pixels of current intra block; must be aligned on + * an 8-byte boundary. + * pPredBufRow - pointer to the coefficient row buffer containing + * ((num_mb_per_row * 2 + 1) * 8) elements of type OMX_S16. + * Coefficients are organized into blocks of eight as described + * below (Internal Prediction Coefficient Update Procedures). The + * DC coefficient is first, and the remaining buffer locations + * contain the quantized AC coefficients. Each group of eight row + * buffer elements combined with one element eight elements ahead + * contains the coefficient predictors of the neighboring block + * that is spatially above or to the left of the block currently to + * be decoded. A negative-valued DC coefficient indicates that this + * neighboring block is not INTRA-coded or out of bounds, and + * therefore the AC and DC coefficients are invalid. Pointer must + * be aligned on an 8-byte boundary. + * pPredBufCol - pointer to the prediction coefficient column buffer + * containing 16 elements of type OMX_S16. Coefficients are + * organized as described in section 6.2.2.5. Pointer must be + * aligned on an 8-byte boundary. + * pSumErr - pointer to a flag indicating whether or not AC prediction is + * required; AC prediction is enabled if *pSumErr >=0, but the + * value is not used for coefficient prediction, i.e., the sum of + * absolute differences starts from 0 for each call to this + * function. Otherwise AC prediction is disabled if *pSumErr < 0 . + * blockIndex - block index indicating the component type and position, as + * defined in [ISO14496-2], subclause 6.1.3.8. 
+ * curQp - quantization parameter of the macroblock to which the current + * block belongs + * pQpBuf - pointer to a 2-element quantization parameter buffer; pQpBuf[0] + * contains the quantization parameter associated with the 8x8 + * block left of the current block (QPa), and pQpBuf[1] contains + * the quantization parameter associated with the 8x8 block above + * the current block (QPc). In the event that the corresponding + * block is outside of the VOP bound, the Qp value will not affect + * the intra prediction process, as described in [ISO14496-2], + * sub-clause 7.4.3.3, Adaptive AC Coefficient Prediction. + * srcStep - width of the source buffer; must be a multiple of 8. + * dstStep - width of the reconstructed destination buffer; must be a + * multiple of 16. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pDst - pointer to the quantized DCT coefficient buffer; pDst[0] contains + * the predicted DC coefficient; the remaining entries contain the + * quantized AC coefficients (without prediction). The pointer + * pDst must be aligned on a 16-byte boundary. + * pRec - pointer to the reconstructed texture; must be aligned on an + * 8-byte boundary. + * pPredBufRow - pointer to the updated coefficient row buffer + * pPredBufCol - pointer to the updated coefficient column buffer + * pPreACPredict - if prediction is enabled, the parameter points to the + * start of the buffer containing the coefficient differences for + * VLC encoding. The entry pPreACPredict[0] indicates prediction + * direction for the current block and takes one of the following + * values: OMX_VC_NONE (prediction disabled), OMX_VC_HORIZONTAL, or + * OMX_VC_VERTICAL. The entries + * pPreACPredict[1]-pPreACPredict[7] contain predicted AC + * coefficients. 
If prediction is disabled (*pSumErr<0) then the + * contents of this buffer are undefined upon return from the + * function + * pSumErr - pointer to the value of the accumulated AC coefficient errors, + * i.e., sum of the absolute differences between predicted and + * unpredicted AC coefficients + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments: + * - At least one of the following pointers is NULL: pSrc, pDst, pRec, + * pPredBufRow, pPredBufCol, pQpBuf, pPreACPredict, pSumErr. + * - blockIndex < 0 or blockIndex >= 10; + * - curQP <= 0 or curQP >= 32. + * - srcStep, or dstStep <= 0 or not a multiple of 8. + * - pDst is not 16-byte aligned. + * - At least one of the following pointers is not 8-byte aligned: + * pSrc, pRec. + * + * Note: The coefficient buffers must be updated in accordance with the + * update procedures defined in section 6.2.2. + * + */ +OMXResult omxVCM4P2_TransRecBlockCoef_intra ( + const OMX_U8 *pSrc, + OMX_S16 *pDst, + OMX_U8 *pRec, + OMX_S16 *pPredBufRow, + OMX_S16 *pPredBufCol, + OMX_S16 *pPreACPredict, + OMX_INT *pSumErr, + OMX_INT blockIndex, + OMX_U8 curQp, + const OMX_U8 *pQpBuf, + OMX_INT srcStep, + OMX_INT dstStep, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_TransRecBlockCoef_inter (6.2.4.4.5) + * + * Description: + * Implements DCT, and quantizes the DCT coefficients of the inter block + * while reconstructing the texture residual. There is no boundary check for + * the bit stream buffer. + * + * Input Arguments: + * + * pSrc - pointer to the residuals to be encoded; must be aligned on a + * 16-byte boundary. + * QP - quantization parameter. + * shortVideoHeader - binary flag indicating presence of short_video_header; + * shortVideoHeader==1 selects linear intra DC mode, and + * shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pDst - pointer to the quantized DCT coefficients buffer; must be aligned + * on a 16-byte boundary. 
+ * pRec - pointer to the reconstructed texture residuals; must be aligned + * on a 16-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is either NULL or + * not 16-byte aligned: + * - pSrc + * - pDst + * - pRec + * - QP <= 0 or QP >= 32. + * + */ +OMXResult omxVCM4P2_TransRecBlockCoef_inter ( + const OMX_S16 *pSrc, + OMX_S16 *pDst, + OMX_S16 *pRec, + OMX_U8 QP, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (6.2.4.5.2) + * + * Description: + * Performs zigzag scan and VLC encoding of AC and DC coefficients for one + * intra block. Two versions of the function (DCVLC and ACVLC) are provided + * in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding". + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the bitstream + * pBitOffset - pointer to the bit position in the byte pointed by + * *ppBitStream. Valid within 0 to 7. + * pQDctBlkCoef - pointer to the quantized DCT coefficient + * predDir - AC prediction direction, which is used to decide the zigzag + * scan pattern; takes one of the following values: + * - OMX_VC_NONE - AC prediction not used. + * Performs classical zigzag scan. + * - OMX_VC_HORIZONTAL - Horizontal prediction. + * Performs alternate-vertical zigzag scan. + * - OMX_VC_VERTICAL - Vertical prediction. + * Performs alternate-horizontal zigzag scan. + * pattern - block pattern which is used to decide whether this block is + * encoded + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. 
+ * videoComp - video component type (luminance, chrominance) of the current + * block + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is encoded, so + * that it points to the current byte in the bit stream buffer. + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments: + * - At least one of the following pointers is NULL: ppBitStream, + * *ppBitStream, pBitOffset, pQDctBlkCoef. + * - *pBitOffset < 0, or *pBitOffset >7. + * - PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or + * OMX_VC_VERTICAL. + * - VideoComp is not one component of enum OMXVCM4P2VideoComponent. + * + */ +OMXResult omxVCM4P2_EncodeVLCZigzag_IntraDCVLC ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 predDir, + OMX_U8 pattern, + OMX_INT shortVideoHeader, + OMXVCM4P2VideoComponent videoComp +); + + + +/** + * Function: omxVCM4P2_EncodeVLCZigzag_IntraACVLC (6.2.4.5.2) + * + * Description: + * Performs zigzag scan and VLC encoding of AC and DC coefficients for one + * intra block. Two versions of the function (DCVLC and ACVLC) are provided + * in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding. + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the bitstream + * pBitOffset - pointer to the bit position in the byte pointed by + * *ppBitStream. Valid within 0 to 7. + * pQDctBlkCoef - pointer to the quantized DCT coefficient + * predDir - AC prediction direction, which is used to decide the zigzag + * scan pattern; takes one of the following values: + * - OMX_VC_NONE - AC prediction not used. + * Performs classical zigzag scan. 
+ * - OMX_VC_HORIZONTAL - Horizontal prediction. + * Performs alternate-vertical zigzag scan. + * - OMX_VC_VERTICAL - Vertical prediction. + * Performs alternate-horizontal zigzag scan. + * pattern - block pattern which is used to decide whether this block is + * encoded + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is encoded, so + * that it points to the current byte in the bit stream buffer. + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments: + * - At least one of the following pointers is NULL: ppBitStream, + * *ppBitStream, pBitOffset, pQDctBlkCoef. + * - *pBitOffset < 0, or *pBitOffset >7. + * - PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or + * OMX_VC_VERTICAL. + * - VideoComp is not one component of enum OMXVCM4P2VideoComponent. + * + */ +OMXResult omxVCM4P2_EncodeVLCZigzag_IntraACVLC ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 predDir, + OMX_U8 pattern, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_EncodeVLCZigzag_Inter (6.2.4.5.3) + * + * Description: + * Performs classical zigzag scanning and VLC encoding for one inter block. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream + * pBitOffset - pointer to the bit position in the byte pointed by + * *ppBitStream. 
Valid within 0 to 7 + * pQDctBlkCoef - pointer to the quantized DCT coefficient + * pattern - block pattern which is used to decide whether this block is + * encoded + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is encoded so that + * it points to the current byte in the bit stream buffer. + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments + * - At least one of the following pointers is NULL: ppBitStream, *ppBitStream, + * pBitOffset, pQDctBlkCoef + * - *pBitOffset < 0, or *pBitOffset >7. + * + */ +OMXResult omxVCM4P2_EncodeVLCZigzag_Inter ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 pattern, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_EncodeMV (6.2.4.5.4) + * + * Description: + * Predicts a motion vector for the current macroblock, encodes the + * difference, and writes the output to the stream buffer. The input MVs + * pMVCurMB, pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB should lie + * within the ranges associated with the input parameter fcodeForward, as + * described in [ISO14496-2], subclause 7.6.3. This function provides a + * superset of the functionality associated with the function + * omxVCM4P2_FindMVpred. + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the bitstream buffer + * pBitOffset - index of the first free (next available) bit in the stream + * buffer referenced by *ppBitStream, valid in the range 0 to 7. + * pMVCurMB - pointer to the current macroblock motion vector; a value of + * NULL indicates unavailability. 
+ * pSrcMVLeftMB - pointer to the source left macroblock motion vector; a + * value of NULL indicates unavailability. + * pSrcMVUpperMB - pointer to source upper macroblock motion vector; a + * value of NULL indicates unavailability. + * pSrcMVUpperRightMB - pointer to source upper right MB motion vector; a + * value of NULL indicates unavailability. + * fcodeForward - an integer with values from 1 to 7; used in encoding + * motion vectors related to search range, as described in + * [ISO14496-2], subclause 7.6.3. + * MBType - macro block type, valid in the range 0 to 5 + * + * Output Arguments: + * + * ppBitStream - updated pointer to the current byte in the bit stream + * buffer + * pBitOffset - updated index of the next available bit position in stream + * buffer referenced by *ppBitStream + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following pointers is NULL: ppBitStream, + * *ppBitStream, pBitOffset, pMVCurMB + * - *pBitOffset < 0, or *pBitOffset >7. + * - fcodeForward <= 0, or fcodeForward > 7, or MBType < 0. + * + */ +OMXResult omxVCM4P2_EncodeMV ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMXVCMotionVector *pMVCurMB, + const OMXVCMotionVector*pSrcMVLeftMB, + const OMXVCMotionVector *pSrcMVUpperMB, + const OMXVCMotionVector *pSrcMVUpperRightMB, + OMX_INT fcodeForward, + OMXVCM4P2MacroblockType MBType +); + + + +/** + * Function: omxVCM4P2_DecodePadMV_PVOP (6.2.5.1.1) + * + * Description: + * Decodes and pads the four motion vectors associated with a non-intra P-VOP + * macroblock. For macroblocks of type OMX_VC_INTER4V, the output MV is + * padded as specified in [ISO14496-2], subclause 7.6.1.6. Otherwise, for + * macroblocks of types other than OMX_VC_INTER4V, the decoded MV is copied to + * all four output MV buffer entries. 
+ * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream buffer + * pBitOffset - pointer to the bit position in the byte pointed to by + * *ppBitStream. *pBitOffset is valid within [0-7]. + * pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB - pointers to the + * motion vector buffers of the macroblocks spatially at the left, + * upper, and upper-right side of the current macroblock, + * respectively; a value of NULL indicates unavailability. Note: + * Any neighborhood macroblock outside the current VOP or video + * packet or outside the current GOB (when short_video_header is + * 1) for which gob_header_empty is 0 is treated as + * transparent, according to [ISO14496-2], subclause 7.6.5. + * fcodeForward - a code equal to vop_fcode_forward in MPEG-4 bit stream + * syntax + * MBType - the type of the current macroblock. If MBType is not equal to + * OMX_VC_INTER4V, the destination motion vector buffer is still + * filled with the same decoded vector. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded, so + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream + * pDstMVCurMB - pointer to the motion vector buffer for the current + * macroblock; contains four decoded motion vectors + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDstMVCurMB + * - *pBitOffset exceeds [0,7] + * - fcodeForward exceeds (0,7] + * - MBType less than zero + * - motion vector buffer is not 4-byte aligned. 
+ * OMX_Sts_Err - status error + * + */ +OMXResult omxVCM4P2_DecodePadMV_PVOP ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMXVCMotionVector *pSrcMVLeftMB, + OMXVCMotionVector*pSrcMVUpperMB, + OMXVCMotionVector *pSrcMVUpperRightMB, + OMXVCMotionVector*pDstMVCurMB, + OMX_INT fcodeForward, + OMXVCM4P2MacroblockType MBType +); + + + +/** + * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (6.2.5.2.2) + * + * Description: + * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients + * for one intra block. Two versions of the function (DCVLC and ACVLC) are + * provided in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the + * bitstream buffer + * pBitOffset - pointer to the bit position in the current byte referenced + * by *ppBitStream. The parameter *pBitOffset is valid in the + * range [0-7]. + * Bit Position in one byte: |Most Least| + * *pBitOffset |0 1 2 3 4 5 6 7| + * predDir - AC prediction direction; used to select the zigzag scan + * pattern; takes one of the following values: + * - OMX_VC_NONE - AC prediction not used; + * performs classical zigzag scan. + * - OMX_VC_HORIZONTAL - Horizontal prediction; + * performs alternate-vertical zigzag scan; + * - OMX_VC_VERTICAL - Vertical prediction; + * performs alternate-horizontal zigzag scan. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. 
+ * videoComp - video component type (luminance or chrominance) of the + * current block + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded such + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated such that it points to the current + * bit position in the byte pointed by *ppBitStream + * pDst - pointer to the coefficient buffer of current block; must be + * 4-byte aligned. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDst + * - *pBitOffset exceeds [0,7] + * - predDir exceeds [0,2] + * - pDst is not 4-byte aligned + * OMX_Sts_Err - if: + * - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12 + * - At least one of mark bits equals zero + * - Illegal stream encountered; code cannot be located in VLC table + * - Forbidden code encountered in the VLC FLC table. + * - The number of coefficients is greater than 64 + * + */ +OMXResult omxVCM4P2_DecodeVLCZigzag_IntraDCVLC ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_U8 predDir, + OMX_INT shortVideoHeader, + OMXVCM4P2VideoComponent videoComp +); + + + +/** + * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC (6.2.5.2.2) + * + * Description: + * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients + * for one intra block. Two versions of the function (DCVLC and ACVLC) are + * provided in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the + * bitstream buffer + * pBitOffset - pointer to the bit position in the current byte referenced + * by *ppBitStream. 
The parameter *pBitOffset is valid in the + * range [0-7]. Bit Position in one byte: |Most Least| *pBitOffset + * |0 1 2 3 4 5 6 7| + * predDir - AC prediction direction; used to select the zigzag scan + * pattern; takes one of the following values: OMX_VC_NONE - AC + * prediction not used; performs classical zigzag scan. + * OMX_VC_HORIZONTAL - Horizontal prediction; performs + * alternate-vertical zigzag scan; OMX_VC_VERTICAL - Vertical + * prediction; performs alternate-horizontal zigzag scan. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * videoComp - video component type (luminance or chrominance) of the + * current block + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded such + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated such that it points to the current + * bit position in the byte pointed by *ppBitStream + * pDst - pointer to the coefficient buffer of current block; must be + * 4-byte aligned. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. At least one of the following + * pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst; + * or at least one of the following conditions is true: + * *pBitOffset exceeds [0,7], predDir exceeds [0,2], or pDst is + * not 4-byte aligned. + * OMX_Sts_Err - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12; at least one of + * mark bits equals zero; illegal stream encountered (code cannot + * be located in VLC table); forbidden code encountered in the VLC + * FLC table; or the number of coefficients is greater than 64. + * + */ +OMXResult omxVCM4P2_DecodeVLCZigzag_IntraACVLC ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_U8 predDir, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_DecodeVLCZigzag_Inter (6.2.5.2.3) + * + * Description: + * Performs VLC decoding and inverse zigzag scan for one inter-coded block. + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the stream buffer + * pBitOffset - pointer to the next available bit in the current stream + * byte referenced by *ppBitStream. The parameter *pBitOffset is + * valid within the range [0-7]. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded such + * that it points to the current byte in the stream buffer + * pBitOffset - *pBitOffset is updated after decoding such that it points + * to the next available bit in the stream byte referenced by + * *ppBitStream + * pDst - pointer to the coefficient buffer of current block; must be + * 4-byte aligned. 
+ * + * Return Value: + * + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDst + * - pDst is not 4-byte aligned + * - *pBitOffset exceeds [0,7] + * OMX_Sts_Err - status error, if: + * - At least one mark bit is equal to zero + * - Encountered an illegal stream code that cannot be found in the VLC table + * - Encountered an illegal code in the VLC FLC table + * - The number of coefficients is greater than 64 + * + */ +OMXResult omxVCM4P2_DecodeVLCZigzag_Inter ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_QuantInvIntra_I (6.2.5.3.2) + * + * Description: + * Performs the second inverse quantization mode on an intra/inter coded + * block. Supports bits_per_pixel = 8. The output coefficients are clipped to + * the range [-2048, 2047]. + * + * Input Arguments: + * + * pSrcDst - pointer to the input (quantized) intra/inter block; must be + * aligned on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale) + * videoComp - video component type of the current block. Takes one of the + * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra + * version only). + * shortVideoHeader - binary flag indicating presence of short_video_header + * (intra version only). + * + * Output Arguments: + * + * pSrcDst - pointer to the output (dequantized) intra/inter block + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; one or more of the following is + * true: + * - pSrcDst is NULL + * - QP <= 0 or QP >=31 + * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE. 
+ * + */ +OMXResult omxVCM4P2_QuantInvIntra_I ( + OMX_S16 *pSrcDst, + OMX_INT QP, + OMXVCM4P2VideoComponent videoComp, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_QuantInvInter_I (6.2.5.3.2) + * + * Description: + * Performs the second inverse quantization mode on an intra/inter coded + * block. Supports bits_per_pixel = 8. The output coefficients are clipped to + * the range [-2048, 2047]. + * + * Input Arguments: + * + * pSrcDst - pointer to the input (quantized) intra/inter block; must be + * aligned on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale) + * videoComp - video component type of the current block. Takes one of the + * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra + * version only; not a parameter of omxVCM4P2_QuantInvInter_I). + * shortVideoHeader - binary flag indicating presence of short_video_header + * (intra version only; not a parameter of omxVCM4P2_QuantInvInter_I). + * + * Output Arguments: + * + * pSrcDst - pointer to the output (dequantized) intra/inter block + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; one or more of the following is + * true: + * - pSrcDst is NULL + * - QP <= 0 or QP >=31 + * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE (intra version only). + * + */ +OMXResult omxVCM4P2_QuantInvInter_I ( + OMX_S16 *pSrcDst, + OMX_INT QP +); + + + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Intra (6.2.5.4.1) + * + * Description: + * Decodes the INTRA block coefficients. Inverse quantization, inversely + * zigzag positioning, and IDCT, with appropriate clipping on each step, are + * performed on the coefficients. The results are then placed in the output + * frame/plane on a pixel basis. Note: This function will be used only when + * at least one non-zero AC coefficient of current block exists in the bit + * stream. The DC only condition will be handled in another function. + * + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream buffer.
There is no boundary check for the bit stream + * buffer. + * pBitOffset - pointer to the bit position in the byte pointed to by + * *ppBitStream. *pBitOffset is valid within [0-7]. + * step - width of the destination plane + * pCoefBufRow - pointer to the coefficient row buffer; must be aligned on + * an 8-byte boundary. + * pCoefBufCol - pointer to the coefficient column buffer; must be aligned + * on an 8-byte boundary. + * curQP - quantization parameter of the macroblock which the current block + * belongs to + * pQPBuf - pointer to the quantization parameter buffer + * blockIndex - block index indicating the component type and position as + * defined in [ISO14496-2], subclause 6.1.3.8, Figure 6-5. + * intraDCVLC - a code determined by intra_dc_vlc_thr and QP. This allows a + * mechanism to switch between two VLC for coding of Intra DC + * coefficients as per [ISO14496-2], Table 6-21. + * ACPredFlag - a flag equal to ac_pred_flag (of luminance) indicating if + * the ac coefficients of the first row or first column are + * differentially coded for intra coded macroblock. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded, so + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream + * pDst - pointer to the block in the destination plane; must be aligned on + * an 8-byte boundary. + * pCoefBufRow - pointer to the updated coefficient row buffer. + * pCoefBufCol - pointer to the updated coefficient column buffer Note: + * The coefficient buffers must be updated in accordance with the + * update procedure defined in section 6.2.2. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pCoefBufRow, pCoefBufCol, + * pQPBuf, pDst. + * - *pBitOffset exceeds [0,7] + * - curQP exceeds (1, 31) + * - blockIndex exceeds [0,5] + * - step is not the multiple of 8 + * - a pointer alignment requirement was violated. + * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Intra. + * + */ +OMXResult omxVCM4P2_DecodeBlockCoef_Intra ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_U8 *pDst, + OMX_INT step, + OMX_S16 *pCoefBufRow, + OMX_S16 *pCoefBufCol, + OMX_U8 curQP, + const OMX_U8 *pQPBuf, + OMX_INT blockIndex, + OMX_INT intraDCVLC, + OMX_INT ACPredFlag, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Inter (6.2.5.4.2) + * + * Description: + * Decodes the INTER block coefficients. This function performs inverse + * quantization, inverse zigzag positioning, and IDCT (with appropriate + * clipping on each step) on the coefficients. The results (residuals) are + * placed in a contiguous array of 64 elements. For INTER block, the output + * buffer holds the residuals for further reconstruction. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream buffer. There is no boundary check for the bit stream + * buffer. + * pBitOffset - pointer to the bit position in the byte pointed to by + * *ppBitStream. *pBitOffset is valid within [0-7] + * QP - quantization parameter + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded, so + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed to by *ppBitStream + * pDst - pointer to the decoded residual buffer (a contiguous array of 64 + * elements of OMX_S16 data type); must be aligned on a 16-byte + * boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDst + * - *pBitOffset exceeds [0,7] + * - QP <= 0. + * - pDst is not 16-byte aligned + * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Inter. + * + */ +OMXResult omxVCM4P2_DecodeBlockCoef_Inter ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_INT QP, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_PredictReconCoefIntra (6.2.5.4.3) + * + * Description: + * Performs adaptive DC/AC coefficient prediction for an intra block. Prior + * to the function call, prediction direction (predDir) should be selected as + * specified in [ISO14496-2], subclause 7.4.3.1. + * + * Input Arguments: + * + * pSrcDst - pointer to the coefficient buffer which contains the quantized + * coefficient residuals (PQF) of the current block; must be + * aligned on a 4-byte boundary. The output coefficients are + * saturated to the range [-2048, 2047]. + * pPredBufRow - pointer to the coefficient row buffer; must be aligned on + * a 4-byte boundary. + * pPredBufCol - pointer to the coefficient column buffer; must be aligned + * on a 4-byte boundary. + * curQP - quantization parameter of the current block. curQP may be equal to + * predQP, especially when the current block and the predictor block + * are in the same macroblock.
+ * predQP - quantization parameter of the predictor block + * predDir - indicates the prediction direction which takes one of the + * following values: OMX_VC_HORIZONTAL - predict horizontally; + * OMX_VC_VERTICAL - predict vertically + * ACPredFlag - a flag indicating if AC prediction should be performed. It + * is equal to ac_pred_flag in the bit stream syntax of MPEG-4 + * videoComp - video component type (luminance or chrominance) of the + * current block + * + * Output Arguments: + * + * pSrcDst - pointer to the coefficient buffer which contains the quantized + * coefficients (QF) of the current block + * pPredBufRow - pointer to the updated coefficient row buffer + * pPredBufCol - pointer to the updated coefficient column buffer. Note: + * Buffer update: Update the AC prediction buffer (both row and + * column buffer). + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the pointers is NULL: + * pSrcDst, pPredBufRow, or pPredBufCol. + * - curQP <= 0, + * - predQP <= 0, + * - curQP > 31, + * - predQP > 31, + * - predDir exceeds [1,2] + * - pSrcDst, pPredBufRow, or pPredBufCol is not 4-byte aligned. + * + */ +OMXResult omxVCM4P2_PredictReconCoefIntra ( + OMX_S16 *pSrcDst, + OMX_S16 *pPredBufRow, + OMX_S16 *pPredBufCol, + OMX_INT curQP, + OMX_INT predQP, + OMX_INT predDir, + OMX_INT ACPredFlag, + OMXVCM4P2VideoComponent videoComp +); + + + +/** + * Function: omxVCM4P2_MCReconBlock (6.2.5.5.1) + * + * Description: + * Performs motion compensation prediction for an 8x8 block using + * interpolation described in [ISO14496-2], subclause 7.6.2. + * + * Input Arguments: + * + * pSrc - pointer to the block in the reference plane. + * srcStep - distance between the start of consecutive lines in the + * reference plane, in bytes; must be a multiple of 8. + * dstStep - distance between the start of consecutive lines in the + * destination plane, in bytes; must be a multiple of 8.
+ * pSrcResidue - pointer to a buffer containing the 16-bit prediction + * residuals; must be 16-byte aligned. If the pointer is NULL, then + * no prediction is done, only motion compensation, i.e., the block + * is moved with interpolation. + * predictType - bilinear interpolation type, as defined in section + * 6.2.1.2. + * rndVal - rounding control parameter: 0 - disabled; 1 - enabled. + * + * Output Arguments: + * + * pDst - pointer to the destination buffer; must be 8-byte aligned. If + * prediction residuals are added then output intensities are + * clipped to the range [0,255]. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - pDst is not 8-byte aligned. + * - pSrcResidue is not 16-byte aligned. + * - one or more of the following pointers is NULL: pSrc or pDst. + * - either srcStep or dstStep is not a multiple of 8. + * - invalid type specified for the parameter predictType. + * - the parameter rndVal is not equal either to 0 or 1. 
+ * + */ +OMXResult omxVCM4P2_MCReconBlock ( + const OMX_U8 *pSrc, + OMX_INT srcStep, + const OMX_S16 *pSrcResidue, + OMX_U8 *pDst, + OMX_INT dstStep, + OMX_INT predictType, + OMX_INT rndVal +); + + + +/* 6.3.1.1 Intra 16x16 Prediction Modes */ +/* A data type that enumerates intra_16x16 macroblock prediction modes is defined as follows: */ + +typedef enum { + OMX_VC_16X16_VERT = 0, /** Intra_16x16_Vertical */ + OMX_VC_16X16_HOR = 1, /** Intra_16x16_Horizontal */ + OMX_VC_16X16_DC = 2, /** Intra_16x16_DC */ + OMX_VC_16X16_PLANE = 3 /** Intra_16x16_Plane */ +} OMXVCM4P10Intra16x16PredMode; + + + +/* 6.3.1.2 Intra 4x4 Prediction Modes */ +/* A data type that enumerates intra_4x4 macroblock prediction modes is defined as follows: */ + +typedef enum { + OMX_VC_4X4_VERT = 0, /** Intra_4x4_Vertical */ + OMX_VC_4X4_HOR = 1, /** Intra_4x4_Horizontal */ + OMX_VC_4X4_DC = 2, /** Intra_4x4_DC */ + OMX_VC_4X4_DIAG_DL = 3, /** Intra_4x4_Diagonal_Down_Left */ + OMX_VC_4X4_DIAG_DR = 4, /** Intra_4x4_Diagonal_Down_Right */ + OMX_VC_4X4_VR = 5, /** Intra_4x4_Vertical_Right */ + OMX_VC_4X4_HD = 6, /** Intra_4x4_Horizontal_Down */ + OMX_VC_4X4_VL = 7, /** Intra_4x4_Vertical_Left */ + OMX_VC_4X4_HU = 8 /** Intra_4x4_Horizontal_Up */ +} OMXVCM4P10Intra4x4PredMode; + + + +/* 6.3.1.3 Chroma Prediction Modes */ +/* A data type that enumerates intra chroma prediction modes is defined as follows: */ + +typedef enum { + OMX_VC_CHROMA_DC = 0, /** Intra_Chroma_DC */ + OMX_VC_CHROMA_HOR = 1, /** Intra_Chroma_Horizontal */ + OMX_VC_CHROMA_VERT = 2, /** Intra_Chroma_Vertical */ + OMX_VC_CHROMA_PLANE = 3 /** Intra_Chroma_Plane */ +} OMXVCM4P10IntraChromaPredMode; + + + +/* 6.3.1.4 Motion Estimation Modes */ +/* A data type that enumerates H.264 motion estimation modes is defined as follows: */ + +typedef enum { + OMX_VC_M4P10_FAST_SEARCH = 0, /** Fast motion search */ + OMX_VC_M4P10_FULL_SEARCH = 1 /** Full motion search */ +} OMXVCM4P10MEMode; + + + +/* 6.3.1.5 Macroblock Types */ +/* A data type 
that enumerates H.264 macroblock types is defined as follows: */ + +typedef enum { + OMX_VC_P_16x16 = 0, /* defined by [ISO14496-10] */ + OMX_VC_P_16x8 = 1, + OMX_VC_P_8x16 = 2, + OMX_VC_P_8x8 = 3, + OMX_VC_PREF0_8x8 = 4, + OMX_VC_INTER_SKIP = 5, + OMX_VC_INTRA_4x4 = 8, + OMX_VC_INTRA_16x16 = 9, + OMX_VC_INTRA_PCM = 10 +} OMXVCM4P10MacroblockType; + + + +/* 6.3.1.6 Sub-Macroblock Types */ +/* A data type that enumerates H.264 sub-macroblock types is defined as follows: */ + +typedef enum { + OMX_VC_SUB_P_8x8 = 0, /* defined by [ISO14496-10] */ + OMX_VC_SUB_P_8x4 = 1, + OMX_VC_SUB_P_4x8 = 2, + OMX_VC_SUB_P_4x4 = 3 +} OMXVCM4P10SubMacroblockType; + + + +/* 6.3.1.7 Variable Length Coding (VLC) Information */ + +typedef struct { + OMX_U8 uTrailing_Ones; /* Trailing ones; 3 at most */ + OMX_U8 uTrailing_One_Signs; /* Trailing ones signal */ + OMX_U8 uNumCoeffs; /* Total number of non-zero coefs, including trailing ones */ + OMX_U8 uTotalZeros; /* Total number of zero coefs */ + OMX_S16 iLevels[16]; /* Levels of non-zero coefs, in reverse zig-zag order */ + OMX_U8 uRuns[16]; /* Runs for levels and trailing ones, in reverse zig-zag order */ +} OMXVCM4P10VLCInfo; + + + +/* 6.3.1.8 Macroblock Information */ + +typedef struct { + OMX_S32 sliceId; /* slice number */ + OMXVCM4P10MacroblockType mbType; /* MB type */ + OMXVCM4P10SubMacroblockType subMBType[4]; /* sub-block type */ + OMX_S32 qpy; /* qp for luma */ + OMX_S32 qpc; /* qp for chroma */ + OMX_U32 cbpy; /* CBP Luma */ + OMX_U32 cbpc; /* CBP Chroma */ + OMXVCMotionVector pMV0[4][4]; /* motion vector, represented using 1/4-pel units, pMV0[blocky][blockx] (blocky = 0~3, blockx =0~3) */ + OMXVCMotionVector pMVPred[4][4]; /* motion vector prediction, Represented using 1/4-pel units, pMVPred[blocky][blockx] (blocky = 0~3, blockx = 0~3) */ + OMX_U8 pRefL0Idx[4]; /* reference picture indices */ + OMXVCM4P10Intra16x16PredMode Intra16x16PredMode; /* best intra 16x16 prediction mode */ + OMXVCM4P10Intra4x4PredMode 
pIntra4x4PredMode[16]; /* best intra 4x4 prediction mode for each block, pMV0 indexed as above */ +} OMXVCM4P10MBInfo, *OMXVCM4P10MBInfoPtr; + + + +/* 6.3.1.9 Motion Estimation Parameters */ + +typedef struct { + OMX_S32 blockSplitEnable8x8; /* enables 16x8, 8x16, 8x8 */ + OMX_S32 blockSplitEnable4x4; /* enable splitting of 8x4, 4x8, 4x4 blocks */ + OMX_S32 halfSearchEnable; + OMX_S32 quarterSearchEnable; + OMX_S32 intraEnable4x4; /* 1=enable, 0=disable */ + OMX_S32 searchRange16x16; /* integer pixel units */ + OMX_S32 searchRange8x8; + OMX_S32 searchRange4x4; +} OMXVCM4P10MEParams; + + + +/** + * Function: omxVCM4P10_PredictIntra_4x4 (6.3.3.1.1) + * + * Description: + * Perform Intra_4x4 prediction for luma samples. If the upper-right block is + * not available, then duplication work should be handled inside the function. + * Users need not define them outside. + * + * Input Arguments: + * + * pSrcLeft - Pointer to the buffer of 4 left pixels: + * p[x, y] (x = -1, y = 0..3) + * pSrcAbove - Pointer to the buffer of 8 above pixels: + * p[x,y] (x = 0..7, y =-1); + * must be aligned on a 4-byte boundary. + * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) + * leftStep - Step of left pixel buffer; must be a multiple of 4. + * dstStep - Step of the destination buffer; must be a multiple of 4. + * predMode - Intra_4x4 prediction mode. + * availability - Neighboring 4x4 block availability flag, refer to + * "Neighboring Macroblock Availability" . + * + * Output Arguments: + * + * pDst - Pointer to the destination buffer; must be aligned on a 4-byte + * boundary. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pDst is NULL. + * dstStep < 4, or dstStep is not a multiple of 4. + * leftStep is not a multiple of 4. + * predMode is not in the valid range of enumeration + * OMXVCM4P10Intra4x4PredMode. 
+ * predMode is OMX_VC_4X4_VERT, but availability doesn't set OMX_VC_UPPER + * indicating p[x,-1] (x = 0..3) is not available. + * predMode is OMX_VC_4X4_HOR, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..3) is not available. + * predMode is OMX_VC_4X4_DIAG_DL, but availability doesn't set + * OMX_VC_UPPER indicating p[x, -1] (x = 0..3) is not available. + * predMode is OMX_VC_4X4_DIAG_DR, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not + * available. + * predMode is OMX_VC_4X4_VR, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not + * available. + * predMode is OMX_VC_4X4_HD, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not + * available. + * predMode is OMX_VC_4X4_VL, but availability doesn't set OMX_VC_UPPER + * indicating p[x,-1] (x = 0..3) is not available. + * predMode is OMX_VC_4X4_HU, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..3) is not available. + * availability sets OMX_VC_UPPER, but pSrcAbove is NULL. + * availability sets OMX_VC_LEFT, but pSrcLeft is NULL. + * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. + * either pSrcAbove or pDst is not aligned on a 4-byte boundary. + * + * Note: + * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if + * they are not used by intra prediction as implied in predMode.
+ * + */ +OMXResult omxVCM4P10_PredictIntra_4x4 ( + const OMX_U8 *pSrcLeft, + const OMX_U8 *pSrcAbove, + const OMX_U8 *pSrcAboveLeft, + OMX_U8 *pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMXVCM4P10Intra4x4PredMode predMode, + OMX_S32 availability +); + + + +/** + * Function: omxVCM4P10_PredictIntra_16x16 (6.3.3.1.2) + * + * Description: + * Performs Intra_16x16 prediction for luma samples. If the upper-right block + * is not available, then duplication work should be handled inside the + * function. Users need not define them outside. + * + * Input Arguments: + * + * pSrcLeft - Pointer to the buffer of 16 left pixels: p[x, y] (x = -1, y = + * 0..15) + * pSrcAbove - Pointer to the buffer of 16 above pixels: p[x,y] (x = 0..15, + * y = -1); must be aligned on a 16-byte boundary. + * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) + * leftStep - Step of left pixel buffer; must be a multiple of 16. + * dstStep - Step of the destination buffer; must be a multiple of 16. + * predMode - Intra_16x16 prediction mode, please refer to section 3.4.1. + * availability - Neighboring 16x16 MB availability flag. Refer to + * section 3.4.4. + * + * Output Arguments: + * + * pDst - Pointer to the destination buffer; must be aligned on a 16-byte + * boundary. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pDst is NULL. + * dstStep < 16, or dstStep is not a multiple of 16. + * leftStep is not a multiple of 16. + * predMode is not in the valid range of enumeration + * OMXVCM4P10Intra16x16PredMode. + * predMode is OMX_VC_16X16_VERT, but availability doesn't set + * OMX_VC_UPPER indicating p[x,-1] (x = 0..15) is not available. + * predMode is OMX_VC_16X16_HOR, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..15) is not available.
+ * predMode is OMX_VC_16X16_PLANE, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..15), or p[-1,y] (y = 0..15), or p[-1,-1] is not + * available. + * availability sets OMX_VC_UPPER, but pSrcAbove is NULL. + * availability sets OMX_VC_LEFT, but pSrcLeft is NULL. + * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. + * either pSrcAbove or pDst is not aligned on a 16-byte boundary. + * + * Note: + * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if + * they are not used by intra prediction as implied by predMode. + * Note: + * OMX_VC_UPPER_RIGHT is not used in intra_16x16 luma prediction. + * + */ +OMXResult omxVCM4P10_PredictIntra_16x16 ( + const OMX_U8 *pSrcLeft, + const OMX_U8 *pSrcAbove, + const OMX_U8 *pSrcAboveLeft, + OMX_U8 *pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMXVCM4P10Intra16x16PredMode predMode, + OMX_S32 availability +); + + + +/** + * Function: omxVCM4P10_PredictIntraChroma_8x8 (6.3.3.1.3) + * + * Description: + * Performs intra prediction for chroma samples. + * + * Input Arguments: + * + * pSrcLeft - Pointer to the buffer of 8 left pixels: p[x, y] (x = -1, y = + * 0..7). + * pSrcAbove - Pointer to the buffer of 8 above pixels: p[x,y] (x = 0..7, y + * = -1); must be aligned on an 8-byte boundary. + * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) + * leftStep - Step of left pixel buffer; must be a multiple of 8. + * dstStep - Step of the destination buffer; must be a multiple of 8. + * predMode - Intra chroma prediction mode, please refer to section 3.4.3. + * availability - Neighboring chroma block availability flag, please refer + * to "Neighboring Macroblock Availability". + * + * Output Arguments: + * + * pDst - Pointer to the destination buffer; must be aligned on an 8-byte + * boundary. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If any of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pDst is NULL. + * dstStep < 8 or dstStep is not a multiple of 8. + * leftStep is not a multiple of 8. + * predMode is not in the valid range of enumeration + * OMXVCM4P10IntraChromaPredMode. + * predMode is OMX_VC_CHROMA_VERT, but availability doesn't set + * OMX_VC_UPPER indicating p[x,-1] (x = 0..7) is not available. + * predMode is OMX_VC_CHROMA_HOR, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..7) is not available. + * predMode is OMX_VC_CHROMA_PLANE, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..7), or p[-1,y] (y = 0..7), or p[-1,-1] is not + * available. + * availability sets OMX_VC_UPPER, but pSrcAbove is NULL. + * availability sets OMX_VC_LEFT, but pSrcLeft is NULL. + * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. + * either pSrcAbove or pDst is not aligned on an 8-byte boundary. + * + * Note: pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if + * they are not used by intra prediction as implied by predMode. + * + * Note: OMX_VC_UPPER_RIGHT is not used in intra chroma prediction. + * + */ +OMXResult omxVCM4P10_PredictIntraChroma_8x8 ( + const OMX_U8 *pSrcLeft, + const OMX_U8 *pSrcAbove, + const OMX_U8 *pSrcAboveLeft, + OMX_U8 *pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMXVCM4P10IntraChromaPredMode predMode, + OMX_S32 availability +); + + + +/** + * Function: omxVCM4P10_InterpolateLuma (6.3.3.2.1) + * + * Description: + * Performs quarter-pixel interpolation for inter luma MB. It is assumed that + * the frame is already padded when calling this function.
+ * + * Input Arguments: + * + * pSrc - Pointer to the source reference frame buffer + * srcStep - reference frame step, in bytes; must be a multiple of roi.width + * dstStep - destination frame step, in bytes; must be a multiple of + * roi.width + * dx - Fractional part of horizontal motion vector component in 1/4 pixel + * unit; valid in the range [0,3] + * dy - Fractional part of vertical motion vector component in 1/4 pixel + * unit; valid in the range [0,3] + * roi - Dimension of the interpolation region; the parameters roi.width and + * roi.height must be equal to either 4, 8, or 16. + * + * Output Arguments: + * + * pDst - Pointer to the destination frame buffer: + * if roi.width==4, 4-byte alignment required + * if roi.width==8, 8-byte alignment required + * if roi.width==16, 16-byte alignment required + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pSrc or pDst is NULL. + * srcStep or dstStep < roi.width. + * dx or dy is out of range [0,3]. + * roi.width or roi.height is out of range {4, 8, 16}. + * roi.width is equal to 4, but pDst is not 4-byte aligned. + * roi.width is equal to 8 or 16, but pDst is not 8-byte aligned. + * srcStep or dstStep is not a multiple of 8. + * + */ +OMXResult omxVCM4P10_InterpolateLuma ( + const OMX_U8 *pSrc, + OMX_S32 srcStep, + OMX_U8 *pDst, + OMX_S32 dstStep, + OMX_S32 dx, + OMX_S32 dy, + OMXSize roi +); + + + +/** + * Function: omxVCM4P10_InterpolateChroma (6.3.3.2.2) + * + * Description: + * Performs 1/8-pixel interpolation for inter chroma MB. + * + * Input Arguments: + * + * pSrc -Pointer to the source reference frame buffer + * srcStep -Reference frame step in bytes + * dstStep -Destination frame step in bytes; must be a multiple of + * roi.width.
+ * dx -Fractional part of horizontal motion vector component in 1/8 pixel + * unit; valid in the range [0,7] + * dy -Fractional part of vertical motion vector component in 1/8 pixel + * unit; valid in the range [0,7] + * roi -Dimension of the interpolation region; the parameters roi.width and + * roi.height must be equal to either 2, 4, or 8. + * + * Output Arguments: + * + * pDst -Pointer to the destination frame buffer: + * if roi.width==2, 2-byte alignment required + * if roi.width==4, 4-byte alignment required + * if roi.width==8, 8-byte alignment required + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pSrc or pDst is NULL. + * srcStep or dstStep < 8. + * dx or dy is out of range [0-7]. + * roi.width or roi.height is out of range {2,4,8}. + * roi.width is equal to 2, but pDst is not 2-byte aligned. + * roi.width is equal to 4, but pDst is not 4-byte aligned. + * roi.width is equal to 8, but pDst is not 8 byte aligned. + * srcStep or dstStep is not a multiple of 8. + * + */ +OMXResult omxVCM4P10_InterpolateChroma ( + const OMX_U8 *pSrc, + OMX_S32 srcStep, + OMX_U8 *pDst, + OMX_S32 dstStep, + OMX_S32 dx, + OMX_S32 dy, + OMXSize roi +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingLuma_VerEdge_I (6.3.3.3.1) + * + * Description: + * Performs in-place deblock filtering on four vertical edges of the luma + * macroblock (16x16). + * + * Input Arguments: + * + * pSrcDst - Pointer to the input macroblock; must be 16-byte aligned. + * srcdstStep -Step of the arrays; must be a multiple of 16. + * pAlpha -Array of size 2 of alpha thresholds (the first item is the alpha + * threshold for the external vertical edge, and the second item is + * for the internal vertical edge); per [ISO14496-10] alpha values + * must be in the range [0,255]. 
+ * pBeta -Array of size 2 of beta thresholds (the first item is the beta + * threshold for the external vertical edge, and the second item is + * for the internal vertical edge); per [ISO14496-10] beta values + * must be in the range [0,18]. + * pThresholds -Array of size 16 of thresholds (TC0) (values for the left + * edge of each 4x4 block, arranged in vertical block order); must + * be aligned on a 4-byte boundary. Per [ISO14496-10] values must + * be in the range [0,25]. + * pBS -Array of size 16 of BS parameters (arranged in vertical block + * order); valid in the range [0,4] with the following + * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) + * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * Any of the pointers pSrcDst, pAlpha, pBeta, pThresholds, or pBS + * is NULL. + * Either pThresholds or pBS is not aligned on a 4-byte boundary. + * pSrcDst is not 16-byte aligned. + * srcdstStep is not a multiple of 16. + * pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * One or more entries in the table pThresholds[0..15] is outside of the + * range [0,25]. + * pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && + * pBS[i^3]!=4) for 0<=i<=3.
+ * + */ +OMXResult omxVCM4P10_FilterDeblockingLuma_VerEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingLuma_HorEdge_I (6.3.3.3.2) + * + * Description: + * Performs in-place deblock filtering on four horizontal edges of the luma + * macroblock (16x16). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 16-byte aligned. + * srcdstStep - step of the arrays; must be a multiple of 16. + * pAlpha - array of size 2 of alpha thresholds (the first item is the alpha + * threshold for the external horizontal edge, and the second item is + * for the internal horizontal edge); per [ISO14496-10] alpha + * values must be in the range [0,255]. + * pBeta - array of size 2 of beta thresholds (the first item is the beta + * threshold for the external horizontal edge, and the second item + * is for the internal horizontal edge). Per [ISO14496-10] beta + * values must be in the range [0,18]. + * pThresholds - array of size 16 containing thresholds, TC0, for the top + * horizontal edge of each 4x4 block, arranged in horizontal block + * order; must be aligned on a 4-byte boundary. Per [ISO14496-10] + * values must be in the range [0,25]. + * pBS - array of size 16 of BS parameters (arranged in horizontal block + * order); valid in the range [0,4] with the following + * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) + * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst - pointer to the filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr, if one of the following cases occurs: + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds, or pBS. + * - either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * - pSrcDst is not 16-byte aligned. + * - srcdstStep is not a multiple of 16. + * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * - pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * - One or more entries in the table pThresholds[0..15] is + * outside of the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * + */ +OMXResult omxVCM4P10_FilterDeblockingLuma_HorEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingChroma_VerEdge_I (6.3.3.3.3) + * + * Description: + * Performs in-place deblock filtering on four vertical edges of the chroma + * macroblock (8x8). + * + * Input Arguments: + * + * pSrcDst - Pointer to the input macroblock; must be 8-byte aligned. + * srcdstStep - Step of the arrays; must be a multiple of 8. + * pAlpha - Array of size 2 of alpha thresholds (the first item is alpha + * threshold for external vertical edge, and the second item is for + * internal vertical edge); per [ISO14496-10] alpha values must be + * in the range [0,255]. + * pBeta - Array of size 2 of beta thresholds (the first item is the beta + * threshold for the external vertical edge, and the second item is + * for the internal vertical edge); per [ISO14496-10] beta values + * must be in the range [0,18]. + * pThresholds - Array of size 8 containing thresholds, TC0, for the left + * vertical edge of each 4x2 chroma block, arranged in vertical + * block order; must be aligned on a 4-byte boundary. Per + * [ISO14496-10] values must be in the range [0,25]. + * pBS - Array of size 16 of BS parameters (values for each 2x2 chroma + * block, arranged in vertical block order). 
This parameter is the + * same as the pBS parameter passed into FilterDeblockLuma_VerEdge; + * valid in the range [0,4] with the following restrictions: i) + * pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and + * only if pBS[i^3]== 4. Must be 4 byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds, or pBS. + * - pSrcDst is not 8-byte aligned. + * - srcdstStep is not a multiple of 8. + * - pThresholds is not 4-byte aligned. + * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * - pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * - One or more entries in the table pThresholds[0..7] is outside + * of the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - pBS is not 4-byte aligned. + * + */ +OMXResult omxVCM4P10_FilterDeblockingChroma_VerEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingChroma_HorEdge_I (6.3.3.3.4) + * + * Description: + * Performs in-place deblock filtering on the horizontal edges of the chroma + * macroblock (8x8). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 8-byte aligned. + * srcdstStep - array step; must be a multiple of 8. + * pAlpha - array of size 2 containing alpha thresholds; the first element + * contains the threshold for the external horizontal edge, and the + * second element contains the threshold for internal horizontal + * edge. Per [ISO14496-10] alpha values must be in the range + * [0,255]. 
+ * pBeta - array of size 2 containing beta thresholds; the first element + * contains the threshold for the external horizontal edge, and the + * second element contains the threshold for the internal + * horizontal edge. Per [ISO14496-10] beta values must be in the + * range [0,18]. + * pThresholds - array of size 8 containing thresholds, TC0, for the top + * horizontal edge of each 2x4 chroma block, arranged in horizontal + * block order; must be aligned on a 4-byte boundary. Per + * [ISO14496-10] values must be in the range [0,25]. + * pBS - array of size 16 containing BS parameters for each 2x2 chroma + * block, arranged in horizontal block order; valid in the range + * [0,4] with the following restrictions: i) pBS[i]== 4 may occur + * only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 4. + * Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr, if one of the following cases occurs: + * - any of the following pointers is NULL: + * pSrcDst, pAlpha, pBeta, pThresholds, or pBS. + * - pSrcDst is not 8-byte aligned. + * - srcdstStep is not a multiple of 8. + * - pThresholds is not 4-byte aligned. + * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * - pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * - One or more entries in the table pThresholds[0..7] is outside + * of the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - pBS is not 4-byte aligned. 
+ * + */ +OMXResult omxVCM4P10_FilterDeblockingChroma_HorEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_DeblockLuma_I (6.3.3.3.5) + * + * Description: + * This function performs in-place deblock filtering the horizontal and + * vertical edges of a luma macroblock (16x16). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 16-byte aligned. + * srcdstStep - image width; must be a multiple of 16. + * pAlpha - pointer to a 2x2 table of alpha thresholds, organized as + * follows: {external vertical edge, internal vertical edge, + * external horizontal edge, internal horizontal edge }. Per + * [ISO14496-10] alpha values must be in the range [0,255]. + * pBeta - pointer to a 2x2 table of beta thresholds, organized as follows: + * {external vertical edge, internal vertical edge, external + * horizontal edge, internal horizontal edge }. Per [ISO14496-10] + * beta values must be in the range [0,18]. + * pThresholds - pointer to a 16x2 table of threshold (TC0), organized as + * follows: {values for the left or above edge of each 4x4 block, + * arranged in vertical block order and then in horizontal block + * order}; must be aligned on a 4-byte boundary. Per [ISO14496-10] + * values must be in the range [0,25]. + * pBS - pointer to a 16x2 table of BS parameters arranged in scan block + * order for vertical edges and then horizontal edges; valid in the + * range [0,4] with the following restrictions: i) pBS[i]== 4 may + * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== + * 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst - pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds or pBS. 
+ * - pSrcDst is not 16-byte aligned. + * - either pThresholds or pBS is not aligned on a 4-byte boundary. + * - one or more entries in the table pAlpha[0..3] is outside the range + * [0,255]. + * - one or more entries in the table pBeta[0..3] is outside the range + * [0,18]. + * - one or more entries in the table pThresholds[0..31] is outside of + * the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - srcdstStep is not a multiple of 16. + * + */ +OMXResult omxVCM4P10_DeblockLuma_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_DeblockChroma_I (6.3.3.3.6) + * + * Description: + * Performs in-place deblocking filtering on all edges of the chroma + * macroblock (8x8). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 8-byte aligned. + * srcdstStep - step of the arrays; must be a multiple of 8. + * pAlpha - pointer to a 2x2 array of alpha thresholds, organized as + * follows: {external vertical edge, internal vertical edge, + * external horizontal edge, internal horizontal edge }. Per + * [ISO14496-10] alpha values must be in the range [0,255]. + * pBeta - pointer to a 2x2 array of Beta Thresholds, organized as follows: + * { external vertical edge, internal vertical edge, external + * horizontal edge, internal horizontal edge }. Per [ISO14496-10] + * beta values must be in the range [0,18]. + * pThresholds - array of size 8x2 of Thresholds (TC0) (values for the left + * or above edge of each 4x2 or 2x4 block, arranged in vertical + * block order and then in horizontal block order); must be aligned + * on a 4-byte boundary. Per [ISO14496-10] values must be in the + * range [0,25].
+ * pBS - array of size 16x2 of BS parameters (arranged in scan block order + * for vertical edges and then horizontal edges); valid in the + * range [0,4] with the following restrictions: i) pBS[i]== 4 may + * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== + * 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst - pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds, or pBS. + * - pSrcDst is not 8-byte aligned. + * - either pThresholds or pBS is not 4-byte aligned. + * - one or more entries in the table pAlpha[0..3] is outside the range + * [0,255]. + * - one or more entries in the table pBeta[0..3] is outside the range + * [0,18]. + * - one or more entries in the table pThresholds[0..15] is outside of + * the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - srcdstStep is not a multiple of 8. + * + */ +OMXResult omxVCM4P10_DeblockChroma_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (6.3.4.1.1) + * + * Description: + * Performs CAVLC decoding and inverse raster scan for a 2x2 block of + * ChromaDCLevel. The decoded coefficients in the packed position-coefficient + * buffer are stored in reverse zig-zag order, i.e., the first buffer element + * contains the last non-zero position-coefficient pair of the block. Within + * each position-coefficient pair, the position entry indicates the + * raster-scan position of the coefficient, while the coefficient entry + * contains the coefficient value.
+ * + * Input Arguments: + * + * ppBitStream - Double pointer to current byte in bit stream buffer + * pOffset - Pointer to current bit position in the byte pointed to by + * *ppBitStream; valid in the range [0,7]. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after each block is decoded + * pOffset - *pOffset is updated after each block is decoded + * pNumCoeff - Pointer to the number of nonzero coefficients in this block + * ppPosCoefBuf - Double pointer to destination residual + * coefficient-position pair buffer (spelled ppPosCoefbuf in the + * prototype below). Buffer position + * (*ppPosCoefBuf) is updated upon return, unless there are only + * zero coefficients in the currently decoded block. In this case + * the caller is expected to bypass the transform/dequantization of + * the empty blocks. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppBitStream or pOffset is NULL. + * - ppPosCoefBuf or pNumCoeff is NULL. + * OMX_Sts_Err - if one of the following is true: + * - an illegal code is encountered in the bitstream + * + */ +OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC ( + const OMX_U8 **ppBitStream, + OMX_S32*pOffset, + OMX_U8 *pNumCoeff, + OMX_U8 **ppPosCoefbuf +); + + + +/** + * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC (6.3.4.1.2) + * + * Description: + * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of + * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. Inverse + * field scan is not supported. The decoded coefficients in the packed + * position-coefficient buffer are stored in reverse zig-zag order, i.e., the + * first buffer element contains the last non-zero position-coefficient pair of + * the block. Within each position-coefficient pair, the position entry + * indicates the raster-scan position of the coefficient, while the + * coefficient entry contains the coefficient value.
+ * + * Input Arguments: + * + * ppBitStream - Double pointer to current byte in bit stream buffer + * pOffset - Pointer to current bit position in the byte pointed to by + * *ppBitStream; valid in the range [0,7]. + * sMaxNumCoeff - Maximum number of non-zero coefficients in current + * block + * sVLCSelect - VLC table selector, obtained from the number of non-zero + * coefficients contained in the above and left 4x4 blocks. It is + * equivalent to the variable nC described in H.264 standard Table + * 9-5, except its value can't be less than zero. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after each block is decoded. + * Buffer position (*ppPosCoefBuf) is updated upon return, unless + * there are only zero coefficients in the currently decoded block. + * In this case the caller is expected to bypass the + * transform/dequantization of the empty blocks. + * pOffset - *pOffset is updated after each block is decoded + * pNumCoeff - Pointer to the number of nonzero coefficients in this block + * ppPosCoefBuf - Double pointer to destination residual + * coefficient-position pair buffer + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppBitStream or pOffset is NULL. + * - ppPosCoefBuf or pNumCoeff is NULL. + * - sMaxNumCoeff is not equal to either 15 or 16. + * - sVLCSelect is less than 0.
+ * + * OMX_Sts_Err - if one of the following is true: + * - an illegal code is encountered in the bitstream + * + */ +OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC ( + const OMX_U8 **ppBitStream, + OMX_S32 *pOffset, + OMX_U8 *pNumCoeff, + OMX_U8 **ppPosCoefbuf, + OMX_INT sVLCSelect, + OMX_INT sMaxNumCoeff +); + + + +/** + * Function: omxVCM4P10_TransformDequantLumaDCFromPair (6.3.4.2.1) + * + * Description: + * Reconstructs the 4x4 LumaDC block from the coefficient-position pair + * buffer, performs integer inverse transformation and dequantization for + * 4x4 LumaDC coefficients, and updates the pair buffer pointer to the next + * non-empty block. + * + * Input Arguments: + * + * ppSrc - Double pointer to residual coefficient-position pair buffer + * output by CAVLC decoding + * QP - Quantization parameter QpY + * + * Output Arguments: + * + * ppSrc - *ppSrc is updated to the start of next non-empty block + * pDst - Pointer to the reconstructed 4x4 LumaDC coefficients buffer; must + * be aligned on an 8-byte boundary. + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppSrc or pDst is NULL. + * - pDst is not 8-byte aligned. + * - QP is not in the range of [0-51]. + * + */ +OMXResult omxVCM4P10_TransformDequantLumaDCFromPair ( + const OMX_U8 **ppSrc, + OMX_S16 *pDst, + OMX_INT QP +); + + + +/** + * Function: omxVCM4P10_TransformDequantChromaDCFromPair (6.3.4.2.2) + * + * Description: + * Reconstruct the 2x2 ChromaDC block from coefficient-position pair buffer, + * perform integer inverse transformation, and dequantization for 2x2 chroma + * DC coefficients, and update the pair buffer pointer to next non-empty + * block.
+ * + * Input Arguments: + * + * ppSrc - Double pointer to residual coefficient-position pair buffer + * output by CAVLC decoding + * QP - Quantization parameter QpC + * + * Output Arguments: + * + * ppSrc - *ppSrc is updated to the start of next non-empty block + * pDst - Pointer to the reconstructed 2x2 ChromaDC coefficients buffer; + * must be aligned on a 4-byte boundary. + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppSrc or pDst is NULL. + * - pDst is not 4-byte aligned. + * - QP is not in the range of [0-51]. + * + */ +OMXResult omxVCM4P10_TransformDequantChromaDCFromPair ( + const OMX_U8 **ppSrc, + OMX_S16 *pDst, + OMX_INT QP +); + + + +/** + * Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd (6.3.4.2.3) + * + * Description: + * Reconstruct the 4x4 residual block from coefficient-position pair buffer, + * perform dequantization and integer inverse transformation for 4x4 block of + * residuals with previous intra prediction or motion compensation data, and + * update the pair buffer pointer to next non-empty block. If pDC == NULL, + * there are 16 non-zero AC coefficients at most in the packed buffer starting + * from 4x4 block position 0; if pDC != NULL, there are 15 non-zero AC + * coefficients at most in the packed buffer starting from 4x4 block position + * 1. + * + * Input Arguments: + * + * ppSrc - Double pointer to residual coefficient-position pair buffer + * output by CAVLC decoding + * pPred - Pointer to the predicted 4x4 block; must be aligned on a 4-byte + * boundary + * predStep - Predicted frame step size in bytes; must be a multiple of 4 + * dstStep - Destination frame step in bytes; must be a multiple of 4 + * pDC - Pointer to the DC coefficient of this block, NULL if it doesn't + * exist + * QP - Quantization parameter. It should be QpC in chroma 4x4 block + * decoding, otherwise it should be QpY.
+ * AC - Flag indicating if at least one non-zero AC coefficient exists + * + * Output Arguments: + * + * pDst - pointer to the reconstructed 4x4 block data; must be aligned on a + * 4-byte boundary + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - pPred or pDst is NULL. + * - pPred or pDst is not 4-byte aligned. + * - predStep or dstStep is not a multiple of 4. + * - AC !=0 and Qp is not in the range of [0-51] or ppSrc == NULL. + * - AC ==0 && pDC ==NULL. + * + */ +OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd ( + const OMX_U8 **ppSrc, + const OMX_U8 *pPred, + const OMX_S16 *pDC, + OMX_U8 *pDst, + OMX_INT predStep, + OMX_INT dstStep, + OMX_INT QP, + OMX_INT AC +); + + + +/** + * Function: omxVCM4P10_MEGetBufSize (6.3.5.1.1) + * + * Description: + * Computes the size, in bytes, of the vendor-specific specification + * structure for the omxVCM4P10 motion estimation functions BlockMatch_Integer + * and MotionEstimationMB. + * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P10MEMode + * pMEParams -motion estimation parameters + * + * Output Arguments: + * + * pSize - pointer to the number of bytes required for the motion + * estimation specification structure + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - pMEParams or pSize is NULL. + * - an invalid MEMode is specified. + * + */ +OMXResult omxVCM4P10_MEGetBufSize ( + OMXVCM4P10MEMode MEmode, + const OMXVCM4P10MEParams *pMEParams, + OMX_U32 *pSize +); + + + +/** + * Function: omxVCM4P10_MEInit (6.3.5.1.2) + * + * Description: + * Initializes the vendor-specific specification structure required for the + * omxVCM4P10 motion estimation functions: BlockMatch_Integer and + * MotionEstimationMB. 
Memory for the specification structure *pMESpec must be + * allocated prior to calling the function, and should be aligned on a 4-byte + * boundary. The number of bytes required for the specification structure can + * be determined using the function omxVCM4P10_MEGetBufSize. Following + * initialization by this function, the vendor-specific structure *pMESpec + * should contain an implementation-specific representation of all motion + * estimation parameters received via the structure pMEParams, for example + * searchRange16x16, searchRange8x8, etc. + * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P10MEMode + * pMEParams - motion estimation parameters + * pMESpec - pointer to the uninitialized ME specification structure + * + * Output Arguments: + * + * pMESpec - pointer to the initialized ME specification structure + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - pMEParams or pMESpec is NULL. + * - an invalid value was specified for the parameter MEmode + * - a negative or zero value was specified for one of the search ranges + * (e.g., pMEParams->searchRange8x8, pMEParams->searchRange16x16, etc.) + * - either in isolation or in combination, one or more of the enables or + * search ranges in the structure *pMEParams were configured such + * that the requested behavior fails to comply with [ISO14496-10]. + * + */ +OMXResult omxVCM4P10_MEInit ( + OMXVCM4P10MEMode MEmode, + const OMXVCM4P10MEParams *pMEParams, + void *pMESpec +); + + + +/** + * Function: omxVCM4P10_BlockMatch_Integer (6.3.5.2.1) + * + * Description: + * Performs integer block match. Returns best MV and associated cost. + * + * Input Arguments: + * + * pSrcOrgY - Pointer to the top-left corner of the current block: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required. + * pSrcRefY - Pointer to the top-left corner of the co-located block in the + * reference picture: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * nSrcOrgStep - Stride of the original picture plane, expressed in terms + * of integer pixels; must be a multiple of iBlockWidth. + * nSrcRefStep - Stride of the reference picture plane, expressed in terms + * of integer pixels + * pRefRect - pointer to the valid reference rectangle inside the reference + * picture plane + * pCurrPointPos - pointer to the position of the current block in the + * current plane + * iBlockWidth - Width of the current block, expressed in terms of integer + * pixels; must be equal to either 4, 8, or 16. + * iBlockHeight - Height of the current block, expressed in terms of + * integer pixels; must be equal to either 4, 8, or 16. + * nLamda - Lamda factor; used to compute motion cost + * pMVPred - Predicted MV; used to compute motion cost, expressed in terms + * of 1/4-pel units + * pMVCandidate - Candidate MV; used to initialize the motion search, + * expressed in terms of integer pixels + * pMESpec - pointer to the ME specification structure + * + * Output Arguments: + * + * pBestMV - Best MV resulting from integer search, expressed in terms + * of 1/4-pel units + * pBestCost - Motion cost associated with the best MV; computed as + * SAD+Lamda*BitsUsedByMV + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - any of the following pointers is NULL: + * pSrcOrgY, pSrcRefY, pRefRect, pMVPred, pMVCandidate, or pMESpec. + * - Either iBlockWidth or iBlockHeight is a value other than 4, 8, or 16.
+ * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_BlockMatch_Integer ( + const OMX_U8 *pSrcOrgY, + OMX_S32 nSrcOrgStep, + const OMX_U8 *pSrcRefY, + OMX_S32 nSrcRefStep, + const OMXRect *pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + OMX_U8 iBlockWidth, + OMX_U8 iBlockHeight, + OMX_U32 nLamda, + const OMXVCMotionVector *pMVPred, + const OMXVCMotionVector *pMVCandidate, + OMXVCMotionVector *pBestMV, + OMX_S32 *pBestCost, + void *pMESpec +); + + + +/** + * Function: omxVCM4P10_BlockMatch_Half (6.3.5.2.2) + * + * Description: + * Performs a half-pel block match using results from a prior integer search. + * Returns the best MV and associated cost. This function estimates the + * half-pixel motion vector by interpolating the integer resolution motion + * vector referenced by the input parameter pSrcDstBestMV, i.e., the initial + * integer MV is generated externally. The function + * omxVCM4P10_BlockMatch_Integer may be used for integer motion estimation. + * + * Input Arguments: + * + * pSrcOrgY - Pointer to the current position in original picture plane: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * pSrcRefY - Pointer to the top-left corner of the co-located block in the + * reference picture: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * nSrcOrgStep - Stride of the original picture plane in terms of full + * pixels; must be a multiple of iBlockWidth. + * nSrcRefStep - Stride of the reference picture plane in terms of full + * pixels + * iBlockWidth - Width of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * iBlockHeight - Height of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. 
+ * nLamda - Lamda factor, used to compute motion cost + * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to + * compute motion cost + * pSrcDstBestMV - The best MV resulting from a prior integer search, + * represented in terms of 1/4-pel units + * + * Output Arguments: + * + * pSrcDstBestMV - Best MV resulting from the half-pel search, expressed in + * terms of 1/4-pel units + * pBestCost - Motion cost associated with the best MV; computed as + * SAD+Lamda*BitsUsedByMV + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - any of the following pointers is NULL: pSrcOrgY, pSrcRefY, + * pSrcDstBestMV, pMVPred, pBestCost + * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_BlockMatch_Half ( + const OMX_U8 *pSrcOrgY, + OMX_S32 nSrcOrgStep, + const OMX_U8 *pSrcRefY, + OMX_S32 nSrcRefStep, + OMX_U8 iBlockWidth, + OMX_U8 iBlockHeight, + OMX_U32 nLamda, + const OMXVCMotionVector *pMVPred, + OMXVCMotionVector *pSrcDstBestMV, + OMX_S32 *pBestCost +); + + + +/** + * Function: omxVCM4P10_BlockMatch_Quarter (6.3.5.2.3) + * + * Description: + * Performs a quarter-pel block match using results from a prior half-pel + * search. Returns the best MV and associated cost. This function estimates + * the quarter-pixel motion vector by interpolating the half-pel resolution + * motion vector referenced by the input parameter pSrcDstBestMV, i.e., the + * initial half-pel MV is generated externally. The function + * omxVCM4P10_BlockMatch_Half may be used for half-pel motion estimation. + * + * Input Arguments: + * + * pSrcOrgY - Pointer to the current position in original picture plane: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. 
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the + * reference picture: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * nSrcOrgStep - Stride of the original picture plane in terms of full + * pixels; must be a multiple of iBlockWidth. + * nSrcRefStep - Stride of the reference picture plane in terms of full + * pixels + * iBlockWidth - Width of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * iBlockHeight - Height of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * nLamda - Lamda factor, used to compute motion cost + * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to + * compute motion cost + * pSrcDstBestMV - The best MV resulting from a prior half-pel search, + * represented in terms of 1/4 pel units + * + * Output Arguments: + * + * pSrcDstBestMV - Best MV resulting from the quarter-pel search, expressed + * in terms of 1/4-pel units + * pBestCost - Motion cost associated with the best MV; computed as + * SAD+Lamda*BitsUsedByMV + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - One or more of the following pointers is NULL: + * pSrcOrgY, pSrcRefY, pSrcDstBestMV, pMVPred, pBestCost + * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. 
+ * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_BlockMatch_Quarter ( + const OMX_U8 *pSrcOrgY, + OMX_S32 nSrcOrgStep, + const OMX_U8 *pSrcRefY, + OMX_S32 nSrcRefStep, + OMX_U8 iBlockWidth, + OMX_U8 iBlockHeight, + OMX_U32 nLamda, + const OMXVCMotionVector *pMVPred, + OMXVCMotionVector *pSrcDstBestMV, + OMX_S32 *pBestCost +); + + + +/** + * Function: omxVCM4P10_MotionEstimationMB (6.3.5.3.1) + * + * Description: + * Performs MB-level motion estimation and selects best motion estimation + * strategy from the set of modes supported in baseline profile [ISO14496-10]. + * + * Input Arguments: + * + * pSrcCurrBuf - Pointer to the current position in original picture plane; + * 16-byte alignment required + * pSrcRefBufList - Pointer to an array with 16 entries. Each entry points + * to the top-left corner of the co-located MB in a reference + * picture. The array is filled from low-to-high with valid + * reference frame pointers; the unused high entries should be set + * to NULL. Ordering of the reference frames should follow + * [ISO14496-10] subclause 8.2.4 Decoding Process for Reference + * Picture Lists. The entries must be 16-byte aligned. + * pSrcRecBuf - Pointer to the top-left corner of the co-located MB in the + * reconstructed picture; must be 16-byte aligned. + * SrcCurrStep - Width of the original picture plane in terms of full + * pixels; must be a multiple of 16. + * SrcRefStep - Width of the reference picture plane in terms of full + * pixels; must be a multiple of 16. + * SrcRecStep - Width of the reconstructed picture plane in terms of full + * pixels; must be a multiple of 16. + * pRefRect - Pointer to the valid reference rectangle; relative to the + * image origin. + * pCurrPointPos - Position of the current macroblock in the current plane. 
+ * Lambda - Lagrange factor for computing the cost function + * pMESpec - Pointer to the motion estimation specification structure; must + * have been allocated and initialized prior to calling this + * function. + * pMBInter - Array, of dimension four, containing pointers to information + * associated with four adjacent type INTER MBs (Left, Top, + * Top-Left, Top-Right). Any pointer in the array may be set equal + * to NULL if the corresponding MB doesn't exist or is not of type + * INTER. + * - pMBInter[0] - Pointer to left MB information + * - pMBInter[1] - Pointer to top MB information + * - pMBInter[2] - Pointer to top-left MB information + * - pMBInter[3] - Pointer to top-right MB information + * pMBIntra - Array, of dimension four, containing pointers to information + * associated with four adjacent type INTRA MBs (Left, Top, + * Top-Left, Top-Right). Any pointer in the array may be set equal + * to NULL if the corresponding MB doesn't exist or is not of type + * INTRA. + * - pMBIntra[0] - Pointer to left MB information + * - pMBIntra[1] - Pointer to top MB information + * - pMBIntra[2] - Pointer to top-left MB information + * - pMBIntra[3] - Pointer to top-right MB information + * pSrcDstMBCurr - Pointer to information structure for the current MB. + * The following entries should be set prior to calling the + * function: sliceID - the number of the slice to which the + * current MB belongs. + * + * Output Arguments: + * + * pDstCost - Pointer to the minimum motion cost for the current MB. + * pDstBlockSAD - Pointer to the array of SADs for each of the sixteen luma + * 4x4 blocks in each MB. The block SADs are in scan order for + * each MB. For implementations that cannot compute the SAD values + * individually, the maximum possible value (0xffff) is returned + * for each of the 16 block SAD entries. + * pSrcDstMBCurr - Pointer to updated information structure for the current + * MB after MB-level motion estimation has been completed.
The + * following fields are updated by the ME function. The following + * parameter set quantifies the MB-level ME search results: + * - MbType + * - subMBType[4] + * - pMV0[4][4] + * - pMVPred[4][4] + * - pRefL0Idx[4] + * - Intra16x16PredMode + * - pIntra4x4PredMode[4][4] + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - One or more of the following pointers is NULL: pSrcCurrBuf, + * pSrcRefBufList, pSrcRecBuf, pRefRect, pCurrPointPos, pMESpec, + * pMBInter, pMBIntra, pSrcDstMBCurr, pDstCost, pSrcRefBufList[0] + * - SrcRefStep, SrcRecStep are not multiples of 16 + * - iBlockWidth or iBlockHeight are values other than 4, 8, or 16 (NOTE(review): neither is a parameter of this function; verify against the OpenMAX DL specification). + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_MotionEstimationMB ( + const OMX_U8 *pSrcCurrBuf, + OMX_S32 SrcCurrStep, + const OMX_U8 *pSrcRefBufList[15], + OMX_S32 SrcRefStep, + const OMX_U8 *pSrcRecBuf, + OMX_S32 SrcRecStep, + const OMXRect *pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + OMX_U32 Lambda, + void *pMESpec, + const OMXVCM4P10MBInfoPtr *pMBInter, + const OMXVCM4P10MBInfoPtr *pMBIntra, + OMXVCM4P10MBInfoPtr pSrcDstMBCurr, + OMX_INT *pDstCost, + OMX_U16 *pDstBlockSAD +); + + + +/** + * Function: omxVCM4P10_SAD_4x (6.3.5.4.1) + * + * Description: + * This function calculates the SAD for 4x8 and 4x4 blocks. + * + * Input Arguments: + * + * pSrcOrg -Pointer to the original block; must be aligned on a 4-byte + * boundary. + * iStepOrg -Step of the original block buffer; must be a multiple of 4. + * pSrcRef -Pointer to the reference block + * iStepRef -Step of the reference block buffer + * iHeight -Height of the block; must be equal to either 4 or 8. + * + * Output Arguments: + * + * pDstSAD -Pointer to the resulting SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - One or more of the following pointers is NULL: + * pSrcOrg, pSrcRef, or pDstSAD + * - iHeight is not equal to either 4 or 8. + * - iStepOrg is not a multiple of 4 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SAD_4x ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_S32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SADQuar_4x (6.3.5.4.2) + * + * Description: + * This function calculates the SAD between one block (pSrc) and the average + * of the other two (pSrcRef0 and pSrcRef1) for 4x8 or 4x4 blocks. Rounding + * is applied according to the convention (a+b+1)>>1. + * + * Input Arguments: + * + * pSrc - Pointer to the original block; must be aligned on a 4-byte + * boundary. + * pSrcRef0 - Pointer to reference block 0 + * pSrcRef1 - Pointer to reference block 1 + * iSrcStep - Step of the original block buffer; must be a multiple of 4. + * iRefStep0 - Step of reference block 0 + * iRefStep1 - Step of reference block 1 + * iHeight - Height of the block; must be equal to either 4 or 8. + * + * Output Arguments: + * + * pDstSAD - Pointer of result SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - iHeight is not equal to either 4 or 8. + * - One or more of the following pointers is NULL: pSrc, pSrcRef0, + * pSrcRef1, pDstSAD. 
+ * - iSrcStep is not a multiple of 4 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SADQuar_4x ( + const OMX_U8 *pSrc, + const OMX_U8 *pSrcRef0, + const OMX_U8 *pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SADQuar_8x (6.3.5.4.3) + * + * Description: + * This function calculates the SAD between one block (pSrc) and the average + * of the other two (pSrcRef0 and pSrcRef1) for 8x16, 8x8, or 8x4 blocks. + * Rounding is applied according to the convention (a+b+1)>>1. + * + * Input Arguments: + * + * pSrc - Pointer to the original block; must be aligned on an 8-byte + * boundary. + * pSrcRef0 - Pointer to reference block 0 + * pSrcRef1 - Pointer to reference block 1 + * iSrcStep - Step of the original block buffer; must be a multiple of 8. + * iRefStep0 - Step of reference block 0 + * iRefStep1 - Step of reference block 1 + * iHeight - Height of the block; must be equal either 4, 8, or 16. + * + * Output Arguments: + * + * pDstSAD - Pointer of result SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - iHeight is not equal to either 4, 8, or 16. + * - One or more of the following pointers is NULL: pSrc, pSrcRef0, + * pSrcRef1, pDstSAD. + * - iSrcStep is not a multiple of 8 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SADQuar_8x ( + const OMX_U8 *pSrc, + const OMX_U8 *pSrcRef0, + const OMX_U8 *pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SADQuar_16x (6.3.5.4.4) + * + * Description: + * This function calculates the SAD between one block (pSrc) and the average + * of the other two (pSrcRef0 and pSrcRef1) for 16x16 or 16x8 blocks. + * Rounding is applied according to the convention (a+b+1)>>1. 
+ * + * Input Arguments: + * + * pSrc - Pointer to the original block; must be aligned on a 16-byte + * boundary. + * pSrcRef0 - Pointer to reference block 0 + * pSrcRef1 - Pointer to reference block 1 + * iSrcStep - Step of the original block buffer; must be a multiple of 16 + * iRefStep0 - Step of reference block 0 + * iRefStep1 - Step of reference block 1 + * iHeight - Height of the block; must be equal to either 8 or 16 + * + * Output Arguments: + * + * pDstSAD -Pointer of result SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - iHeight is not equal to either 8 or 16. + * - One or more of the following pointers is NULL: pSrc, pSrcRef0, + * pSrcRef1, pDstSAD. + * - iSrcStep is not a multiple of 16 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SADQuar_16x ( + const OMX_U8 *pSrc, + const OMX_U8 *pSrcRef0, + const OMX_U8 *pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SATD_4x4 (6.3.5.4.5) + * + * Description: + * This function calculates the sum of absolute transform differences (SATD) + * for a 4x4 block by applying a Hadamard transform to the difference block + * and then calculating the sum of absolute coefficient values. 
+ * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 4-byte + * boundary + * iStepOrg - Step of the original block buffer; must be a multiple of 4 + * pSrcRef - Pointer to the reference block; must be aligned on a 4-byte + * boundary + * iStepRef - Step of the reference block buffer; must be a multiple of 4 + * + * Output Arguments: + * + * pDstSAD - pointer to the resulting SATD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pSrcRef, or pDstSAD + * - either pSrcOrg or pSrcRef is not aligned on a 4-byte boundary + * - iStepOrg <= 0 or iStepOrg is not a multiple of 4 + * - iStepRef <= 0 or iStepRef is not a multiple of 4 + * + */ +OMXResult omxVCM4P10_SATD_4x4 ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_U32 *pDstSAD +); + + + +/** + * Function: omxVCM4P10_InterpolateHalfHor_Luma (6.3.5.5.1) + * + * Description: + * This function performs interpolation for two horizontal 1/2-pel positions - + * (-1/2, 0) and (1/2, 0) - around a full-pel position. + * + * Input Arguments: + * + * pSrc - Pointer to the top-left corner of the block used to interpolate in + * the reconstruction frame plane. + * iSrcStep - Step of the source buffer. + * iDstStep - Step of the destination (interpolation) buffer; must be a + * multiple of iWidth. + * iWidth - Width of the current block; must be equal to either 4, 8, or 16 + * iHeight - Height of the current block; must be equal to 4, 8, or 16 + * + * Output Arguments: + * + * pDstLeft -Pointer to the interpolation buffer of the left 1/2-pel position + * (-1/2, 0) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required.
+ * pDstRight -Pointer to the interpolation buffer of the right 1/2-pel + * position (1/2, 0) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrc, pDstLeft, or pDstRight + * - iWidth or iHeight have values other than 4, 8, or 16 + * - iWidth==4 but pDstLeft and/or pDstRight is/are not aligned on a 4-byte boundary + * - iWidth==8 but pDstLeft and/or pDstRight is/are not aligned on an 8-byte boundary + * - iWidth==16 but pDstLeft and/or pDstRight is/are not aligned on a 16-byte boundary + * - any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_InterpolateHalfHor_Luma ( + const OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDstLeft, + OMX_U8 *pDstRight, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_InterpolateHalfVer_Luma (6.3.5.5.2) + * + * Description: + * This function performs interpolation for two vertical 1/2-pel positions - + * (0, -1/2) and (0, 1/2) - around a full-pel position. + * + * Input Arguments: + * + * pSrc - Pointer to top-left corner of block used to interpolate in the + * reconstructed frame plane + * iSrcStep - Step of the source buffer. + * iDstStep - Step of the destination (interpolation) buffer; must be a + * multiple of iWidth. + * iWidth - Width of the current block; must be equal to either 4, 8, or 16 + * iHeight - Height of the current block; must be equal to either 4, 8, or 16 + * + * Output Arguments: + * + * pDstUp -Pointer to the interpolation buffer of the 1/2-pel position above + * the current full-pel position (0, -1/2) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required.
+ * pDstDown -Pointer to the interpolation buffer of the 1/2-pel position below + * the current full-pel position (0, 1/2) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrc, pDstUp, or pDstDown + * - iWidth or iHeight have values other than 4, 8, or 16 + * - iWidth==4 but pDstUp and/or pDstDown is/are not aligned on a 4-byte boundary + * - iWidth==8 but pDstUp and/or pDstDown is/are not aligned on an 8-byte boundary + * - iWidth==16 but pDstUp and/or pDstDown is/are not aligned on a 16-byte boundary + * + */ +OMXResult omxVCM4P10_InterpolateHalfVer_Luma ( + const OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDstUp, + OMX_U8 *pDstDown, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_Average_4x (6.3.5.5.3) + * + * Description: + * This function calculates the average of two 4x4 or 4x8 blocks. The result + * is rounded according to (a+b+1)/2. + * + * Input Arguments: + * + * pPred0 - Pointer to the top-left corner of reference block 0 + * pPred1 - Pointer to the top-left corner of reference block 1 + * iPredStep0 - Step of reference block 0; must be a multiple of 4. + * iPredStep1 - Step of reference block 1; must be a multiple of 4. + * iDstStep - Step of the destination buffer; must be a multiple of 4. + * iHeight - Height of the blocks; must be either 4 or 8. + * + * Output Arguments: + * + * pDstPred - Pointer to the destination buffer. 4-byte alignment required.
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pPred0, pPred1, or pDstPred + * - pDstPred is not aligned on a 4-byte boundary + * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 4 + * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 4 + * - iDstStep <= 0 or iDstStep is not a multiple of 4 + * - iHeight is not equal to either 4 or 8 + * + */ +OMXResult omxVCM4P10_Average_4x ( + const OMX_U8 *pPred0, + const OMX_U8 *pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8 *pDstPred, + OMX_U32 iDstStep, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_TransformQuant_ChromaDC (6.3.5.6.1) + * + * Description: + * This function performs 2x2 Hadamard transform of chroma DC coefficients + * and then quantizes the coefficients. + * + * Input Arguments: + * + * pSrcDst - Pointer to the 2x2 array of chroma DC coefficients. 8-byte + * alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * bIntra - Indicate whether this is an INTRA block. 1-INTRA, 0-INTER + * + * Output Arguments: + * + * pSrcDst - Pointer to transformed and quantized coefficients. 8-byte + * alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcDst + * - pSrcDst is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_TransformQuant_ChromaDC ( + OMX_S16 *pSrcDst, + OMX_U32 iQP, + OMX_U8 bIntra +); + + + +/** + * Function: omxVCM4P10_TransformQuant_LumaDC (6.3.5.6.2) + * + * Description: + * This function performs a 4x4 Hadamard transform of luma DC coefficients + * and then quantizes the coefficients. + * + * Input Arguments: + * + * pSrcDst - Pointer to the 4x4 array of luma DC coefficients. 
16-byte + * alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * + * Output Arguments: + * + * pSrcDst - Pointer to transformed and quantized coefficients. 16-byte + * alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: pSrcDst + * - pSrcDst is not aligned on a 16-byte boundary + * + */ +OMXResult omxVCM4P10_TransformQuant_LumaDC ( + OMX_S16 *pSrcDst, + OMX_U32 iQP +); + + + +/** + * Function: omxVCM4P10_InvTransformDequant_LumaDC (6.3.5.6.3) + * + * Description: + * This function performs an inverse 4x4 Hadamard transform and then dequantizes + * the coefficients. + * + * Input Arguments: + * + * pSrc - Pointer to the 4x4 array of the 4x4 Hadamard-transformed and + * quantized coefficients. 16-byte alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * + * Output Arguments: + * + * pDst - Pointer to inverse-transformed and dequantized coefficients. + * 16-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: pSrc + * - pSrc or pDst is not aligned on a 16-byte boundary + * + */ +OMXResult omxVCM4P10_InvTransformDequant_LumaDC ( + const OMX_S16 *pSrc, + OMX_S16 *pDst, + OMX_U32 iQP +); + + + +/** + * Function: omxVCM4P10_InvTransformDequant_ChromaDC (6.3.5.6.4) + * + * Description: + * This function performs an inverse 2x2 Hadamard transform and then dequantizes + * the coefficients. + * + * Input Arguments: + * + * pSrc - Pointer to the 2x2 array of the 2x2 Hadamard-transformed and + * quantized coefficients. 8-byte alignment required. + * iQP - Quantization parameter; must be in the range [0,51].
+ * + * Output Arguments: + * + * pDst - Pointer to inverse-transformed and dequantized coefficients. + * 8-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: pSrc + * - pSrc or pDst is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_InvTransformDequant_ChromaDC ( + const OMX_S16 *pSrc, + OMX_S16 *pDst, + OMX_U32 iQP +); + + + +/** + * Function: omxVCM4P10_InvTransformResidualAndAdd (6.3.5.7.1) + * + * Description: + * This function performs an inverse 4x4 integer transformation to produce + * the difference signal and then adds the difference to the prediction to get + * the reconstructed signal. + * + * Input Arguments: + * + * pSrcPred - Pointer to prediction signal. 4-byte alignment required. + * pDequantCoeff - Pointer to the transformed coefficients. 8-byte + * alignment required. + * iSrcPredStep - Step of the prediction buffer; must be a multiple of 4. + * iDstReconStep - Step of the destination reconstruction buffer; must be a + * multiple of 4. + * bAC - Indicates whether there are AC coefficients in the coefficient + * matrix. + * + * Output Arguments: + * + * pDstRecon -Pointer to the destination reconstruction buffer. 4-byte + * alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcPred, pDequantCoeff, pDstRecon + * - pSrcPred is not aligned on a 4-byte boundary + * - iSrcPredStep or iDstReconStep is not a multiple of 4.
+ * - pDequantCoeff is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_InvTransformResidualAndAdd ( + const OMX_U8 *pSrcPred, + const OMX_S16 *pDequantCoeff, + OMX_U8 *pDstRecon, + OMX_U32 iSrcPredStep, + OMX_U32 iDstReconStep, + OMX_U8 bAC +); + + + +/** + * Function: omxVCM4P10_SubAndTransformQDQResidual (6.3.5.8.1) + * + * Description: + * This function subtracts the prediction signal from the original signal to + * produce the difference signal and then performs a 4x4 integer transform and + * quantization. The quantized transformed coefficients are stored as + * pDstQuantCoeff. This function can also output dequantized coefficients or + * unquantized DC coefficients optionally by setting the pointers + * pDstDeQuantCoeff, pDCCoeff. + * + * Input Arguments: + * + * pSrcOrg - Pointer to original signal. 4-byte alignment required. + * pSrcPred - Pointer to prediction signal. 4-byte alignment required. + * iSrcOrgStep - Step of the original signal buffer; must be a multiple of + * 4. + * iSrcPredStep - Step of the prediction signal buffer; must be a multiple + * of 4. + * pNumCoeff -Number of non-zero coefficients after quantization. If this + * parameter is not required, it is set to NULL. + * nThreshSAD - Zero-block early detection threshold. If this parameter is + * not required, it is set to 0. + * iQP - Quantization parameter; must be in the range [0,51]. + * bIntra - Indicates whether this is an INTRA block, either 1-INTRA or + * 0-INTER + * + * Output Arguments: + * + * pDstQuantCoeff - Pointer to the quantized transformed coefficients. + * 8-byte alignment required. + * pDstDeQuantCoeff - Pointer to the dequantized transformed coefficients + * if this parameter is not equal to NULL. 8-byte alignment + * required. + * pDCCoeff - Pointer to the unquantized DC coefficient if this parameter + * is not equal to NULL. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pSrcPred, pNumCoeff, pDstQuantCoeff, + * pDstDeQuantCoeff, pDCCoeff + * - pSrcOrg is not aligned on a 4-byte boundary + * - pSrcPred is not aligned on a 4-byte boundary + * - iSrcOrgStep is not a multiple of 4 + * - iSrcPredStep is not a multiple of 4 + * - pDstQuantCoeff or pDstDeQuantCoeff is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_SubAndTransformQDQResidual ( + const OMX_U8 *pSrcOrg, + const OMX_U8 *pSrcPred, + OMX_U32 iSrcOrgStep, + OMX_U32 iSrcPredStep, + OMX_S16 *pDstQuantCoeff, + OMX_S16 *pDstDeQuantCoeff, + OMX_S16 *pDCCoeff, + OMX_S8 *pNumCoeff, + OMX_U32 nThreshSAD, + OMX_U32 iQP, + OMX_U8 bIntra +); + + + +/** + * Function: omxVCM4P10_GetVLCInfo (6.3.5.9.1) + * + * Description: + * This function extracts run-length encoding (RLE) information from the + * coefficient matrix. The results are returned in an OMXVCM4P10VLCInfo + * structure. + * + * Input Arguments: + * + * pSrcCoeff - pointer to the transform coefficient matrix. 8-byte + * alignment required. + * pScanMatrix - pointer to the scan order definition matrix. For a luma + * block the scan matrix should follow [ISO14496-10] section 8.5.4, + * and should contain the values 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, + * 10, 7, 11, 14, 15. For a chroma block, the scan matrix should + * contain the values 0, 1, 2, 3. + * bAC - indicates presence of a DC coefficient; 0 = DC coefficient + * present, 1= DC coefficient absent. + * MaxNumCoef - specifies the number of coefficients contained in the + * transform coefficient matrix, pSrcCoeff. The value should be 16 + * for blocks of type LUMADC, LUMAAC, LUMALEVEL, and CHROMAAC. The + * value should be 4 for blocks of type CHROMADC. 
+ * + * Output Arguments: + * + * pDstVLCInfo - pointer to structure that stores information for + * run-length coding. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcCoeff, pScanMatrix, pDstVLCInfo + * - pSrcCoeff is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_GetVLCInfo ( + const OMX_S16 *pSrcCoeff, + const OMX_U8 *pScanMatrix, + OMX_U8 bAC, + OMX_U32 MaxNumCoef, + OMXVCM4P10VLCInfo*pDstVLCInfo +); + + + +#ifdef __cplusplus +} +#endif + +#endif /** end of #define _OMXVC_H_ */ + +/** EOF */ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC_s.h new file mode 100755 index 0000000000000000000000000000000000000000..89f3040fac6e4aff283a10ea87fd10d81f127c17 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC_s.h @@ -0,0 +1,129 @@ +;/****************************************************************************** +;// Copyright (c) 1999-2005 The Khronos Group Inc. 
All Rights Reserved +;// +;// +;// +;// +;// +;// +;// +;// +;******************************************************************************/ + +;/** =============== Structure Definition for Sample Generation ============== */ +;/** transparent status */ + +;enum { +OMX_VIDEO_TRANSPARENT EQU 0; /** Wholly transparent */ +OMX_VIDEO_PARTIAL EQU 1; /** Partially transparent */ +OMX_VIDEO_OPAQUE EQU 2; /** Opaque */ +;} + +;/** direction */ +;enum { +OMX_VIDEO_NONE EQU 0; +OMX_VIDEO_HORIZONTAL EQU 1; +OMX_VIDEO_VERTICAL EQU 2; +;} + +;/** bilinear interpolation type */ +;enum { +OMX_VIDEO_INTEGER_PIXEL EQU 0; /** case a */ +OMX_VIDEO_HALF_PIXEL_X EQU 1; /** case b */ +OMX_VIDEO_HALF_PIXEL_Y EQU 2; /** case c */ +OMX_VIDEO_HALF_PIXEL_XY EQU 3; /** case d */ +;} + +;enum { +OMX_UPPER EQU 1; /** set if the above macroblock is available */ +OMX_LEFT EQU 2; /** set if the left macroblock is available */ +OMX_CENTER EQU 4; +OMX_RIGHT EQU 8; +OMX_LOWER EQU 16; +OMX_UPPER_LEFT EQU 32; /** set if the above-left macroblock is available */ +OMX_UPPER_RIGHT EQU 64; /** set if the above-right macroblock is available */ +OMX_LOWER_LEFT EQU 128; +OMX_LOWER_RIGHT EQU 256 +;} + +;enum { +OMX_VIDEO_LUMINANCE EQU 0; /** Luminance component */ +OMX_VIDEO_CHROMINANCE EQU 1; /** chrominance component */ +OMX_VIDEO_ALPHA EQU 2; /** Alpha component */ +;} + +;enum { +OMX_VIDEO_INTER EQU 0; /** P picture or P-VOP */ +OMX_VIDEO_INTER_Q EQU 1; /** P picture or P-VOP */ +OMX_VIDEO_INTER4V EQU 2; /** P picture or P-VOP */ +OMX_VIDEO_INTRA EQU 3; /** I and P picture; I- and P-VOP */ +OMX_VIDEO_INTRA_Q EQU 4; /** I and P picture; I- and P-VOP */ +OMX_VIDEO_INTER4V_Q EQU 5; /** P picture or P-VOP (H.263)*/ +OMX_VIDEO_DIRECT EQU 6; /** B picture or B-VOP (MPEG-4 only) */ +OMX_VIDEO_INTERPOLATE EQU 7; /** B picture or B-VOP */ +OMX_VIDEO_BACKWARD EQU 8; /** B picture or B-VOP */ +OMX_VIDEO_FORWARD EQU 9; /** B picture or B-VOP */ +OMX_VIDEO_NOTCODED EQU 10; /** B picture or B-VOP */ +;} + +;enum { 
+OMX_16X16_VERT EQU 0; /** Intra_16x16_Vertical (prediction mode) */ +OMX_16X16_HOR EQU 1; /** Intra_16x16_Horizontal (prediction mode) */ +OMX_16X16_DC EQU 2; /** Intra_16x16_DC (prediction mode) */ +OMX_16X16_PLANE EQU 3; /** Intra_16x16_Plane (prediction mode) */ +;} + +;enum { +OMX_4x4_VERT EQU 0; /** Intra_4x4_Vertical (prediction mode) */ +OMX_4x4_HOR EQU 1; /** Intra_4x4_Horizontal (prediction mode) */ +OMX_4x4_DC EQU 2; /** Intra_4x4_DC (prediction mode) */ +OMX_4x4_DIAG_DL EQU 3; /** Intra_4x4_Diagonal_Down_Left (prediction mode) */ +OMX_4x4_DIAG_DR EQU 4; /** Intra_4x4_Diagonal_Down_Right (prediction mode) */ +OMX_4x4_VR EQU 5; /** Intra_4x4_Vertical_Right (prediction mode) */ +OMX_4x4_HD EQU 6; /** Intra_4x4_Horizontal_Down (prediction mode) */ +OMX_4x4_VL EQU 7; /** Intra_4x4_Vertical_Left (prediction mode) */ +OMX_4x4_HU EQU 8; /** Intra_4x4_Horizontal_Up (prediction mode) */ +;} + +;enum { +OMX_CHROMA_DC EQU 0; /** Intra_Chroma_DC (prediction mode) */ +OMX_CHROMA_HOR EQU 1; /** Intra_Chroma_Horizontal (prediction mode) */ +OMX_CHROMA_VERT EQU 2; /** Intra_Chroma_Vertical (prediction mode) */ +OMX_CHROMA_PLANE EQU 3; /** Intra_Chroma_Plane (prediction mode) */ +;} + +;typedef struct { +x EQU 0; +y EQU 4; +;}OMXCoordinate; + +;typedef struct { +dx EQU 0; +dy EQU 2; +;}OMXMotionVector; + +;typedef struct { +xx EQU 0; +yy EQU 4; +width EQU 8; +height EQU 12; +;}OMXiRect; + +;typedef enum { +OMX_VC_INTER EQU 0; /** P picture or P-VOP */ +OMX_VC_INTER_Q EQU 1; /** P picture or P-VOP */ +OMX_VC_INTER4V EQU 2; /** P picture or P-VOP */ +OMX_VC_INTRA EQU 3; /** I and P picture, I- and P-VOP */ +OMX_VC_INTRA_Q EQU 4; /** I and P picture, I- and P-VOP */ +OMX_VC_INTER4V_Q EQU 5; /** P picture or P-VOP (H.263)*/ +;} OMXVCM4P2MacroblockType; + +;enum { +OMX_VC_NONE EQU 0 +OMX_VC_HORIZONTAL EQU 1 +OMX_VC_VERTICAL EQU 2 +;}; + + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy16x16_s.s 
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy16x16_s.s new file mode 100755 index 0000000000000000000000000000000000000000..296d59dd429fd1a4317efe6b979c1484ccf72544 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy16x16_s.s @@ -0,0 +1,95 @@ + ;/** + ; * Function: omxVCCOMM_Copy16x16 + ; * + ; * Description: + ; * Copies the reference 16x16 block to the current block. + ; * Parameters: + ; * [in] pSrc - pointer to the reference block in the source frame; must be aligned on a 16-byte boundary. + ; * [in] step - distance between the starts of consecutive lines in the reference frame, in bytes; + ; * must be a multiple of 16 and must be larger than or equal to 16. + ; * [out] pDst - pointer to the destination block; must be aligned on a 16-byte boundary (every store uses the @128 qualifier); rows are written back-to-back, 16 bytes apart. + ; * Return Value: + ; * OMX_Sts_NoErr - no error (the only status set by the instructions visible in this routine) + ; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions: + ; * - one or more of the following pointers is NULL: pSrc, pDst + ; * - one or more of the following pointers is not aligned on a 16-byte boundary: pSrc, pDst + ; * - step <16 or step is not a multiple of 16. + ; */ + + INCLUDE omxtypes_s.h + + + M_VARIANTS CortexA8 + + IF CortexA8 + + + ;//Input Arguments +pSrc RN 0 +pDst RN 1 +step RN 2 + +;//Local Variables +Return RN 0 ;// Shares r0 with pSrc; written only after the last load +;// Neon Registers + +X0 DN D0.S8 +X1 DN D1.S8 +X2 DN D2.S8 +X3 DN D3.S8 +X4 DN D4.S8 +X5 DN D5.S8 +X6 DN D6.S8 +X7 DN D7.S8 + + M_START omxVCCOMM_Copy16x16 + + + VLD1 {X0,X1},[pSrc@128],step ;// Load 16 bytes from 16 byte aligned pSrc and pSrc=pSrc + step after loading + VLD1 {X2,X3},[pSrc@128],step ;// Next source row + VLD1 {X4,X5},[pSrc@128],step ;// Next source row + VLD1 {X6,X7},[pSrc@128],step ;// Next source row + + VST1 {X0,X1,X2,X3},[pDst@128]! ;// Store 32 bytes to 16 byte aligned pDst + VST1 {X4,X5,X6,X7},[pDst@128]! ;// Store the other 32 bytes; pDst advances by the bytes written
+ + + VLD1 {X0,X1},[pSrc@128],step + VLD1 {X2,X3},[pSrc@128],step + VLD1 {X4,X5},[pSrc@128],step + VLD1 {X6,X7},[pSrc@128],step + + VST1 {X0,X1,X2,X3},[pDst@128]! + VST1 {X4,X5,X6,X7},[pDst@128]! + + + VLD1 {X0,X1},[pSrc@128],step + VLD1 {X2,X3},[pSrc@128],step + VLD1 {X4,X5},[pSrc@128],step + VLD1 {X6,X7},[pSrc@128],step + + VST1 {X0,X1,X2,X3},[pDst@128]! + VST1 {X4,X5,X6,X7},[pDst@128]! + + + VLD1 {X0,X1},[pSrc@128],step + VLD1 {X2,X3},[pSrc@128],step + VLD1 {X4,X5},[pSrc@128],step + VLD1 {X6,X7},[pSrc@128],step + + VST1 {X0,X1,X2,X3},[pDst@128]! + VST1 {X4,X5,X6,X7},[pDst@128]! + + + MOV Return,#OMX_Sts_NoErr + + + + M_END + ENDIF + + + + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy8x8_s.s new file mode 100755 index 0000000000000000000000000000000000000000..db9e5ef6ef279ffcc568ee554246df206aa78c13 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy8x8_s.s @@ -0,0 +1,70 @@ + ;/** + ; * Function: omxVCCOMM_Copy8x8 + ; * + ; * Description: + ; * Copies the reference 8x8 block to the current block. + ; * Parameters: + ; * [in] pSrc - pointer to the reference block in the source frame; must be aligned on an 8-byte boundary. + ; * [in] step - distance between the starts of consecutive lines in the reference frame, in bytes; + ; * must be a multiple of 8 and must be larger than or equal to 8. + ; * [out] pDst - pointer to the destination block; must be aligned on an 8-byte boundary. + ; * Return Value: + ; * OMX_Sts_NoErr - no error + ; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions: + ; * - one or more of the following pointers is NULL: pSrc, pDst + ; * - one or more of the following pointers is not aligned on an 8-byte boundary: pSrc, pDst + ; * - step <8 or step is not a multiple of 8.
+ ; */ + + INCLUDE omxtypes_s.h + + + M_VARIANTS CortexA8 + + IF CortexA8 + + + ;//Input Arguments +pSrc RN 0 +pDst RN 1 +step RN 2 + +;//Local Variables +Count RN 3 ;// Declared but not referenced anywhere in this routine +Return RN 0 ;// Shares r0 with pSrc; written only after the last load +;// Neon Registers + +X0 DN D0.S8 +X1 DN D1.S8 +X2 DN D2.S8 +X3 DN D3.S8 + M_START omxVCCOMM_Copy8x8 + + + + VLD1 {X0},[pSrc],step ;// Load 8 bytes from 8 byte aligned pSrc, pSrc=pSrc+step after load + VLD1 {X1},[pSrc],step ;// Next source row + VLD1 {X2},[pSrc],step ;// Next source row + VLD1 {X3},[pSrc],step ;// Next source row + + VST1 {X0,X1},[pDst]! ;// Store 16 bytes to 8 byte aligned pDst + VST1 {X2,X3},[pDst]! ;// Store the other 16 bytes; pDst advances by the bytes written + + VLD1 {X0},[pSrc],step ;// Second group of four source rows + VLD1 {X1},[pSrc],step + VLD1 {X2},[pSrc],step + VLD1 {X3},[pSrc],step + + VST1 {X0,X1},[pDst]! + VST1 {X2,X3},[pDst]! + + MOV Return,#OMX_Sts_NoErr ;// Only status set here; no argument checks appear between M_START and M_END + + M_END + ENDIF + + + + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s new file mode 100755 index 0000000000000000000000000000000000000000..5c5b7d8166b957b2306c6fa03f4676f7ba4294db --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s @@ -0,0 +1,236 @@ +;// +;// +;// File Name: omxVCCOMM_ExpandFrame_I_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// +;// +;// +;// Description: +;// This function replicates the frame boundary pixels into the iExpandPels wide border of the plane +;// +;// + +;// Include standard headers + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + +;// Import symbols required from other files +;// (For example tables) + + +;// Set debugging level +DEBUG_ON SETL {FALSE} + + + + IF CortexA8 + + M_START omxVCCOMM_ExpandFrame_I,r11 + +;//Input registers + +pSrcDstPlane RN 0 +iFrameWidth RN 1 +iFrameHeight RN 2 +iExpandPels RN 3 +iPlaneStep RN 4 +pTop RN 5 +pBot RN 6 +pDstTop RN 7 +pDstBot RN 8 +pLeft RN 5 +pRight RN 6 +pDstLeft RN 9 +pDstRight RN 10 +Offset RN 11 +Temp RN 14 +Counter RN 12 +Tmp RN 7 +;//Output registers + +result RN 0 +;// Neon registers +qData0 QN 0.U8 +qData1 QN 1.U8 +dData0 DN 0.U8 +dData1 DN 1.U8 +dData2 DN 2.U8 +dData3 DN 3.U8 + + ;// Define stack arguments + M_ARG pPlaneStep, 4 + + ;// Load argument from the stack + M_LDR iPlaneStep, pPlaneStep + + SUB pTop, pSrcDstPlane, #0 ;// Top row pointer of the frame + MUL Offset, iExpandPels, iPlaneStep ;// E*Step + SUB Temp, iFrameHeight, #1 ;// H-1 + MUL Temp, iPlaneStep, Temp ;// (H-1)*Step + ADD pBot, Temp, pSrcDstPlane ;// BPtr = TPtr + (H-1)*Step + MOV Temp, iFrameWidth ;// Outer loop counter: frame columns still to copy, 16 per pass + + ;// Check if pSrcDstPlane and iPlaneStep are 16 byte aligned + TST pSrcDstPlane, #0xf + TSTEQ iPlaneStep, #0xf + BNE Hor8Loop00 ;// Either one not 16 byte aligned: use the @64 path + + ;// + ;// Copy top and bottom region of the plane as follows + ;// top region = top row elements from the frame + ;// bottom region = last row elements from the frame + ;// + + ;// Case for 16 byte alignment +Hor16Loop00 + SUB pDstTop, pTop, Offset + VLD1 qData0, [pTop @128]! + MOV Counter, iExpandPels ;// Inner loop counter + ADD pDstBot, pBot, iPlaneStep + VLD1 qData1, [pBot @128]!
+Ver16Loop0 + VST1 qData0, [pDstTop @128], iPlaneStep + VST1 qData0, [pDstTop @128], iPlaneStep + VST1 qData0, [pDstTop @128], iPlaneStep + VST1 qData0, [pDstTop @128], iPlaneStep + VST1 qData0, [pDstTop @128], iPlaneStep + VST1 qData0, [pDstTop @128], iPlaneStep + VST1 qData0, [pDstTop @128], iPlaneStep + VST1 qData0, [pDstTop @128], iPlaneStep + SUBS Counter, Counter, #8 ;// 8 border rows are written per pass, above and below + VST1 qData1, [pDstBot @128], iPlaneStep + VST1 qData1, [pDstBot @128], iPlaneStep + VST1 qData1, [pDstBot @128], iPlaneStep + VST1 qData1, [pDstBot @128], iPlaneStep + VST1 qData1, [pDstBot @128], iPlaneStep + VST1 qData1, [pDstBot @128], iPlaneStep + VST1 qData1, [pDstBot @128], iPlaneStep + VST1 qData1, [pDstBot @128], iPlaneStep + BGT Ver16Loop0 + + SUBS Temp, Temp, #16 ;// 16 columns done + BGT Hor16Loop00 + B EndAlignedLoop + + ;// Case for 8 byte alignment +Hor8Loop00 + SUB pDstTop, pTop, Offset + VLD1 qData0, [pTop @64]! + MOV Counter, iExpandPels ;// Inner loop counter + ADD pDstBot, pBot, iPlaneStep + VLD1 qData1, [pBot @64]! +Ver8Loop0 + VST1 qData0, [pDstTop @64], iPlaneStep + VST1 qData0, [pDstTop @64], iPlaneStep + VST1 qData0, [pDstTop @64], iPlaneStep + VST1 qData0, [pDstTop @64], iPlaneStep + VST1 qData0, [pDstTop @64], iPlaneStep + VST1 qData0, [pDstTop @64], iPlaneStep + VST1 qData0, [pDstTop @64], iPlaneStep + VST1 qData0, [pDstTop @64], iPlaneStep + SUBS Counter, Counter, #8 + VST1 qData1, [pDstBot @64], iPlaneStep + VST1 qData1, [pDstBot @64], iPlaneStep + VST1 qData1, [pDstBot @64], iPlaneStep + VST1 qData1, [pDstBot @64], iPlaneStep + VST1 qData1, [pDstBot @64], iPlaneStep + VST1 qData1, [pDstBot @64], iPlaneStep + VST1 qData1, [pDstBot @64], iPlaneStep + VST1 qData1, [pDstBot @64], iPlaneStep + BGT Ver8Loop0 + + SUBS Temp, Temp, #16 + BGT Hor8Loop00 + +EndAlignedLoop + ADD Temp, pSrcDstPlane, iFrameWidth + SUB pDstRight, Temp, Offset + SUB pRight, Temp, #1 + SUB pDstLeft, pSrcDstPlane, Offset + SUB pDstLeft, pDstLeft, iExpandPels + ADD pLeft, pSrcDstPlane, #0 + + VLD1 {dData0 []}, [pLeft],
iPlaneStep ;// Top-Left corner pixel from frame duplicated in dData0 + SUB Offset, iPlaneStep, iExpandPels ;// Offset is reused from here on as Step - E + VLD1 {dData1 []}, [pRight], iPlaneStep ;// Top-Right corner pixel from frame duplicated in dData1 + MOV Temp, iExpandPels + + ;// + ;// Copy top-left and top-right region of the plane as follows + ;// top-left region = top-left corner pixel from the frame + ;// top-right region = top-right corner pixel from the frame + ;// +HorLoop11 + MOV Counter, iExpandPels +VerLoop1 + VST1 dData0, [pDstLeft], #8 + SUBS Counter, Counter, #8 ;// 8 bytes written across the row per pass + VST1 dData1, [pDstRight], #8 + BGT VerLoop1 + + SUBS Temp, Temp, #1 + ADD pDstLeft, pDstLeft, Offset + ADD pDstRight, pDstRight, Offset + BPL HorLoop11 ;// BPL (not BGT): iExpandPels+1 rows, so the border of frame row 0 is filled here too + + SUB iFrameHeight, iFrameHeight, #1 ;// Row 0 was already handled by the loop above + ;// + ;// Copy left and right region of the plane as follows + ;// Left region = copy the row with left start pixel from the frame + ;// Right region = copy the row with right end pixel from the frame + ;// +HorLoop22 + VLD1 {dData0 []}, [pLeft], iPlaneStep + MOV Counter, iExpandPels + VLD1 {dData1 []}, [pRight], iPlaneStep +VerLoop2 + VST1 dData0, [pDstLeft], #8 + SUBS Counter, Counter, #8 + VST1 dData1, [pDstRight], #8 + BGT VerLoop2 + + SUBS iFrameHeight, iFrameHeight, #1 + ADD pDstLeft, pDstLeft, Offset + ADD pDstRight, pDstRight, Offset + BGT HorLoop22 + + MOV Temp, iExpandPels + ;// + ;// Copy bottom-left and bottom-right region of the plane as follows + ;// bottom-left region = bottom-left corner pixel from the frame + ;// bottom-right region = bottom-right corner pixel from the frame + ;// (dData0/dData1 still hold the pixels loaded for the last frame row) + ;// +HorLoop33 + MOV Counter, iExpandPels +VerLoop3 + VST1 dData0, [pDstLeft], #8 + SUBS Counter, Counter, #8 + VST1 dData1, [pDstRight], #8 + BGT VerLoop3 + + SUBS Temp, Temp, #1 + ADD pDstLeft, pDstLeft, Offset + ADD pDstRight, pDstRight, Offset + BGT HorLoop33 +End + MOV r0, #OMX_Sts_NoErr + + M_END + + ENDIF + + + + +;// Guarding implementation by the processor name + + + + END \ No newline at end of file diff --git
a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/api/armVCM4P10_CAVLCTables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/api/armVCM4P10_CAVLCTables.h new file mode 100755 index 0000000000000000000000000000000000000000..547a2d9869b87412c947b2656dca24f69d374fa8 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/api/armVCM4P10_CAVLCTables.h @@ -0,0 +1,30 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_CAVLCTables.h + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Header file for optimized H.264 CAVLC tables + * + */ + +#ifndef ARMVCM4P10_CAVLCTABLES_H +#define ARMVCM4P10_CAVLCTABLES_H + +/* CAVLC tables. This header includes nothing itself: the OMX_U8/OMX_U16/OMX_S8 types must already be declared by the includer. */ + +extern const OMX_U16 *armVCM4P10_CAVLCCoeffTokenTables[18]; +extern const OMX_U16 *armVCM4P10_CAVLCTotalZeroTables[15]; +extern const OMX_U16 *armVCM4P10_CAVLCTotalZeros2x2Tables[3]; +extern const OMX_U16 *armVCM4P10_CAVLCRunBeforeTables[15]; +extern const OMX_U8 armVCM4P10_ZigZag_4x4[16]; +extern const OMX_U8 armVCM4P10_ZigZag_2x2[4]; +extern const OMX_S8 armVCM4P10_SuffixToLevel[7]; + +#endif diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s new file mode 100755 index 0000000000000000000000000000000000000000..4f0892d63c2cc9fbb3536de834273ba529cecb56 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s @@ -0,0 +1,222 @@ +;// +;// +;// File Name: armVCM4P10_Average_4x_Align_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// +;// +;// + + +;// Functions: +;// armVCM4P10_Average_4x4_Align0_unsafe, _Align2_unsafe and _Align3_unsafe +;// +;// Implements Average of 4x4 with equation c = (a+b+1)>>1. +;// First operand will be at offset ALIGNMENT from aligned address +;// Second operand will be at aligned location and will be used as output. +;// destination pointed by (pDst) for vertical interpolation. +;// This function needs to copy 4 bytes in horizontal direction +;// +;// Registers used as input for this function +;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 its step size +;// +;// Registers preserved for top level function +;// r4,r5,r6,r8,r9,r14 +;// +;// Registers modified by the function +;// r7,r10,r11,r12 +;// +;// Output registers +;// r2 - pointer to the aligned location +;// r3 - step size to this aligned location + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_Average_4x4_Align0_unsafe + EXPORT armVCM4P10_Average_4x4_Align2_unsafe + EXPORT armVCM4P10_Average_4x4_Align3_unsafe + +DEBUG_ON SETL {FALSE} + +;// Declare input registers (pDstPred/iDstStep are the same registers as pPred1/iPredStep1: the second operand is overwritten) +pPred0 RN 0 +iPredStep0 RN 1 +pPred1 RN 2 +iPredStep1 RN 3 +pDstPred RN 2 +iDstStep RN 3 + +;// Declare other intermediate registers +iPredA0 RN 10 +iPredA1 RN 11 +iPredB0 RN 12 +iPredB1 RN 14 +Temp1 RN 4 +Temp2 RN 5 +ResultA RN 5 +ResultB RN 4 +r0x80808080 RN 7 + + IF ARM1136JS + + ;// This function calculates average of 4x4 block + ;// pPred0 is at alignment offset 0 and pPred1 is alignment 4 + + ;// Function header + M_START armVCM4P10_Average_4x4_Align0_unsafe, r6 + + ;// Code start + LDR r0x80808080, =0x80808080 + + ;// 1st load (rows 0 and 1) + M_LDR iPredB0, [pPred1] + M_LDR iPredA0, [pPred0], iPredStep0 + M_LDR iPredB1, [pPred1, iPredStep1] + M_LDR iPredA1, [pPred0], iPredStep0 + + ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128 + MVN iPredB0, iPredB0 + MVN iPredB1, iPredB1 + UHSUB8 ResultA, iPredA0, iPredB0 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep
+ EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep + + ;// 2nd load (rows 2 and 3) + M_LDR iPredA0, [pPred0], iPredStep0 + M_LDR iPredB0, [pPred1] + M_LDR iPredA1, [pPred0], iPredStep0 + M_LDR iPredB1, [pPred1, iPredStep1] + + MVN iPredB0, iPredB0 + UHSUB8 ResultA, iPredA0, iPredB0 + MVN iPredB1, iPredB1 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep +End0 + M_END + + ;// This function calculates average of 4x4 block + ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4 + + ;// Function header + M_START armVCM4P10_Average_4x4_Align2_unsafe, r6 + + ;// Code start + LDR r0x80808080, =0x80808080 + + ;// 1st load (rows 0 and 1) + LDR Temp1, [pPred0, #4] + M_LDR iPredA0, [pPred0], iPredStep0 + M_LDR iPredB0, [pPred1] + M_LDR iPredB1, [pPred1, iPredStep1] + M_LDR Temp2, [pPred0, #4] + M_LDR iPredA1, [pPred0], iPredStep0 + MVN iPredB0, iPredB0 + MVN iPredB1, iPredB1 + MOV iPredA0, iPredA0, LSR #16 + ORR iPredA0, iPredA0, Temp1, LSL #16 ;// Upper half of the first word joined with the lower half of the following word + MOV iPredA1, iPredA1, LSR #16 + ORR iPredA1, iPredA1, Temp2, LSL #16 + + ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128 + UHSUB8 ResultA, iPredA0, iPredB0 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep + + ;// 2nd load (rows 2 and 3) + LDR Temp1, [pPred0, #4] + M_LDR iPredA0, [pPred0], iPredStep0 + LDR iPredB0, [pPred1] + LDR iPredB1, [pPred1, iPredStep1] + LDR Temp2, [pPred0, #4] + M_LDR iPredA1, [pPred0], iPredStep0 + MVN iPredB0, iPredB0 + MVN iPredB1, iPredB1 + MOV iPredA0, iPredA0, LSR #16 + ORR iPredA0, iPredA0, Temp1, LSL #16 + MOV iPredA1, iPredA1, LSR #16 + ORR iPredA1, iPredA1, Temp2, LSL #16 + + UHSUB8 ResultA, iPredA0, iPredB0 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB,
r0x80808080 + M_STR ResultB, [pDstPred], iDstStep +End2 + M_END + + + ;// This function calculates average of 4x4 block + ;// pPred0 is at alignment offset 3 and pPred1 is alignment 4 + + ;// Function header + M_START armVCM4P10_Average_4x4_Align3_unsafe, r6 + + ;// Code start + LDR r0x80808080, =0x80808080 + + ;// 1st load (rows 0 and 1) + LDR Temp1, [pPred0, #4] + M_LDR iPredA0, [pPred0], iPredStep0 + LDR iPredB0, [pPred1] + LDR iPredB1, [pPred1, iPredStep1] + LDR Temp2, [pPred0, #4] + M_LDR iPredA1, [pPred0], iPredStep0 + + MVN iPredB0, iPredB0 + MVN iPredB1, iPredB1 + MOV iPredA0, iPredA0, LSR #24 + ORR iPredA0, iPredA0, Temp1, LSL #8 ;// Top byte of the first word joined with the low three bytes of the following word + MOV iPredA1, iPredA1, LSR #24 + ORR iPredA1, iPredA1, Temp2, LSL #8 + UHSUB8 ResultA, iPredA0, iPredB0 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep + + ;// 2nd load (rows 2 and 3) + LDR Temp1, [pPred0, #4] + M_LDR iPredA0, [pPred0], iPredStep0 + LDR iPredB0, [pPred1] + LDR iPredB1, [pPred1, iPredStep1] + LDR Temp2, [pPred0, #4] + M_LDR iPredA1, [pPred0], iPredStep0 + + MVN iPredB0, iPredB0 + MVN iPredB1, iPredB1 + MOV iPredA0, iPredA0, LSR #24 + ORR iPredA0, iPredA0, Temp1, LSL #8 + MOV iPredA1, iPredA1, LSR #24 + ORR iPredA1, iPredA1, Temp2, LSL #8 + + UHSUB8 ResultA, iPredA0, iPredB0 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep +End3 + M_END + + ENDIF + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_CAVLCTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_CAVLCTables.c new file mode 100755 index 0000000000000000000000000000000000000000..137495d16939bf05882a1bd9edd4fb81bbed2590 --- /dev/null +++
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_CAVLCTables.c @@ -0,0 +1,327 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_CAVLCTables.c + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Optimized CAVLC tables for H.264 + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armVCM4P10_CAVLCTables.h" + +/* 4x4 DeZigZag table */ + +const OMX_U8 armVCM4P10_ZigZag_4x4[16] = +{ + 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 +}; + +/* 2x2 DeZigZag table */ + +const OMX_U8 armVCM4P10_ZigZag_2x2[4] = +{ + 0, 1, 2, 3 +}; + + +/* + * Suffix To Level table + * We increment the suffix length if + * ((LevelCode>>1)+1)>(3<<(SuffixLength-1)) && SuffixLength<6 + * (LevelCode>>1)>=(3<<(SuffixLength-1)) && SuffixLength<6 + * LevelCode >= 3<= (3<> 3; + ;// dDelta = (qDp1q1 >> 2 + qDq0p0 + 1)>> 1 + + ;// qDp1q1-11 + ;// qDq0p0-10 + VSUBL qDp1q1, dP_1, dQ_1 + VMOV dTemp, dTC3210 + VSUBL qDq0p0, dQ_0, dP_0 + VSHR qDp1q1, qDp1q1, #2 + VZIP.8 dTC3210, dTemp + + ;// qDelta-qDq0p0-10 + + ;// dTC = dTC01 + (dAplg & 1) + (dAqflg & 1) + + ;// dTC3210-18 + ;// dTemp-28 + ;// dTC-31 + VBIF dTC3210, dMask_0, dFilt + VRHADD qDelta, qDp1q1, qDq0p0 + VADD dTC, dTC3210, dMask_1 + VQMOVN dDelta, qDelta + ;// dDelta-d20 + + ;// dDelta = (OMX_U8)armClip(0, 255, q0 - delta); + VLD1 {dAlpha[]}, [pAlpha] + VMIN dDelta, dDelta, dTCs + VNEG dTCs, dTCs + VLD1 {dBeta[]}, [pBeta] + ;1 + VMAX dDelta, dDelta, dTCs + + ;// dP_0n - 29 + ;// dQ_0n - 24 + + ;// pQ0[-1*Step] = (OMX_U8)armClip(0, 255, dP_0 - delta); + ;// pQ0[0*Step] = (OMX_U8)armClip(0, 255, dQ_0 - delta); + + ;// dP_0n = (OMX_U8)armClip(0, 255, dP_0 - dDelta); + ;// dQ_0n = (OMX_U8)armClip(0, 255, dP_0 - dDelta); + + ;// qP_0n - 14 + ;// qQ_0n - 12 + + VMOVL qP_0n, dP_0 + VMOVL qQ_0n, dQ_0 + + ;1 + VADDW qP_0n, qP_0n, dDelta + 
VSUBW qQ_0n, qQ_0n, dDelta + + VQMOVUN dP_0n, qP_0n + VQMOVUN dQ_0n, qQ_0n + + M_END + +;// Register usage for - armVCM4P10_DeblockingChromabSGE4_unsafe() (NOTE(review): the lists below look copied from the Luma routine; this code only writes dP_0t and dQ_0t - confirm) +;// +;// Inputs - Pixels - p0-p3: D4-D7, q0-q3: D8-D11 +;// - Filter masks - filt: D16, aqflg: D12, apflg: D17 +;// - Additional Params - alpha: D0, dMask_1: D15 +;// +;// Outputs - Pixels - P0-P2: D29-D31, Q0-Q2: D24,D25,D28 + +;// Registers Corrupted - D18-D31 + + M_START armVCM4P10_DeblockingChromabSGE4_unsafe + + ;dHSq0p1 - 31 + ;dHSp0q1 - 13 + VHADD dHSp0q1, dP_0, dQ_1 + VHADD dHSq0p1, dQ_0, dP_1 + + ;// Prepare the bS mask + + ;// dHSp0q1-13 + ;// dP_0t-dHSp0q1-13 + ;// dHSq0p1-31 + ;// dQ_0t-Temp1-31 + VLD1 {dAlpha[]}, [pAlpha] + ADD pThresholds, pThresholds, #4 + VLD1 {dBeta[]}, [pBeta] + + VRHADD dP_0t, dHSp0q1, dP_1 + VRHADD dQ_0t, dHSq0p1, dQ_1 + + M_END + + ENDIF + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s new file mode 100755 index 0000000000000000000000000000000000000000..0afe4fd73af73f3d77294daf1349feac88a25917 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s @@ -0,0 +1,396 @@ +;// +;// +;// File Name: armVCM4P10_DeblockingLuma_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + + + IF CortexA8 + +pThresholds RN 5 + +;// Pixels +dP_0 DN D4.U8 +dP_1 DN D5.U8 +dP_2 DN D6.U8 +dP_3 DN D7.U8 +dQ_0 DN D8.U8 +dQ_1 DN D9.U8 +dQ_2 DN D10.U8 +dQ_3 DN D11.U8 + + +;// Filtering Decision +dAlpha DN D0.U8 + +dFilt DN D16.U8 +dAqflg DN D12.U8 +dApflg DN D17.U8 + +dAp0q0 DN D13.U8 + +;// bSLT4 +dTC0 DN D18.U8 +dTC1 DN D19.U8 +dTC01 DN D18.U8 + +dTCs DN D31.S8 +dTC DN D31.U8 + +dMask_0 DN D14.U8 +dMask_1 DN D15.U8 + +dTemp DN D19.U8 + +;// Computing P0,Q0 +qDq0p0 QN Q10.S16 +qDp1q1 QN Q11.S16 +qDelta QN Q10.S16 ; reuse qDq0p0 +dDelta DN D20.S8 + + +;// Computing P1,Q1 +dRp0q0 DN D24.U8 + +dMaxP DN D23.U8 +dMinP DN D22.U8 + +dMaxQ DN D19.U8 +dMinQ DN D21.U8 + +dDeltaP DN D26.U8 +dDeltaQ DN D27.U8 + +qP_0n QN Q14.S16 +qQ_0n QN Q12.S16 + +dQ_0n DN D24.U8 +dQ_1n DN D25.U8 +dP_0n DN D29.U8 +dP_1n DN D30.U8 + +;// bSGE4 + +qSp0q0 QN Q10.U16 + +qSp2q1 QN Q11.U16 +qSp0q0p1 QN Q12.U16 +qSp3p2 QN Q13.U16 +dHSp0q1 DN D28.U8 + +qSq2p1 QN Q11.U16 +qSp0q0q1 QN Q12.U16 +qSq3q2 QN Q13.U16 ;!! +dHSq0p1 DN D28.U8 ;!! + +qTemp1 QN Q11.U16 ;!!;qSp2q1 +qTemp2 QN Q12.U16 ;!!;qSp0q0p1 + +dP_0t DN D28.U8 ;!!;dHSp0q1 +dQ_0t DN D22.U8 ;!!;Temp1 + +dP_0n DN D29.U8 +dP_1n DN D30.U8 +dP_2n DN D31.U8 + +dQ_0n DN D24.U8 ;!!;Temp2 +dQ_1n DN D25.U8 ;!!;Temp2 +dQ_2n DN D28.U8 ;!!;dP_0t + +;// Register usage for - armVCM4P10_DeblockingLumabSLT4_unsafe +;// +;// Inputs - Pixels - p0-p3: D4-D7, q0-q3: D8-D11 +;// - Filter masks - filt: D16, aqflg: D12, apflg: D17 +;// - Additional Params - pThresholds: r5, dMask_0: D14, dMask_1: D15 +;// +;// Outputs - Pixels - P0-P1: D29-D30, Q0-Q1: D24-D25 +;// - Additional Params - pThresholds: r5 (post-incremented by the two tC0 loads) + +;// Registers Corrupted - D18-D31 + + + M_START armVCM4P10_DeblockingLumabSLT4_unsafe + + + ;// qDp1q1-11 + VSUBL qDp1q1, dP_1, dQ_1 + VLD1 {dTC0[]}, [pThresholds]! + ;// qDq0p0-10 + VSUBL qDq0p0, dQ_0, dP_0 + VLD1 {dTC1[]}, [pThresholds]!
+ + ;// dRp0q0-24 + VSHR qDp1q1, qDp1q1, #2 + + ;// dTC01 = lanes 0-3 from dTC0, lanes 4-7 from dTC1 + ;// dTC01-18 + VEXT dTC01, dTC0, dTC1, #4 + ;// dTemp-19 + VAND dTemp, dApflg, dMask_1 + + VBIF dTC01, dMask_0, dFilt + + + ;// delta = (((q0-p0)<<2) + (p1-q1) + 4) >> 3; + ;// dDelta = (qDp1q1 >> 2 + qDq0p0 + 1)>> 1 + + ;// qDelta-qDq0p0-10 + VRHADD qDelta, qDp1q1, qDq0p0 + VRHADD dRp0q0, dP_0, dQ_0 + VADD dTC, dTC01, dTemp + + ;// dTC = dTC01 + (dApflg & 1) + (dAqflg & 1) + + VAND dTemp, dAqflg, dMask_1 + VQADD dMaxP, dP_1, dTC01 + VQMOVN dDelta, qDelta + VADD dTC, dTC, dTemp + + ;// dMaxP = QADD(dP_1, dTC01) + ;// dMinP = QSUB(dP_1, dTC01) + + ;// dMaxP-d23 + ;// dMinP-d22 + VQSUB dMinP, dP_1, dTC01 + + ;// dDelta-d20 + + ;// dMaxQ = QADD(dQ_1, dTC01) + ;// dMinQ = QSUB(dQ_1, dTC01) + + ;// dMaxQ-19 + ;// dMinQ-21 + VQADD dMaxQ, dQ_1, dTC01 + VHADD dDeltaP, dRp0q0, dP_2 + VMIN dDelta, dDelta, dTCs + + ;// dDelta = armClip(-dTC, dTC, dDelta): VMIN above, VMAX after the negate below + VNEG dTCs, dTCs + + VQSUB dMinQ, dQ_1, dTC01 + + ;// delta = (p2 + ((p0+q0+1)>>1) - (p1<<1))>>1; + ;// delta = armClip(-tC0, tC0, delta); + ;// pQ0[-2*Step] = (OMX_U8)(p1 + delta); + + ;// dDeltaP = (dP_2 + dRp0q0)>>1; + ;// dP_1n = armClip(dP_1 - dTC01, dP_1 + dTC01, dDeltaP); + ;// dP_1n = armClip(MinP, MaxP, dDeltaP); + + ;// delta = (q2 + ((p0+q0+1)>>1) - (q1<<1))>>1; + ;// delta = armClip(-tC0, tC0, delta); + ;// pQ0[1*Step] = (OMX_U8)(q1 + delta); + + ;// dDeltaQ = (dQ_2 + dRp0q0)>>1; + ;// dQ_1n = armClip(dQ_1 - dTC01, dQ_1 + dTC01, dDeltaQ); + ;// dQ_1n = armClip(MinQ, MaxQ, dDeltaQ); + + ;// dDeltaP-26 + VHADD dDeltaQ, dRp0q0, dQ_2 + + ;// dDeltaQ-27 + + ;// dP_0n - 29 + ;// dP_1n - 30 + ;// dQ_0n - 24 + ;// dQ_1n - 25 + + ;// delta = (q2 + ((p0+q0+1)>>1) - (q1<<1))>>1; + ;// dDeltaQ = (dQ_2 + dRp0q0)>>1; + + VMAX dP_1n, dDeltaP, dMinP + VMAX dDelta, dDelta, dTCs + + ;// pQ0[-1*Step] = (OMX_U8)armClip(0, 255, dP_0 + delta); + ;// pQ0[0*Step] = (OMX_U8)armClip(0, 255, dQ_0 - delta); + + ;// dP_0n = (OMX_U8)armClip(0, 255, dP_0
+ dDelta); + ;// dQ_0n = (OMX_U8)armClip(0, 255, dQ_0 - dDelta); + + ;// qP_0n - 14 + ;// qQ_0n - 12 + + VMOVL qP_0n, dP_0 + VMOVL qQ_0n, dQ_0 + + VADDW qP_0n, qP_0n, dDelta + VSUBW qQ_0n, qQ_0n, dDelta + + VQMOVUN dP_0n, qP_0n + VQMOVUN dQ_0n, qQ_0n + + VMAX dQ_1n, dDeltaQ, dMinQ + + VMIN dP_1n, dP_1n, dMaxP + VMIN dQ_1n, dQ_1n, dMaxQ + VBIF dP_0n, dP_0, dFilt + + VBIF dP_1n, dP_1, dApflg + VBIF dQ_0n, dQ_0, dFilt + VBIF dQ_1n, dQ_1, dAqflg + + M_END + +;// Register usage for - armVCM4P10_DeblockingLumabSGE4_unsafe() +;// +;// Inputs - Pixels - p0-p3: D4-D7, q0-q3: D8-D11 +;// - Filter masks - filt: D16, aqflg: D12, apflg: D17, ap0q0: D13 +;// - Additional Params - alpha: D0, dMask_1: D15 +;// +;// Outputs - Pixels - P0-P2: D29-D31, Q0-Q2: D24,D25,D28 + +;// Registers Corrupted - D18-D31 (aqflg D12 and apflg D17 are also updated in place) + + M_START armVCM4P10_DeblockingLumabSGE4_unsafe + + + ;// ap < beta && armAbs(p0-q0) < ((alpha>>2)+2) + ;// aq < beta && armAbs(p0-q0) < ((alpha>>2)+2) + + ;// ( dApflg & dAp0q0 < (dAlpha >> 2 + 2) ) + ;// ( dAqflg & dAp0q0 < (dAlpha >> 2 + 2) ) + + ;// ( dApflg = dApflg & dAp0q0 < (dTemp + dMask_1 + dMask_1) ) + ;// ( dAqflg = dAqflg & dAp0q0 < (dTemp + dMask_1 + dMask_1) ) + + ;// P Filter + + VSHR dTemp, dAlpha, #2 + VADD dTemp, dTemp, dMask_1 + + ;// qSp0q0-10 + VADDL qSp0q0, dQ_0, dP_0 + VADD dTemp, dTemp, dMask_1 + + ;// qSp2q1-11 + ;// qSp0q0p1-12 + VADDL qSp2q1, dP_2, dQ_1 + VADDW qSp0q0p1, qSp0q0, dP_1 + + VCGT dTemp, dTemp, dAp0q0 + VSHR qSp2q1, #1 + + ;// pQ0[-1*Step] = (OMX_U8)((p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3); + ;// pQ0[-1*Step] = ( ( (p0 + q0 + p1) + (p2 + q1)>>1 ) >> 1 + 1 ) >> 1 + + ;// dP_0n = ( ( (qSp0q0 + dP_1) + qSp2q1>>1 ) >> 1 + 1 ) >> 1 + ;// dP_0n = ( ( qSp0q0p1 + qSp2q1>>1 ) >> 1 + 1 ) >> 1 + ;// dP_0n = ( qTemp1 + 1 ) >> 1 + + ;// pQ0[-2*Step] = (OMX_U8)((p2 + p1 + p0 + q0 + 2)>>2); + + ;// dP_1n = (OMX_U8)((dP_2 + qSp0q0p1 + 2)>>2); + ;// dP_1n = (OMX_U8)((qTemp2 + 2)>>2); + + ;// pQ0[-3*Step] = (OMX_U8)((2*p3 + 3*p2 + p1 + p0 + q0 + 4)>>3); + ;// pQ0[-3*Step] = (OMX_U8)(( (p3 + p2) + (p1 + p0 + q0 + p2) >> 1 + 2)>>2); + + ;//
dP_2n = (OMX_U8)(( qSp3p2 + (dP_2 + qSp0q0p1) >> 1 + 2) >> 2); + ;// dP_2n = (OMX_U8)(( qSp3p2 + qTemp2 >> 1 + 2) >> 2); + + ;// qTemp1-qSp2q1-11 + ;// qTemp2-qSp0q0p1-12 + VHADD qTemp1, qSp0q0p1, qSp2q1 + VADDW qTemp2, qSp0q0p1, dP_2 + + ;// qSp3p2-13 + VADDL qSp3p2, dP_3, dP_2 + + VAND dApflg, dApflg, dTemp + VHADD dHSp0q1, dP_0, dQ_1 + VSRA qSp3p2, qTemp2, #1 + ;// dHSp0q1-28 + VAND dAqflg, dAqflg, dTemp + + ;// dP_0n-29 + ;// dP_0t-dHSp0q1-28 + VQRSHRN dP_0n, qTemp1, #1 + VRHADD dP_0t, dHSp0q1, dP_1 + + ;// dP_1n-30 + VQRSHRN dP_1n, qTemp2, #2 + + VADDL qSq2p1, dQ_2, dP_1 + VADDW qSp0q0q1, qSp0q0, dQ_1 + + VBIF dP_0n, dP_0t, dApflg + + ;// Q Filter + + ;// pQ0[0*Step] = (OMX_U8)((q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4)>>3); + ;// pQ0[0*Step] = ( ( (p0 + q0 + q1) + (q2 + p1)>>1 ) >> 1 + 1 ) >> 1 + + ;// dQ_0n = ( ( (qSp0q0 + dQ_1) + qSq2p1>>1 ) >> 1 + 1 ) >> 1 + ;// dQ_0n = ( ( qSp0q0q1 + qSq2p1>>1 ) >> 1 + 1 ) >> 1 + ;// dQ_0n = ( qTemp1 + 1 ) >> 1 + + ;// pQ0[1*Step] = (OMX_U8)((q2 + q1 + q0 + p0 + 2)>>2); + + ;// dQ_1n = (OMX_U8)((dQ_2 + qSp0q0q1 + 2)>>2); + ;// dQ_1n = (OMX_U8)((qTemp2 + 2)>>2); + + ;// pQ0[2*Step] = (OMX_U8)((2*q3 + 3*q2 + q1 + q0 + p0 + 4)>>3); + ;// pQ0[2*Step] = (OMX_U8)(( (q3 + q2) + (q1 + p0 + q0 + q2) >> 1 + 2)>>2); + + ;// dQ_2n = (OMX_U8)(( qSq3q2 + (dQ_2 + qSp0q0q1) >> 1 + 2) >> 2); + ;// dQ_2n = (OMX_U8)(( qSq3q2 + qTemp2 >> 1 + 2) >> 2); + + ;// qTemp1-qSp2q1-11 + ;// qTemp2-qSp0q0p1-12 + ;// qSq2p1-11 + ;// qSp0q0q1-12 + + + ;// qTemp2-qSp0q0p1-12 + ;// qTemp1-qSq2p1-11 + ;// qSq3q2-13 + ;// dP_2n-31 + + VQRSHRN dP_2n, qSp3p2, #2 + VADDL qSq3q2, dQ_3, dQ_2 + + VSHR qSq2p1, #1 + + VHADD qTemp1, qSp0q0q1, qSq2p1 + VADDW qTemp2, qSp0q0q1, dQ_2 + + ;// dHSq0p1-28 + VHADD dHSq0p1, dQ_0, dP_1 + + VBIF dP_0n, dP_0, dFilt + VBIF dP_1n, dP_1, dApflg + + VSRA qSq3q2, qTemp2, #1 + + ;// dQ_1n-Temp2-25 + ;// dQ_0n-Temp2-24 + VQRSHRN dQ_1n, qTemp2, #2 + VQRSHRN dQ_0n, qTemp1, #1 + + ;// dQ_0t-Temp1-22 + VRHADD dQ_0t, dHSq0p1, dQ_1 + VBIF dQ_1n,
dQ_1, dAqflg + + VBIF dP_2n, dP_2, dApflg + VBIF dQ_0n, dQ_0t, dAqflg + VQRSHRN dQ_2n, qSq3q2, #2 + VBIF dQ_0n, dQ_0, dFilt + VBIF dQ_2n, dQ_2, dAqflg + + M_END + + ENDIF + + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s new file mode 100755 index 0000000000000000000000000000000000000000..10a89e959270bf0ba21809654cb840f57052c106 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s @@ -0,0 +1,325 @@ +;// +;// +;// File Name: armVCM4P10_DecodeCoeffsToPair_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + + IMPORT armVCM4P10_CAVLCCoeffTokenTables + IMPORT armVCM4P10_CAVLCTotalZeroTables + IMPORT armVCM4P10_CAVLCTotalZeros2x2Tables + IMPORT armVCM4P10_CAVLCRunBeforeTables + IMPORT armVCM4P10_SuffixToLevel + IMPORT armVCM4P10_ZigZag_4x4 + IMPORT armVCM4P10_ZigZag_2x2 + + M_VARIANTS ARM1136JS + +;//DEBUG_ON SETL {TRUE} + +LAST_COEFF EQU 0x20 ;// End of block flag +TWO_BYTE_COEFF EQU 0x10 + +;// Declare input registers + +ppBitStream RN 0 +pOffset RN 1 +pNumCoeff RN 2 +ppPosCoefbuf RN 3 +nC RN 4 ;// number of coeffs or 17 for chroma +sMaxNumCoeff RN 5 + +;// Declare inner loop registers + +;// Level loop +Count RN 0 +TrailingOnes RN 1 +pLevel RN 2 +LevelSuffix RN 3 +SuffixLength RN 4 +TotalCoeff RN 5 + +pVLDTable RN 6 +Symbol RN 7 +T1 RN 8 +T2 RN 9 +RBitStream RN 10 +RBitBuffer RN 11 +RBitCount RN 12 +lr RN 14 + +;// Run loop +Count RN 0 +ZerosLeft RN 1 +pLevel RN 2 +ppRunTable RN 3 +pRun RN 4 +TotalCoeff RN 5 + +pVLDTable RN 6 +Symbol RN 7 +T1 RN 8 +T2 RN 9 +RBitStream RN 10 +RBitBuffer RN 11 +RBitCount RN 12 +lr RN 14 + +;// Fill in
coefficients loop +pPosCoefbuf RN 0 +temp RN 1 +pLevel RN 2 +ppPosCoefbuf RN 3 +pRun RN 4 +TotalCoeff RN 5 +pZigZag RN 6 + +T1 RN 8 +T2 RN 9 +RBitStream RN 10 +RBitBuffer RN 11 +RBitCount RN 12 +CoeffNum RN 14 + + + + IF ARM1136JS + + ;// Allocate stack memory required by the function + M_ALLOC4 pppBitStream, 4 + M_ALLOC4 ppOffset, 4 + M_ALLOC4 pppPosCoefbuf, 4 + M_ALLOC4 ppLevel, 16*2 + M_ALLOC4 ppRun, 16 + + ;// Write function header + M_START armVCM4P10_DecodeCoeffsToPair, r11 + + ;// Define stack arguments + M_ARG pNC, 4 + M_ARG pSMaxNumCoeff,4 + + ;// Code start + M_BD_INIT0 ppBitStream, pOffset, RBitStream, RBitBuffer, RBitCount + LDR pVLDTable, =armVCM4P10_CAVLCCoeffTokenTables + M_LDR nC, pNC + + M_BD_INIT1 T1, T2, lr + LDR pVLDTable, [pVLDTable, nC, LSL #2] ;// Find VLD table + + M_BD_INIT2 T1, T2, lr + + ;// Decode Symbol = TotalCoeff*4 + TrailingOnes + M_BD_VLD Symbol, T1, T2, pVLDTable, 4, 2 + + MOVS TotalCoeff, Symbol, LSR #2 + STRB TotalCoeff, [pNumCoeff] + M_PRINTF "TotalCoeff=%d\n", TotalCoeff + BEQ.W EndNoError ;// Finished if no coefficients + + CMP Symbol, #17*4 + BGE.W EndBadSymbol ;// Error if bad symbol + + ;// Save bitstream pointers + M_STR ppBitStream, pppBitStream + M_STR pOffset, ppOffset + M_STR ppPosCoefbuf, pppPosCoefbuf + + ;// Decode Trailing Ones + ANDS TrailingOnes, Symbol, #3 + M_ADR pLevel, ppLevel + M_PRINTF "TrailingOnes=%d\n", TrailingOnes + BEQ TrailingOnesDone + MOV Count, TrailingOnes +TrailingOnesLoop + M_BD_READ8 Symbol, 1, T1 + SUBS Count, Count, #1 + MOV T1, #1 + SUB T1, T1, Symbol, LSL #1 ;// T1 = 1 - 2*Symbol, i.e. +1 or -1 + M_PRINTF "Level=%d\n", T1 + STRH T1, [pLevel], #2 + BGT TrailingOnesLoop +TrailingOnesDone + + ;// Decode level values + SUBS Count, TotalCoeff, TrailingOnes ;// Number of levels to read + BEQ DecodeRuns ;// None left + + MOV SuffixLength, #1 + CMP TotalCoeff, #10 + MOVLE SuffixLength, #0 ;// SuffixLength = (TotalCoeff>10) ? 1 : 0 + CMP TrailingOnes, #3 ;// if (TrailingOnes<3) + MOVLT TrailingOnes, #4 ;// then TrailingOnes = +4 + MOVGE TrailingOnes, #2 ;// else TrailingOnes
= +2 + MOVGE SuffixLength, #0 ;// SuffixLength = 0 + +LevelLoop + M_BD_CLZ16 Symbol, T1, T2 ;// Symbol=LevelPrefix + CMP Symbol,#16 + BGE EndBadSymbol + + MOVS lr, SuffixLength ;// if LevelSuffixSize==0 + TEQEQ Symbol, #14 ;// and LevelPrefix==14 + MOVEQ lr, #4 ;// then LevelSuffixSize=4 + TEQ Symbol, #15 ;// if LevelPrefix==15 + MOVEQ lr, #12 ;// then LevelSuffixSize=12 + + TEQEQ SuffixLength,#0 ;// if LevelPrefix==15 and SuffixLength==0 + ADDEQ Symbol,Symbol,#15 ;// then LevelPrefix += 15 + + TEQ lr, #0 ;// if LevelSuffixSize==0 + BEQ LevelCodeRead ;// LevelCode = LevelPrefix + + M_BD_VREAD16 LevelSuffix, lr, T1, T2 ;// Read Level Suffix + + MOV Symbol, Symbol, LSL SuffixLength + ADD Symbol, LevelSuffix, Symbol + +LevelCodeRead + ;// Symbol = LevelCode + ADD Symbol, Symbol, TrailingOnes ;// +4 if level cannot be +/-1, +2 o/w + MOV TrailingOnes, #2 + MOVS T1, Symbol, LSR #1 + RSBCS T1, T1, #0 ;// If Symbol odd then negate + M_PRINTF "Level=%d\n", T1 + STRH T1, [pLevel], #2 ;// Store level. + + LDR T2, =armVCM4P10_SuffixToLevel + LDRSB T1, [T2, SuffixLength] ;// Find increment level + TEQ SuffixLength, #0 + MOVEQ SuffixLength, #1 + CMP Symbol, T1 + ADDCS SuffixLength, SuffixLength, #1 + SUBS Count, Count, #1 + BGT LevelLoop + +DecodeRuns + ;// Find number of zeros + M_LDR T1, pSMaxNumCoeff ;// sMaxNumCoeff + SUB Count, TotalCoeff, #1 ;// Number of runs excluding last + SUBS ZerosLeft, T1, TotalCoeff ;// Maximum number of zeros there could be + M_ADR pRun, ppRun + MOV CoeffNum,TotalCoeff + SUB CoeffNum,CoeffNum,#1 + BEQ NoZerosLeft + + ;// Unpack number of zeros from bitstream + TEQ T1, #4 + LDREQ pVLDTable, =(armVCM4P10_CAVLCTotalZeros2x2Tables-4) + LDRNE pVLDTable, =(armVCM4P10_CAVLCTotalZeroTables-4) + LDR pVLDTable, [pVLDTable, TotalCoeff, LSL #2] + + M_BD_VLD Symbol, T1, T2, pVLDTable, 4, 2 ;// Symbol = ZerosLeft + CMP Symbol,#16 + BGE EndBadSymbol + + LDR ppRunTable, =(armVCM4P10_CAVLCRunBeforeTables-4) + M_ADR pRun, ppRun + MOVS ZerosLeft, Symbol + + ADD CoeffNum,CoeffNum,ZerosLeft + + BEQ NoZerosLeft + + ;// Decode runs
while zeros are left and more than one coefficient +RunLoop + SUBS Count, Count, #1 + LDR pVLDTable, [ppRunTable, ZerosLeft, LSL#2] + BLT LastRun + M_BD_VLD Symbol, T1, T2, pVLDTable, 3, 2 ;// Symbol = Run + CMP Symbol,#15 + BGE EndBadSymbol + + SUBS ZerosLeft, ZerosLeft, Symbol ;// NOTE(review): no check is visible here that Run <= ZerosLeft; confirm the run tables guarantee it + M_PRINTF "Run=%d\n", Symbol + STRB Symbol, [pRun], #1 + BGT RunLoop + + ;// Decode runs while no zeros are left +NoZerosLeft + SUBS Count, Count, #1 + M_PRINTF "Run=%d\n", ZerosLeft + STRGEB ZerosLeft, [pRun], #1 + BGT NoZerosLeft + +LastRun + ;// Final run length is remaining zeros + M_PRINTF "LastRun=%d\n", ZerosLeft + STRB ZerosLeft, [pRun], #1 + + ;// Write coefficients to output array + M_LDR T1, pSMaxNumCoeff ;// sMaxNumCoeff + TEQ T1, #15 + ADDEQ CoeffNum,CoeffNum,#1 + + + SUB pRun,pRun,TotalCoeff ;// Step pRun back by TotalCoeff bytes + SUB pLevel,pLevel,TotalCoeff ;// and pLevel back by TotalCoeff halfwords + SUB pLevel,pLevel,TotalCoeff + + M_LDR ppPosCoefbuf, pppPosCoefbuf + LDR pPosCoefbuf, [ppPosCoefbuf] + TEQ T1, #4 + LDREQ pZigZag, =armVCM4P10_ZigZag_2x2 + LDRNE pZigZag, =armVCM4P10_ZigZag_4x4 + + + +OutputLoop + + LDRB T2, [pRun],#1 + LDRB T1, [pZigZag, CoeffNum] + SUB CoeffNum, CoeffNum, #1 ;// Skip Non zero + SUB CoeffNum, CoeffNum, T2 ;// Skip Zero run + + LDRSH T2, [pLevel],#2 + + SUBS TotalCoeff, TotalCoeff, #1 + ORREQ T1, T1, #LAST_COEFF + + ADD temp, T2, #128 + CMP temp, #256 + ORRCS T1, T1, #TWO_BYTE_COEFF ;// Level outside -128..127 is flagged as a two byte value + + + TEQ TotalCoeff, #0 ;// Preserves carry + + M_PRINTF "Output=%02x %04x\n", T1, T2 + STRB T1, [pPosCoefbuf], #1 + STRB T2, [pPosCoefbuf], #1 + MOV T2, T2, LSR #8 + STRCSB T2, [pPosCoefbuf], #1 ;// High byte is written only for two byte values + BNE OutputLoop + + ;// Finished + STR pPosCoefbuf, [ppPosCoefbuf] + M_LDR ppBitStream, pppBitStream + M_LDR pOffset, ppOffset + B EndNoError + +EndBadSymbol + MOV r0, #OMX_Sts_Err + B End + +EndNoError + ;// Finished reading from the bitstream + M_BD_FINI ppBitStream, pOffset + + ;// Set return value + MOV r0, #OMX_Sts_NoErr +End + M_END + + ENDIF + + END + diff --git
a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DequantTables_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DequantTables_s.s new file mode 100755 index 0000000000000000000000000000000000000000..27616004f5f245e17e5a56addaed17108d303fdb --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DequantTables_s.s @@ -0,0 +1,123 @@ +;// +;// +;// File Name: armVCM4P10_DequantTables_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + EXPORT armVCM4P10_QPDivTable + EXPORT armVCM4P10_VMatrixQPModTable + EXPORT armVCM4P10_PosToVCol4x4 + EXPORT armVCM4P10_PosToVCol2x2 + EXPORT armVCM4P10_VMatrix + EXPORT armVCM4P10_QPModuloTable + EXPORT armVCM4P10_VMatrixU16 + +;// Define the processor variants supported by this file + + M_VARIANTS CortexA8 + + +;// Guarding implementation by the processor name + + + IF CortexA8 + + + M_TABLE armVCM4P10_PosToVCol4x4 + DCB 0, 2, 0, 2 + DCB 2, 1, 2, 1 + DCB 0, 2, 0, 2 + DCB 2, 1, 2, 1 + + + M_TABLE armVCM4P10_PosToVCol2x2 + DCB 0, 2 + DCB 2, 1 + + + M_TABLE armVCM4P10_VMatrix + DCB 10, 16, 13 + DCB 11, 18, 14 + DCB 13, 20, 16 + DCB 14, 23, 18 + DCB 16, 25, 20 + DCB 18, 29, 23 + +;//------------------------------------------------------- +;// This table evaluates the expression [(INT)(QP/6)], +;// for values of QP from 0 to 51 (inclusive). 
+;//------------------------------------------------------- + + M_TABLE armVCM4P10_QPDivTable + DCB 0, 0, 0, 0, 0, 0 + DCB 1, 1, 1, 1, 1, 1 + DCB 2, 2, 2, 2, 2, 2 + DCB 3, 3, 3, 3, 3, 3 + DCB 4, 4, 4, 4, 4, 4 + DCB 5, 5, 5, 5, 5, 5 + DCB 6, 6, 6, 6, 6, 6 + DCB 7, 7, 7, 7, 7, 7 + DCB 8, 8, 8, 8, 8, 8 + +;//---------------------------------------------------- +;// This table contains armVCM4P10_VMatrix[QP%6][0] entries, +;// for values of QP from 0 to 51 (inclusive). +;//---------------------------------------------------- + + M_TABLE armVCM4P10_VMatrixQPModTable + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + +;//------------------------------------------------------- +;// This table evaluates the modulus expression [QP%6]*6, +;// for values of QP from 0 to 51 (inclusive). +;//------------------------------------------------------- + + M_TABLE armVCM4P10_QPModuloTable + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + +;//------------------------------------------------------- +;// This table contains the individual byte values stored as +;// halfwords.
This avoids unpacking inside the function +;//------------------------------------------------------- + + M_TABLE armVCM4P10_VMatrixU16 + DCW 10, 16, 13 + DCW 11, 18, 14 + DCW 13, 20, 16 + DCW 14, 23, 18 + DCW 16, 25, 20 + DCW 18, 29, 23 + + ENDIF ;//CortexA8 + + + + + END \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s new file mode 100755 index 0000000000000000000000000000000000000000..6e912d7681c49802dbe30a0cc502840b67fa13ed --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s @@ -0,0 +1,236 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_Align_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe + +DEBUG_ON SETL {FALSE} + + IF ARM1136JS + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 8 +iHeight RN 9 + +;// Declare inner loop registers +x RN 7 +x0 RN 7 +x1 RN 10 +x2 RN 11 +Scratch RN 12 + +;// Function: +;// armVCM4P10_InterpolateLuma_HorAlign9x_unsafe +;// +;// Implements copy from an arbitrarily aligned source memory location (pSrc) to a 4 byte aligned +;// destination pointed by (pDst) for horizontal interpolation. +;// This function needs to copy 9 bytes in horizontal direction.
+;// +;// Registers used as input for this function +;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy +;// +;// Registers preserved for top level function +;// r2,r3,r4,r5,r6 +;// +;// Registers modified by the function +;// r7,r8,r9,r10,r11,r12 +;// +;// Output registers +;// r0 - pointer to the new aligned location which will be used as pSrc +;// r1 - step size to this aligned location + + ;// Function header + M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + + ;// Copy pDst to scratch + MOV Scratch, pDst + +StartAlignedStackCopy + AND x, pSrc, #3 + BIC pSrc, pSrc, #3 + + M_SWITCH x + M_CASE Copy0toAligned + M_CASE Copy1toAligned + M_CASE Copy2toAligned + M_CASE Copy3toAligned + M_ENDSWITCH + +Copy0toAligned + LDM pSrc, {x0, x1, x2} + SUBS iHeight, iHeight, #1 + ADD pSrc, pSrc, srcStep + + ;// One cycle stall + + STM pDst!, {x0, x1, x2} ;// Store aligned output row + BGT Copy0toAligned + B CopyEnd + +Copy1toAligned + LDM pSrc, {x0, x1, x2} + SUBS iHeight, iHeight, #1 + ADD pSrc, pSrc, srcStep + + ;// One cycle stall + + MOV x0, x0, LSR #8 + ORR x0, x0, x1, LSL #24 + MOV x1, x1, LSR #8 + ORR x1, x1, x2, LSL #24 + MOV x2, x2, LSR #8 + STM pDst!, {x0, x1, x2} ;// Store aligned output row + BGT Copy1toAligned + B CopyEnd + +Copy2toAligned + LDM pSrc, {x0, x1, x2} + SUBS iHeight, iHeight, #1 + ADD pSrc, pSrc, srcStep + + ;// One cycle stall + + MOV x0, x0, LSR #16 + ORR x0, x0, x1, LSL #16 + MOV x1, x1, LSR #16 + ORR x1, x1, x2, LSL #16 + MOV x2, x2, LSR #16 + STM pDst!, {x0, x1, x2} ;// Store aligned output row + BGT Copy2toAligned + B CopyEnd + +Copy3toAligned + LDM pSrc, {x0, x1, x2} + SUBS iHeight, iHeight, #1 + ADD pSrc, pSrc, srcStep + + ;// One cycle stall + + MOV x0, x0, LSR #24 + ORR x0, x0, x1, LSL #8 + MOV x1, x1, LSR #24 + ORR x1, x1, x2, LSL #8 + MOV x2, x2, LSR #24 + STM pDst!, {x0, x1, x2} ;// Store aligned output row + BGT Copy3toAligned + +CopyEnd + + MOV pSrc, Scratch + MOV srcStep, #12 + + M_END + + +;//
Function: +;// armVCM4P10_InterpolateLuma_VerAlign4x_unsafe +;// +;// Implements copy from an arbitrarily aligned source memory location (pSrc) to an aligned +;// destination pointed by (pDst) for vertical interpolation. +;// This function needs to copy 4 bytes in horizontal direction +;// +;// Registers used as input for this function +;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy +;// +;// Registers preserved for top level function +;// r2,r3,r4,r5,r6 +;// +;// Registers modified by the function +;// r7,r8,r9,r10,r11,r12 +;// +;// Output registers +;// r0 - pointer to the new aligned location which will be used as pSrc +;// r1 - step size to this aligned location + + ;// Function header + M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe + + ;// Copy pSrc to stack +StartVAlignedStackCopy + AND x, pSrc, #3 + BIC pSrc, pSrc, #3 + + + M_SWITCH x + M_CASE Copy0toVAligned + M_CASE Copy1toVAligned + M_CASE Copy2toVAligned + M_CASE Copy3toVAligned + M_ENDSWITCH + +Copy0toVAligned + M_LDR x0, [pSrc], srcStep + SUBS iHeight, iHeight, #1 + + ;// One cycle stall + + STR x0, [pDst], #4 ;// Store aligned output row + BGT Copy0toVAligned + B CopyVEnd + +Copy1toVAligned + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + SUBS iHeight, iHeight, #1 + + ;// One cycle stall + + MOV x1, x1, LSL #24 + ORR x0, x1, x0, LSR #8 + STR x0, [pDst], #4 ;// Store aligned output row + BGT Copy1toVAligned + B CopyVEnd + +Copy2toVAligned + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + SUBS iHeight, iHeight, #1 + + ;// One cycle stall + + MOV x1, x1, LSL #16 + ORR x0, x1, x0, LSR #16 + STR x0, [pDst], #4 ;// Store aligned output row + BGT Copy2toVAligned + B CopyVEnd + +Copy3toVAligned + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + SUBS iHeight, iHeight, #1 + + ;// One cycle stall + + MOV x1, x1, LSL #8 + ORR x0, x1, x0, LSR #24 + STR x0, [pDst], #4 ;// Store aligned output row + BGT Copy3toVAligned + +CopyVEnd + + SUB pSrc, pDst, #28 + MOV srcStep,
#4 + + M_END + + + ENDIF + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s new file mode 100755 index 0000000000000000000000000000000000000000..d2758912ddbfa6c2ddf2795c7e1665a304c2a2c0 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s @@ -0,0 +1,149 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_Copy_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + +;// Function: +;// armVCM4P10_InterpolateLuma_Copy4x4_unsafe +;// +;// Implements copy from an arbitrary aligned source memory location (pSrc) to an aligned +;// destination pointed by (pDst) +;// +;// Registers preserved for top level function +;// r1,r3,r4,r5,r6,r7,r10,r11,r14 +;// +;// Registers modified by the function +;// r0,r2,r8,r9,r12 + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 + +;// Declare other intermediate registers +x0 RN 4 +x1 RN 5 +x2 RN 8 +x3 RN 9 +Temp RN 12 + + IF ARM1136JS + + M_START armVCM4P10_InterpolateLuma_Copy4x4_unsafe, r6 + +Copy4x4Start + ;// Do Copy and branch to EndOfInterpolation + AND Temp, pSrc, #3 + BIC pSrc, pSrc, #3 + + M_SWITCH Temp + M_CASE Copy4x4Align0 + M_CASE Copy4x4Align1 + M_CASE Copy4x4Align2 + M_CASE Copy4x4Align3 + M_ENDSWITCH + +Copy4x4Align0 + M_LDR x0, [pSrc], srcStep + M_LDR x1, [pSrc], srcStep + M_STR x0, [pDst], dstStep + M_LDR x2, [pSrc], srcStep + M_STR x1, [pDst], dstStep + M_LDR x3, [pSrc], srcStep + M_STR x2, [pDst], dstStep + M_STR x3, [pDst], dstStep + B Copy4x4End + +Copy4x4Align1 + LDR x1, 
[pSrc, #4] + M_LDR x0, [pSrc], srcStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #8 + ORR x0, x0, x1, LSL #24 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #8 + ORR x2, x2, x3, LSL #24 + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + M_STR x2, [pDst], dstStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #8 + ORR x0, x0, x1, LSL #24 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #8 + ORR x2, x2, x3, LSL #24 + M_STR x2, [pDst], dstStep + B Copy4x4End + +Copy4x4Align2 + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #16 + ORR x0, x0, x1, LSL #16 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #16 + ORR x2, x2, x3, LSL #16 + M_STR x2, [pDst], dstStep + + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #16 + ORR x0, x0, x1, LSL #16 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #16 + ORR x2, x2, x3, LSL #16 + M_STR x2, [pDst], dstStep + B Copy4x4End + +Copy4x4Align3 + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #24 + ORR x0, x0, x1, LSL #8 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #24 + ORR x2, x2, x3, LSL #8 + M_STR x2, [pDst], dstStep + + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #24 + ORR x0, x0, x1, LSL #8 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #24 + ORR x2, x2, x3, LSL #8 + M_STR x2, [pDst], dstStep + B Copy4x4End + +Copy4x4End + M_END + + ENDIF + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s new file mode 100755 index 0000000000000000000000000000000000000000..4e5a39d2c5dfe4a0e5c864006c1575bae1961b51 --- 
/dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s @@ -0,0 +1,178 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe + EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe + +;// Functions: +;// armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and +;// armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe +;// +;// Implements re-arrangement of data from temporary buffer to a buffer pointed by pBuf. +;// This will do the conversion of data from 16 bit to 8 bit and it also +;// removes the offset and checks for saturation. +;// +;// Registers used as input for this function +;// r0,r1,r7 where r0 is input pointer and r1 its step size, r7 is output pointer +;// +;// Registers preserved for top level function +;// r4,r5,r6,r8,r9,r14 +;// +;// Registers modified by the function +;// r7,r10,r11,r12 +;// +;// Output registers +;// r0 - pointer to the destination location +;// r1 - step size to this destination location + + +DEBUG_ON SETL {FALSE} + +MASK EQU 0x80808080 ;// Mask is used to implement (a+b+1)/2 + +;// Declare input registers + +pSrc0 RN 0 +srcStep0 RN 1 + +;// Declare other intermediate registers +Temp1 RN 4 +Temp2 RN 5 +Temp3 RN 10 +Temp4 RN 11 +pBuf RN 7 +r0x0fe00fe0 RN 6 +r0x00ff00ff RN 12 +Count RN 14 +ValueA0 RN 10 +ValueA1 RN 11 + + IF ARM1136JS + + + ;// Function header + M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6 + + ;// Code start + MOV Count, #4 + LDR r0x0fe00fe0, =0x0fe00fe0 + LDR r0x00ff00ff, =0x00ff00ff +LoopStart1 + LDR Temp4, [pSrc0, #12] + LDR Temp3, [pSrc0, #8] + LDR Temp2, [pSrc0, #4] + M_LDR Temp1, [pSrc0], srcStep0 + UQSUB16 Temp4, Temp4, r0x0fe00fe0
+ UQSUB16 Temp3, Temp3, r0x0fe00fe0 + UQSUB16 Temp2, Temp2, r0x0fe00fe0 + UQSUB16 Temp1, Temp1, r0x0fe00fe0 + USAT16 Temp4, #13, Temp4 + USAT16 Temp3, #13, Temp3 + USAT16 Temp2, #13, Temp2 + USAT16 Temp1, #13, Temp1 + AND Temp4, r0x00ff00ff, Temp4, LSR #5 + AND Temp3, r0x00ff00ff, Temp3, LSR #5 + AND Temp2, r0x00ff00ff, Temp2, LSR #5 + AND Temp1, r0x00ff00ff, Temp1, LSR #5 + ORR ValueA1, Temp3, Temp4, LSL #8 + ORR ValueA0, Temp1, Temp2, LSL #8 + SUBS Count, Count, #1 + STRD ValueA0, [pBuf], #8 + BGT LoopStart1 +End1 + SUB pSrc0, pBuf, #32 + MOV srcStep0, #8 + + M_END + + + ;// Function header + M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6 + + ;// Code start + LDR r0x0fe00fe0, =0x0fe00fe0 + LDR r0x00ff00ff, =0x00ff00ff + MOV Count, #2 + +LoopStart + LDR Temp4, [pSrc0, #12] + LDR Temp3, [pSrc0, #8] + LDR Temp2, [pSrc0, #4] + M_LDR Temp1, [pSrc0], srcStep0 + + UQSUB16 Temp4, Temp4, r0x0fe00fe0 + UQSUB16 Temp3, Temp3, r0x0fe00fe0 + UQSUB16 Temp2, Temp2, r0x0fe00fe0 + UQSUB16 Temp1, Temp1, r0x0fe00fe0 + + USAT16 Temp4, #13, Temp4 + USAT16 Temp3, #13, Temp3 + USAT16 Temp2, #13, Temp2 + USAT16 Temp1, #13, Temp1 + + AND Temp4, r0x00ff00ff, Temp4, LSR #5 + AND Temp3, r0x00ff00ff, Temp3, LSR #5 + AND Temp2, r0x00ff00ff, Temp2, LSR #5 + AND Temp1, r0x00ff00ff, Temp1, LSR #5 + ORR ValueA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0] + ORR ValueA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0] + + PKHBT Temp1, ValueA0, ValueA1, LSL #16 ;// [d0 c0 b0 a0] + + STR Temp1, [pBuf], #8 + PKHTB Temp2, ValueA1, ValueA0, ASR #16 ;// [d2 c2 b2 a2] + STR Temp2, [pBuf], #-4 + + LDR Temp4, [pSrc0, #12] + LDR Temp3, [pSrc0, #8] + LDR Temp2, [pSrc0, #4] + M_LDR Temp1, [pSrc0], srcStep0 + + UQSUB16 Temp4, Temp4, r0x0fe00fe0 + UQSUB16 Temp3, Temp3, r0x0fe00fe0 + UQSUB16 Temp2, Temp2, r0x0fe00fe0 + UQSUB16 Temp1, Temp1, r0x0fe00fe0 + + USAT16 Temp4, #13, Temp4 + USAT16 Temp3, #13, Temp3 + USAT16 Temp2, #13, Temp2 + USAT16 Temp1, #13, Temp1 + + AND Temp4, r0x00ff00ff, Temp4, LSR #5 + AND Temp3, 
r0x00ff00ff, Temp3, LSR #5 + AND Temp2, r0x00ff00ff, Temp2, LSR #5 + AND Temp1, r0x00ff00ff, Temp1, LSR #5 + ORR ValueA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0] + ORR ValueA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0] + + PKHBT Temp1, ValueA0, ValueA1, LSL #16 ;// [d0 c0 b0 a0] + SUBS Count, Count, #1 + STR Temp1, [pBuf], #8 + PKHTB Temp2, ValueA1, ValueA0, ASR #16 ;// [d2 c2 b2 a2] + STR Temp2, [pBuf], #4 + + BGT LoopStart +End2 + SUB pSrc0, pBuf, #32-8 + MOV srcStep0, #4 + + M_END + + ENDIF + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s new file mode 100755 index 0000000000000000000000000000000000000000..d1684cb42ac573ce7da64b2e3f63db33cece3191 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s @@ -0,0 +1,313 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + EXPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + + M_VARIANTS CortexA8 + + IF CortexA8 + + M_START armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe, r11 + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 + +;// Declare Neon registers +dCoeff5 DN 30.S16 +dCoeff20 DN 31.S16 +qCoeff5 QN 14.S32 +qCoeff20 QN 15.S32 + +qSrc01 QN 0.U8 +dSrc0 DN 0.U8 +dSrc1 DN 1.U8 + +dSrcb DN 4.U8 +dSrcc DN 2.U8 +dSrcd DN 3.U8 +dSrce DN 5.U8 +dSrcf DN 1.U8 + +qSrcb QN 2.S16 +qSrcc QN 1.S16 +dSrcB DN 4.S16 +dSrcC DN 2.S16 + +qRes0 QN 5.S16 +qRes1 QN 6.S16 +qRes2 QN 7.S16 +qRes3 QN 8.S16 +qRes4 QN 9.S16 +qRes5 QN 10.S16 +qRes6 QN 11.S16 +qRes7 QN 12.S16 +qRes8 QN 13.S16 + +dRes0 DN 10.S16 +dRes1 DN 12.S16 +dRes2 DN 14.S16 +dRes3 DN 16.S16 +dRes4 DN 18.S16 +dRes5 DN 20.S16 +dRes6 DN 22.S16 +dRes7 DN 24.S16 +dRes8 DN 26.S16 + +qAcc01 QN 5.S32 +qAcc23 QN 6.S32 +qAcc45 QN 2.S32 +qAcc67 QN 3.S32 +qSumBE QN 0.S32 +qSumCD QN 1.S32 + +dTempAcc0 DN 0.U16 +dTempAcc1 DN 2.U16 +dTempAcc2 DN 4.U16 +dTempAcc3 DN 6.U16 + +qTAcc0 QN 0.U16 +qTAcc1 QN 1.U16 +qTAcc2 QN 2.U16 +qTAcc3 QN 3.U16 + +dAcc0 DN 0.U8 +dAcc1 DN 2.U8 +dAcc2 DN 4.U8 +dAcc3 DN 6.U8 + +dTmp0 DN 8.S16 +dTmp1 DN 9.S16 +qTmp0 QN 4.S32 + + VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..] + VMOV dCoeff20, #20 + VMOV dCoeff5, #5 + + ;// Row0 + VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..] + VEXT dSrcc, dSrc0, dSrc1, #2 + VEXT dSrcd, dSrc0, dSrc1, #3 + VEXT dSrce, dSrc0, dSrc1, #4 + VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..] + VADDL qSrcc, dSrcc, dSrcd ;// c+d + VADDL qSrcb, dSrcb, dSrce ;// b+e + VADDL qRes0, dSrc0, dSrcf ;// Acc=a+f + VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..] + VMLA dRes0, dSrcC, dCoeff20 ;// Acc += 20*(c+d) +; VMLS dRes0, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + VMUL dTmp0, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + + ;// Row1 + VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..] 
+ VEXT dSrcc, dSrc0, dSrc1, #2 + VEXT dSrcd, dSrc0, dSrc1, #3 + VEXT dSrce, dSrc0, dSrc1, #4 + VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..] + VADDL qSrcc, dSrcc, dSrcd ;// c+d + VADDL qSrcb, dSrcb, dSrce ;// b+e + VADDL qRes1, dSrc0, dSrcf ;// Acc=a+f + VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..] + + VSUB dRes0, dRes0, dTmp0 ;// TeRi + + VMLA dRes1, dSrcC, dCoeff20 ;// Acc += 20*(c+d) +; VMLS dRes1, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + VMUL dTmp0, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + + ;// Row2 + VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..] + VEXT dSrcc, dSrc0, dSrc1, #2 + VEXT dSrcd, dSrc0, dSrc1, #3 + VEXT dSrce, dSrc0, dSrc1, #4 + VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..] + VADDL qSrcc, dSrcc, dSrcd ;// c+d + VADDL qSrcb, dSrcb, dSrce ;// b+e + VADDL qRes2, dSrc0, dSrcf ;// Acc=a+f + VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..] + + VSUB dRes1, dRes1, dTmp0 + + VMLA dRes2, dSrcC, dCoeff20 ;// Acc += 20*(c+d) +; VMLS dRes2, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + VMUL dTmp0, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + + ;// Row3 + VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..] + VEXT dSrcc, dSrc0, dSrc1, #2 + VEXT dSrcd, dSrc0, dSrc1, #3 + VEXT dSrce, dSrc0, dSrc1, #4 + VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..] + VADDL qSrcc, dSrcc, dSrcd ;// c+d + VADDL qSrcb, dSrcb, dSrce ;// b+e + VADDL qRes3, dSrc0, dSrcf ;// Acc=a+f + VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..] + + VSUB dRes2, dRes2, dTmp0 + + VMLA dRes3, dSrcC, dCoeff20 ;// Acc += 20*(c+d) +; VMLS dRes3, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + VMUL dTmp0, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + + ;// Row4 + VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..] + VEXT dSrcc, dSrc0, dSrc1, #2 + VEXT dSrcd, dSrc0, dSrc1, #3 + VEXT dSrce, dSrc0, dSrc1, #4 + VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..] + VADDL qSrcc, dSrcc, dSrcd ;// c+d + VADDL qSrcb, dSrcb, dSrce ;// b+e + VADDL qRes4, dSrc0, dSrcf ;// Acc=a+f + VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..] 
+ + VSUB dRes3, dRes3, dTmp0 + + VMLA dRes4, dSrcC, dCoeff20 ;// Acc += 20*(c+d) +; VMLS dRes4, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + VMUL dTmp0, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + + ;// Row5 + VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..] + VEXT dSrcc, dSrc0, dSrc1, #2 + VEXT dSrcd, dSrc0, dSrc1, #3 + VEXT dSrce, dSrc0, dSrc1, #4 + VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..] + VADDL qSrcc, dSrcc, dSrcd ;// c+d + VADDL qSrcb, dSrcb, dSrce ;// b+e + VADDL qRes5, dSrc0, dSrcf ;// Acc=a+f + VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..] + + VSUB dRes4, dRes4, dTmp0 + + VMLA dRes5, dSrcC, dCoeff20 ;// Acc += 20*(c+d) +; VMLS dRes5, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + VMUL dTmp0, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + + ;// Row6 + VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..] + VEXT dSrcc, dSrc0, dSrc1, #2 + VEXT dSrcd, dSrc0, dSrc1, #3 + VEXT dSrce, dSrc0, dSrc1, #4 + VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..] + VADDL qSrcc, dSrcc, dSrcd ;// c+d + VADDL qSrcb, dSrcb, dSrce ;// b+e + VADDL qRes6, dSrc0, dSrcf ;// Acc=a+f + VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..] + + VSUB dRes5, dRes5, dTmp0 + + VMLA dRes6, dSrcC, dCoeff20 ;// Acc += 20*(c+d) +; VMLS dRes6, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + VMUL dTmp0, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + + ;// Row7 + VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..] + VEXT dSrcc, dSrc0, dSrc1, #2 + VEXT dSrcd, dSrc0, dSrc1, #3 + VEXT dSrce, dSrc0, dSrc1, #4 + VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..] + VADDL qSrcc, dSrcc, dSrcd ;// c+d + VADDL qSrcb, dSrcb, dSrce ;// b+e + VADDL qRes7, dSrc0, dSrcf ;// Acc=a+f + VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..] + + VSUB dRes6, dRes6, dTmp0 + + VMLA dRes7, dSrcC, dCoeff20 ;// Acc += 20*(c+d) +; VMLS dRes7, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + VMUL dTmp0, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + + ;// Row8 + VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..] 
+ VEXT dSrcc, dSrc0, dSrc1, #2 + VEXT dSrcd, dSrc0, dSrc1, #3 + VEXT dSrce, dSrc0, dSrc1, #4 + VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..] + VADDL qSrcc, dSrcc, dSrcd ;// c+d + VADDL qSrcb, dSrcb, dSrce ;// b+e + VADDL qRes8, dSrc0, dSrcf ;// Acc=a+f + + VSUB dRes7, dRes7, dTmp0 + + VMLA dRes8, dSrcC, dCoeff20 ;// Acc += 20*(c+d) +; VMLS dRes8, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + VMUL dTmp0, dSrcB, dCoeff5 ;// Acc -= 5*(b+e) + + VMOV qCoeff20, #20 + VMOV qCoeff5, #5 + + ;// Col0 + VADDL qAcc01, dRes0, dRes5 ;// Acc = a+f + VADDL qSumCD, dRes2, dRes3 ;// c+d + VADDL qSumBE, dRes1, dRes4 ;// b+e + + VSUB dRes8, dRes8, dTmp0 + + VMLA qAcc01, qSumCD, qCoeff20 ;// Acc += 20*(c+d) +; VMLS qAcc01, qSumBE, qCoeff5 ;// Acc -= 5*(b+e) + VMUL qTmp0, qSumBE, qCoeff5 ;// Acc -= 5*(b+e) + + ;// Col1 + VADDL qAcc23, dRes1, dRes6 ;// Acc = a+f + VADDL qSumCD, dRes3, dRes4 ;// c+d + VADDL qSumBE, dRes2, dRes5 ;// b+e + VMLA qAcc23, qSumCD, qCoeff20 ;// Acc += 20*(c+d) + + VSUB qAcc01, qAcc01, qTmp0 + +; VMLS qAcc23, qSumBE, qCoeff5 ;// Acc -= 5*(b+e) + VMUL qTmp0, qSumBE, qCoeff5 ;// Acc -= 5*(b+e) + + ;// Col2 + VADDL qAcc45, dRes2, dRes7 ;// Acc = a+f + VADDL qSumCD, dRes4, dRes5 ;// c+d + VADDL qSumBE, dRes3, dRes6 ;// b+e + VMLA qAcc45, qSumCD, qCoeff20 ;// Acc += 20*(c+d) + + VSUB qAcc23, qAcc23, qTmp0 + +; VMLS qAcc45, qSumBE, qCoeff5 ;// Acc -= 5*(b+e) + VMUL qTmp0, qSumBE, qCoeff5 ;// Acc -= 5*(b+e) + + ;// Col3 + VADDL qAcc67, dRes3, dRes8 ;// Acc = a+f + VADDL qSumCD, dRes5, dRes6 ;// c+d + VADDL qSumBE, dRes4, dRes7 ;// b+e + VMLA qAcc67, qSumCD, qCoeff20 ;// Acc += 20*(c+d) + + VSUB qAcc45, qAcc45, qTmp0 + + VMLS qAcc67, qSumBE, qCoeff5 ;// Acc -= 5*(b+e) + + VQRSHRUN dTempAcc0, qAcc01, #10 + VQRSHRUN dTempAcc1, qAcc23, #10 + VQRSHRUN dTempAcc2, qAcc45, #10 + VQRSHRUN dTempAcc3, qAcc67, #10 + + VQMOVN dAcc0, qTAcc0 + VQMOVN dAcc1, qTAcc1 + VQMOVN dAcc2, qTAcc2 + VQMOVN dAcc3, qTAcc3 + + M_END + + ENDIF + + + + END + diff --git
a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s new file mode 100755 index 0000000000000000000000000000000000000000..7bc091f06bd3b46fed34825838702e365960b975 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s @@ -0,0 +1,266 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe + + M_VARIANTS CortexA8 + + IF CortexA8 + M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r11 + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 + +;// Declare Neon registers +dTCoeff5 DN 30.U8 +dTCoeff20 DN 31.U8 +dCoeff5 DN 30.S16 +dCoeff20 DN 31.S16 + +qSrcA01 QN 0.U8 +qSrcB23 QN 1.U8 +qSrcC45 QN 2.U8 +qSrcD67 QN 3.U8 +qSrcE89 QN 4.U8 +qSrcF1011 QN 5.U8 +qSrcG1213 QN 6.U8 +qSrcH1415 QN 7.U8 +qSrcI1617 QN 8.U8 + +dSrcA0 DN 0.U8 +dSrcB2 DN 2.U8 +dSrcC4 DN 4.U8 +dSrcD6 DN 6.U8 +dSrcE8 DN 8.U8 +dSrcF10 DN 10.U8 +dSrcG12 DN 12.U8 +dSrcH14 DN 14.U8 +dSrcI16 DN 16.U8 + +dSrcA1 DN 1.U8 +dSrcB3 DN 3.U8 +dSrcC5 DN 5.U8 +dSrcD7 DN 7.U8 +dSrcE9 DN 9.U8 +dSrcF11 DN 11.U8 +dSrcG13 DN 13.U8 +dSrcH15 DN 15.U8 +dSrcI17 DN 17.U8 + +qTempP01 QN 9.S16 +qTempQ01 QN 10.S16 +qTempR01 QN 11.S16 +qTempS01 QN 12.S16 + +qTempP23 QN 0.S16 +qTempQ23 QN 1.S16 +qTempR23 QN 2.S16 +qTempS23 QN 3.S16 + +dTempP0 DN 18.S16 +dTempP1 DN 19.S16 +dTempP2 DN 0.S16 + +dTempQ0 DN 20.S16 +dTempQ1 DN 21.S16 +dTempQ2 DN 2.S16 + +dTempR0 DN 22.S16 +dTempR1 DN 23.S16 +dTempR2 DN 4.S16 + +dTempS0 DN 
24.S16 +dTempS1 DN 25.S16 +dTempS2 DN 6.S16 + +dTempB0 DN 26.S16 +dTempC0 DN 27.S16 +dTempD0 DN 28.S16 +dTempF0 DN 29.S16 + +dTempAcc0 DN 0.U16 +dTempAcc1 DN 2.U16 +dTempAcc2 DN 4.U16 +dTempAcc3 DN 6.U16 + +dAcc0 DN 0.U8 +dAcc1 DN 2.U8 +dAcc2 DN 4.U8 +dAcc3 DN 6.U8 + +qAcc0 QN 0.S32 +qAcc1 QN 1.S32 +qAcc2 QN 2.S32 +qAcc3 QN 3.S32 + +qTAcc0 QN 0.U16 +qTAcc1 QN 1.U16 +qTAcc2 QN 2.U16 +qTAcc3 QN 3.U16 + +qTmp QN 4.S16 +dTmp DN 8.S16 + + VLD1 qSrcA01, [pSrc], srcStep ;// [a0 a1 a2 a3 .. a15] + ADD r12, pSrc, srcStep, LSL #2 + VMOV dTCoeff5, #5 + VMOV dTCoeff20, #20 + VLD1 qSrcF1011, [r12], srcStep + VLD1 qSrcB23, [pSrc], srcStep ;// [b0 b1 b2 b3 .. b15] + + VLD1 qSrcG1213, [r12], srcStep + VADDL qTempP01, dSrcA0, dSrcF10 + VLD1 qSrcC45, [pSrc], srcStep ;// [c0 c1 c2 c3 .. c15] + VADDL qTempP23, dSrcA1, dSrcF11 + VLD1 qSrcD67, [pSrc], srcStep + VADDL qTempQ01, dSrcB2, dSrcG12 + VLD1 qSrcE89, [pSrc], srcStep + + ;//t0 + VMLAL qTempP01, dSrcC4, dTCoeff20 + + VLD1 qSrcH1415, [r12], srcStep + + VMLAL qTempP23, dSrcC5, dTCoeff20 + + VLD1 qSrcI1617, [r12], srcStep ;// [i0 i1 i2 i3 .. 
] + + VMLAL qTempP01, dSrcD6, dTCoeff20 + VMLAL qTempQ01, dSrcD6, dTCoeff20 + VMLSL qTempP23, dSrcB3, dTCoeff5 + + VADDL qTempR01, dSrcC4, dSrcH14 + + VMLSL qTempP01, dSrcB2, dTCoeff5 + + VADDL qTempQ23, dSrcB3, dSrcG13 + + VMLAL qTempP23, dSrcD7, dTCoeff20 + VMLAL qTempQ01, dSrcE8, dTCoeff20 + + VMLSL qTempP01, dSrcE8, dTCoeff5 + VMLAL qTempQ23, dSrcD7, dTCoeff20 + + VMLSL qTempP23, dSrcE9, dTCoeff5 + + ;//t1 + + VMLAL qTempR01, dSrcE8, dTCoeff20 + VMLSL qTempQ01, dSrcC4, dTCoeff5 + VMLSL qTempQ23, dSrcC5, dTCoeff5 + VADDL qTempR23, dSrcC5, dSrcH15 + + VMLAL qTempR01, dSrcF10, dTCoeff20 + VMLSL qTempQ01, dSrcF10, dTCoeff5 + VMLAL qTempQ23, dSrcE9, dTCoeff20 + VMLAL qTempR23, dSrcE9, dTCoeff20 + VADDL qTempS01, dSrcD6, dSrcI16 + + + VMLSL qTempR01, dSrcD6, dTCoeff5 + VMLSL qTempQ23, dSrcF11, dTCoeff5 + VMLSL qTempR23, dSrcD7, dTCoeff5 + + ;//t2 + VADDL qTempS23, dSrcD7, dSrcI17 + VMLAL qTempS01, dSrcF10, dTCoeff20 + VMLSL qTempR01, dSrcG12, dTCoeff5 + VMLSL qTempR23, dSrcG13, dTCoeff5 + + VMLAL qTempS23, dSrcF11, dTCoeff20 + VMLAL qTempS01, dSrcG12, dTCoeff20 + VEXT dTempB0, dTempP0, dTempP1, #1 + VMLAL qTempR23, dSrcF11, dTCoeff20 + + + ;//t3 + VMLAL qTempS23, dSrcG13, dTCoeff20 + VMLSL qTempS01, dSrcE8, dTCoeff5 + VEXT dTempC0, dTempP0, dTempP1, #2 + VMOV dCoeff20, #20 + VMLSL qTempS23, dSrcE9, dTCoeff5 + VMLSL qTempS01, dSrcH14, dTCoeff5 + VEXT dTempF0, dTempP1, dTempP2, #1 + VEXT dTempD0, dTempP0, dTempP1, #3 + VMLSL qTempS23, dSrcH15, dTCoeff5 + + VADDL qAcc0, dTempP0, dTempF0 + VADD dTempC0, dTempC0, dTempD0 + ;//h + VMOV dCoeff5, #5 + + ;// res0 + VADD dTempB0, dTempB0, dTempP1 + VMLAL qAcc0, dTempC0, dCoeff20 + VEXT dTempC0, dTempQ0, dTempQ1, #2 + VEXT dTempD0, dTempQ0, dTempQ1, #3 + VEXT dTempF0, dTempQ1, dTempQ2, #1 + VMLSL qAcc0, dTempB0, dCoeff5 + + ;// res1 + VEXT dTempB0, dTempQ0, dTempQ1, #1 + VADDL qAcc1, dTempQ0, dTempF0 + VADD dTempC0, dTempC0, dTempD0 + VADD dTempB0, dTempB0, dTempQ1 + VEXT dTempD0, dTempR0, dTempR1, #3 + VMLAL qAcc1, dTempC0, 
dCoeff20 + VEXT dTempF0, dTempR1, dTempR2, #1 + VEXT dTempC0, dTempR0, dTempR1, #2 + VEXT dTmp, dTempR0, dTempR1, #1 + VADDL qAcc2, dTempR0, dTempF0 + VMLSL qAcc1, dTempB0, dCoeff5 +; VEXT dTempB0, dTempR0, dTempR1, #1 + VADD dTempC0, dTempC0, dTempD0 + + ;// res2 + VADD dTempB0, dTmp, dTempR1 + VEXT dTempD0, dTempS0, dTempS1, #3 + VMLAL qAcc2, dTempC0, dCoeff20 +; VADD dTempB0, dTempB0, dTempR1 + + ;// res3 + VEXT dTempC0, dTempS0, dTempS1, #2 + VEXT dTempF0, dTempS1, dTempS2, #1 + VADD dTempC0, dTempC0, dTempD0 + VEXT dTmp, dTempS0, dTempS1, #1 + VADDL qAcc3, dTempS0, dTempF0 + VMLSL qAcc2, dTempB0, dCoeff5 + VMLAL qAcc3, dTempC0, dCoeff20 + VADD dTmp, dTmp, dTempS1 + VMLSL qAcc3, dTmp, dCoeff5 + + VQRSHRUN dTempAcc0, qAcc0, #10 + VQRSHRUN dTempAcc1, qAcc1, #10 + VQRSHRUN dTempAcc2, qAcc2, #10 + VQRSHRUN dTempAcc3, qAcc3, #10 + + VQMOVN dAcc0, qTAcc0 + VQMOVN dAcc1, qTAcc1 + VQMOVN dAcc2, qTAcc2 + VQMOVN dAcc3, qTAcc3 + + M_END + + ENDIF + + + + + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s new file mode 100755 index 0000000000000000000000000000000000000000..babe8adeab803bc77729cce88b169b9a33b52433 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s @@ -0,0 +1,228 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + + EXPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + +DEBUG_ON SETL {FALSE} + + IF CortexA8 + + M_START armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe, r11 + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 + +;// Declare Neon registers +dCoeff5 DN 30.S16 +dCoeff20 DN 31.S16 + +qSrcA01 QN 11.U8 +qSrcB01 QN 12.U8 +qSrcC01 QN 13.U8 +qSrcD01 QN 14.U8 + +dSrcA0 DN 22.U8 +dSrcA1 DN 23.U8 +dSrcB0 DN 24.U8 +dSrcB1 DN 25.U8 +dSrcC0 DN 26.U8 +dSrcC1 DN 27.U8 +dSrcD0 DN 28.U8 +dSrcD1 DN 29.U8 + +dSrcb DN 12.U8 +dSrce DN 13.U8 +dSrcf DN 10.U8 + +dSrc0c DN 14.U8 +dSrc1c DN 16.U8 +dSrc2c DN 18.U8 +dSrc3c DN 20.U8 + +dSrc0d DN 15.U8 +dSrc1d DN 17.U8 +dSrc2d DN 19.U8 +dSrc3d DN 21.U8 + +qTemp01 QN 4.S16 +qTemp23 QN 6.S16 +dTemp0 DN 8.S16 +dTemp2 DN 12.S16 + +qRes01 QN 11.S16 +qRes23 QN 12.S16 +qRes45 QN 13.S16 +qRes67 QN 14.S16 + +dRes0 DN 22.S16 +dRes2 DN 24.S16 +dRes4 DN 26.S16 +dRes6 DN 28.S16 + +dAcc0 DN 22.U8 +dAcc2 DN 24.U8 +dAcc4 DN 26.U8 +dAcc6 DN 28.U8 + +dResult0 DN 22.U32 +dResult2 DN 24.U32 +dResult4 DN 26.U32 +dResult6 DN 28.U32 + + VLD1 qSrcA01, [pSrc], srcStep ;// Load A register [a0 a1 a2 a3 ..] + ;// One cycle stall + VEXT dSrcf, dSrcA0, dSrcA1, #5 ;// [f0 f1 f2 f3 ..] + VEXT dSrcb, dSrcA0, dSrcA1, #1 ;// [b0 b1 b2 b3 ..] +; VLD1 qSrcB01, [pSrc], srcStep ;// Load B register [a0 a1 a2 a3 ..] + VEXT dSrc0c, dSrcA0, dSrcA1, #2 + VEXT dSrc0d, dSrcA0, dSrcA1, #3 + VEXT dSrce, dSrcA0, dSrcA1, #4 + VADDL qRes01, dSrcA0, dSrcf ;// Acc=a+f + VADDL qTemp01, dSrc0c, dSrc0d ;// c+d + VADDL qTemp23, dSrcb, dSrce ;// b+e + + VLD1 qSrcB01, [pSrc], srcStep ;// Load B register [a0 a1 a2 a3 ..] +; VLD1 qSrcC01, [pSrc], srcStep ;// Load C register [a0 a1 a2 a3 ..] + VMLA dRes0, dTemp0, dCoeff20 ;// Acc += 20*(c+d) +; VMLS dRes0, dTemp2, dCoeff5 ;// Acc -= 5*(b+e) + VMUL dTemp0, dTemp2, dCoeff5 ;// TeRi + + VEXT dSrcf, dSrcB0, dSrcB1, #5 ;// [f0 f1 f2 f3 ..] 
+ VEXT dSrcb, dSrcB0, dSrcB1, #1 ;// [b0 b1 b2 b3 ..] + VEXT dSrc1c, dSrcB0, dSrcB1, #2 + VEXT dSrc1d, dSrcB0, dSrcB1, #3 + VEXT dSrce, dSrcB0, dSrcB1, #4 + VADDL qRes23, dSrcB0, dSrcf ;// Acc=a+f + + VSUB dRes0, dRes0, dTemp0 ;// TeRi + + VADDL qTemp01, dSrc1c, dSrc1d ;// c+d + VADDL qTemp23, dSrcb, dSrce ;// b+e + + VLD1 qSrcC01, [pSrc], srcStep ;// Load C register [a0 a1 a2 a3 ..] +; VLD1 qSrcD01, [pSrc], srcStep ;// Load D register [a0 a1 a2 a3 ..] + + VMLA dRes2, dTemp0, dCoeff20 ;// Acc += 20*(c+d) +; VMLS dRes2, dTemp2, dCoeff5 ;// Acc -= 5*(b+e) + VMUL dTemp0, dTemp2, dCoeff5 ;// TeRi + + VEXT dSrcf, dSrcC0, dSrcC1, #5 ;// [f0 f1 f2 f3 ..] + VEXT dSrcb, dSrcC0, dSrcC1, #1 ;// [b0 b1 b2 b3 ..] + VEXT dSrc2c, dSrcC0, dSrcC1, #2 + VEXT dSrc2d, dSrcC0, dSrcC1, #3 + VEXT dSrce, dSrcC0, dSrcC1, #4 + VADDL qRes45, dSrcC0, dSrcf ;// Acc=a+f + + VSUB dRes2, dRes2, dTemp0 ;// TeRi + + VADDL qTemp01, dSrc2c, dSrc2d ;// c+d + VADDL qTemp23, dSrcb, dSrce ;// b+e + + VLD1 qSrcD01, [pSrc], srcStep ;// Load D register [a0 a1 a2 a3 ..] + + VMLA dRes4, dTemp0, dCoeff20 ;// Acc += 20*(c+d) +; VMLS dRes4, dTemp2, dCoeff5 ;// Acc -= 5*(b+e) + VMUL dTemp0, dTemp2, dCoeff5 ;// Acc -= 5*(b+e) TeRi + + + VEXT dSrcf, dSrcD0, dSrcD1, #5 ;// [f0 f1 f2 f3 ..] + VEXT dSrcb, dSrcD0, dSrcD1, #1 ;// [b0 b1 b2 b3 ..] 
+ VEXT dSrc3c, dSrcD0, dSrcD1, #2 + VEXT dSrc3d, dSrcD0, dSrcD1, #3 + VEXT dSrce, dSrcD0, dSrcD1, #4 + VADDL qRes67, dSrcD0, dSrcf ;// Acc=a+f + + VSUB dRes4, dRes4, dTemp0 ;// TeRi + + VADDL qTemp01, dSrc3c, dSrc3d ;// c+d + VADDL qTemp23, dSrcb, dSrce ;// b+e + VMLA dRes6, dTemp0, dCoeff20 ;// Acc += 20*(c+d) + VMLS dRes6, dTemp2, dCoeff5 ;// Acc -= 5*(b+e) + + VQRSHRUN dAcc0, qRes01, #5 ;// Acc = Sat ((Acc + 16) / 32) + VQRSHRUN dAcc2, qRes23, #5 ;// Acc = Sat ((Acc + 16) / 32) + VQRSHRUN dAcc4, qRes45, #5 ;// Acc = Sat ((Acc + 16) / 32) + VQRSHRUN dAcc6, qRes67, #5 ;// Acc = Sat ((Acc + 16) / 32) + + M_END + + ENDIF + + + END + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s new file mode 100755 index 0000000000000000000000000000000000000000..89c90aa87764d086990cf1f95f6df3ab26f84169 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s @@ -0,0 +1,134 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + + EXPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + + IF CortexA8 + + M_START armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe, r11 + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 + +Temp RN 12 + +;// Declare Neon registers +dCoeff5 DN 30.S16 +dCoeff20 DN 31.S16 + +dSrc0 DN 7.U8 +dSrc1 DN 8.U8 +dSrc2 DN 9.U8 +dSrc3 DN 10.U8 +dSrc4 DN 11.U8 +dSrc5 DN 12.U8 +dSrc6 DN 13.U8 +dSrc7 DN 14.U8 +dSrc8 DN 15.U8 + +qSumBE01 QN 8.S16 +qSumCD01 QN 9.S16 +dSumBE0 DN 16.S16 +dSumCD0 DN 18.S16 + +qAcc01 QN 0.S16 +qAcc23 QN 1.S16 +qAcc45 QN 2.S16 +qAcc67 QN 3.S16 + +dRes0 DN 0.S16 +dRes1 DN 2.S16 +dRes2 DN 4.S16 +dRes3 DN 6.S16 + +dAcc0 DN 0.U8 +dAcc1 DN 2.U8 +dAcc2 DN 4.U8 +dAcc3 DN 6.U8 + + +dTmp0 DN 20.S16 +dTmp1 DN 21.S16 +dTmp2 DN 22.S16 +dTmp3 DN 23.S16 + + + VLD1 dSrc0, [pSrc], srcStep ;// [a0 a1 a2 a3 .. ] + ADD Temp, pSrc, srcStep, LSL #2 + VLD1 dSrc1, [pSrc], srcStep ;// [b0 b1 b2 b3 .. ] + ;// One cycle stall + VLD1 dSrc5, [Temp], srcStep + ;// One cycle stall + VLD1 dSrc2, [pSrc], srcStep ;// [c0 c1 c2 c3 .. 
] + VADDL qAcc01, dSrc0, dSrc5 ;// Acc = a+f + VLD1 dSrc3, [pSrc], srcStep + ;// One cycle stall + VLD1 dSrc6, [Temp], srcStep ;// TeRi + + VLD1 dSrc4, [pSrc], srcStep + VLD1 dSrc7, [Temp], srcStep ;// TeRi + VADDL qSumBE01, dSrc1, dSrc4 ;// b+e + VADDL qSumCD01, dSrc2, dSrc3 ;// c+d + VLD1 dSrc8, [Temp], srcStep ;// TeRi + VMLS dRes0, dSumBE0, dCoeff5 ;// Acc -= 5*(b+e) +; VMLA dRes0, dSumCD0, dCoeff20 ;// Acc += 20*(c+d) + VMUL dTmp0, dSumCD0, dCoeff20 ;// Tmp0 = 20*(c+d), added to Acc later + +; VLD1 dSrc6, [Temp], srcStep + VADDL qSumBE01, dSrc2, dSrc5 ;// b+e + VADDL qSumCD01, dSrc3, dSrc4 ;// c+d + VADDL qAcc23, dSrc1, dSrc6 ;// Acc = a+f + VMLS dRes1, dSumBE0, dCoeff5 ;// Acc -= 5*(b+e) +; VMLA dRes1, dSumCD0, dCoeff20 ;// Acc += 20*(c+d) + VMUL dTmp1, dSumCD0, dCoeff20 ;// Tmp1 = 20*(c+d), added to Acc later + +; VLD1 dSrc7, [Temp], srcStep + VADDL qSumBE01, dSrc3, dSrc6 ;// b+e + VADDL qSumCD01, dSrc4, dSrc5 ;// c+d + VADDL qAcc45, dSrc2, dSrc7 ;// Acc = a+f + VMLS dRes2, dSumBE0, dCoeff5 ;// Acc -= 5*(b+e) +; VMLA dRes2, dSumCD0, dCoeff20 ;// Acc += 20*(c+d) + VMUL dTmp2, dSumCD0, dCoeff20 ;// Tmp2 = 20*(c+d), added to Acc later + +; VLD1 dSrc8, [Temp], srcStep ;// [i0 i1 i2 i3 ..
] + VADDL qSumBE01, dSrc4, dSrc7 ;// b+e + VADDL qAcc67, dSrc3, dSrc8 ;// Acc = a+f + VADDL qSumCD01, dSrc5, dSrc6 ;// c+d + VMLS dRes3, dSumBE0, dCoeff5 ;// Acc -= 5*(b+e) + VADD dRes0, dRes0, dTmp0 ;// Acc += Tmp0 (deferred 20*(c+d)) + VADD dRes1, dRes1, dTmp1 ;// Acc += Tmp1 (deferred 20*(c+d)) + VADD dRes2, dRes2, dTmp2 ;// Acc += Tmp2 (deferred 20*(c+d)) + VMLA dRes3, dSumCD0, dCoeff20 ;// Acc += 20*(c+d) +; VMUL dTmp3, dSumCD0, dCoeff20 ;// Acc += 20*(c+d) +; VADD dRes3, dRes3, dTmp3 + + VQRSHRUN dAcc0, qAcc01, #5 ;// Acc = Sat ((Acc + 16) / 32) + VQRSHRUN dAcc1, qAcc23, #5 ;// Acc = Sat ((Acc + 16) / 32) + VQRSHRUN dAcc2, qAcc45, #5 ;// Acc = Sat ((Acc + 16) / 32) + VQRSHRUN dAcc3, qAcc67, #5 ;// Acc = Sat ((Acc + 16) / 32) + + M_END + + ENDIF + + + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s new file mode 100755 index 0000000000000000000000000000000000000000..0f0ec7854364095deee457f653cc133318d35875 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s @@ -0,0 +1,318 @@ +;// +;// +;// File Name: armVCM4P10_Interpolate_Chroma_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// +;// +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + + + IF CortexA8 + + M_TABLE armVCM4P10_WidthBranchTableMVIsNotZero + + DCD WidthIs2MVIsNotZero, WidthIs2MVIsNotZero + DCD WidthIs4MVIsNotZero, WidthIs4MVIsNotZero + DCD WidthIs8MVIsNotZero + + M_TABLE armVCM4P10_WidthBranchTableMVIsZero + + DCD WidthIs2MVIsZero, WidthIs2MVIsZero + DCD WidthIs4MVIsZero, WidthIs4MVIsZero + DCD WidthIs8MVIsZero + + +;// input registers + +pSrc RN 0 +iSrcStep RN 1 +pDst RN 2 +iDstStep RN 3 +iWidth RN 4 +iHeight RN 5 +dx RN 6 +dy RN 7 + +;// local variable registers +pc RN 15 +return RN 0 +EightMinusdx RN 8 +EightMinusdy RN 9 + +ACoeff RN 12 +BCoeff RN 9 +CCoeff RN 8 +DCoeff RN 6 + +pTable RN 11 + +Step1 RN 10 +SrcStepMinus1 RN 14 + +dACoeff DN D12.U8 +dBCoeff DN D13.U8 +dCCoeff DN D14.U8 +dDCoeff DN D15.U8 + +dRow0a DN D0.U8 +dRow0b DN D1.U8 +dRow1a DN D2.U8 +dRow1b DN D3.U8 + +qRow0a QN Q2.S16 +qRow0b QN Q3.S16 + +;//dIndex DN D16.U8 +qRow1a QN Q11.S16 +qRow1b QN Q12.S16 + +dRow2a DN D16.U8 +dRow2b DN D17.U8 +dRow3a DN D18.U8 +dRow3b DN D19.U8 + +qOutRow2 QN Q11.U16 +qOutRow3 QN Q12.U16 +dOutRow2 DN D20.U8 +dOutRow3 DN D21.U8 +dOutRow2U64 DN D20.U64 +dOutRow3U64 DN D21.U64 + +qOutRow0 QN Q2.U16 +qOutRow1 QN Q3.U16 +dOutRow0 DN D8.U8 +dOutRow1 DN D9.U8 + +dOutRow0U64 DN D8.U64 +dOutRow1U64 DN D9.U64 + +dOutRow0U32 DN D8.U32 +dOutRow1U32 DN D9.U32 + +dOutRow0U16 DN D8.U16 +dOutRow1U16 DN D9.U16 + + +dOut0U64 DN D0.U64 +dOut1U64 DN D1.U64 + +dOut00U32 DN D0.U32 +dOut01U32 DN D1.U32 +dOut10U32 DN D2.U32 +dOut11U32 DN D3.U32 + +dOut0U16 DN D0.U16 +dOut1U16 DN D1.U16 + +;//----------------------------------------------------------------------------------------------- +;// armVCM4P10_Interpolate_Chroma_asm starts +;//----------------------------------------------------------------------------------------------- + + ;// Write function header + M_START armVCM4P10_Interpolate_Chroma, r11, d15 + + ;// Define stack arguments + M_ARG Width, 4 + M_ARG Height, 
4 + M_ARG Dx, 4 + M_ARG Dy, 4 + + ;// Load argument from the stack + ;// M_STALL ARM1136JS=4 + + M_LDRD dx, dy, Dx + M_LDRD iWidth, iHeight, Width + + ;// EightMinusdx = 8 - dx + ;// EightMinusdy = 8 - dy + + ;// ACoeff = EightMinusdx * EightMinusdy + ;// BCoeff = dx * EightMinusdy + ;// CCoeff = EightMinusdx * dy + ;// DCoeff = dx * dy + + RSB EightMinusdx, dx, #8 + RSB EightMinusdy, dy, #8 + CMN dx,dy + MOV Step1, #1 + LDREQ pTable, =armVCM4P10_WidthBranchTableMVIsZero + SUB SrcStepMinus1, iSrcStep, Step1 + LDRNE pTable, =armVCM4P10_WidthBranchTableMVIsNotZero + + VLD1 dRow0a, [pSrc], Step1 ;// 0a + + SMULBB ACoeff, EightMinusdx, EightMinusdy + SMULBB BCoeff, dx, EightMinusdy + VLD1 dRow0b, [pSrc], SrcStepMinus1 ;// 0b + SMULBB CCoeff, EightMinusdx, dy + SMULBB DCoeff, dx, dy + + VDUP dACoeff, ACoeff + VDUP dBCoeff, BCoeff + VDUP dCCoeff, CCoeff + VDUP dDCoeff, DCoeff + + LDR pc, [pTable, iWidth, LSL #1] ;// Branch to the case based on iWidth + +;// Pixel layout: +;// +;// x00 x01 x02 +;// x10 x11 x12 +;// x20 x21 x22 + +;// If fractionl mv is not (0, 0) +WidthIs8MVIsNotZero + + VLD1 dRow1a, [pSrc], Step1 ;// 1a + VMULL qRow0a, dRow0a, dACoeff + VLD1 dRow1b, [pSrc], SrcStepMinus1 ;// 1b + VMULL qRow0b, dRow1a, dACoeff + VLD1 dRow2a, [pSrc], Step1 ;// 2a + VMLAL qRow0a, dRow0b, dBCoeff + VLD1 dRow2b, [pSrc], SrcStepMinus1 ;// 2b + VMULL qRow1a, dRow2a, dACoeff + VMLAL qRow0b, dRow1b, dBCoeff + VLD1 dRow3a, [pSrc], Step1 ;// 3a + VMLAL qRow0a, dRow1a, dCCoeff + VMLAL qRow1a, dRow2b, dBCoeff + VMULL qRow1b, dRow3a, dACoeff + VLD1 dRow3b, [pSrc], SrcStepMinus1 ;// 3b + VMLAL qRow0b, dRow2a, dCCoeff + VLD1 dRow0a, [pSrc], Step1 ;// 0a + VMLAL qRow1b, dRow3b, dBCoeff + VMLAL qRow1a, dRow3a, dCCoeff + VMLAL qRow0a, dRow1b, dDCoeff + VLD1 dRow0b, [pSrc], SrcStepMinus1 ;// 0b + VMLAL qRow1b, dRow0a, dCCoeff + VMLAL qRow0b, dRow2b, dDCoeff + VMLAL qRow1a, dRow3b, dDCoeff + + + SUBS iHeight, iHeight, #4 + VMLAL qRow1b, dRow0b, dDCoeff + + VQRSHRN dOutRow0, qOutRow0, #6 + 
VQRSHRN dOutRow1, qOutRow1, #6 + VQRSHRN dOutRow2, qOutRow2, #6 + VST1 dOutRow0U64, [pDst], iDstStep + VQRSHRN dOutRow3, qOutRow3, #6 + + VST1 dOutRow1U64, [pDst], iDstStep + VST1 dOutRow2U64, [pDst], iDstStep + VST1 dOutRow3U64, [pDst], iDstStep + + + BGT WidthIs8MVIsNotZero + MOV return, #OMX_Sts_NoErr + M_EXIT + +WidthIs4MVIsNotZero + + VLD1 dRow1a, [pSrc], Step1 + VMULL qRow0a, dRow0a, dACoeff + VMULL qRow0b, dRow1a, dACoeff + VLD1 dRow1b, [pSrc], SrcStepMinus1 + VMLAL qRow0a, dRow0b, dBCoeff + VMLAL qRow0b, dRow1b, dBCoeff + VLD1 dRow0a, [pSrc], Step1 + VMLAL qRow0a, dRow1a, dCCoeff + VMLAL qRow0b, dRow0a, dCCoeff + VLD1 dRow0b, [pSrc], SrcStepMinus1 + SUBS iHeight, iHeight, #2 + VMLAL qRow0b, dRow0b, dDCoeff + VMLAL qRow0a, dRow1b, dDCoeff + + VQRSHRN dOutRow1, qOutRow1, #6 + VQRSHRN dOutRow0, qOutRow0, #6 + + VST1 dOutRow0U32[0], [pDst], iDstStep + VST1 dOutRow1U32[0], [pDst], iDstStep + + BGT WidthIs4MVIsNotZero + MOV return, #OMX_Sts_NoErr + M_EXIT + +WidthIs2MVIsNotZero + + VLD1 dRow1a, [pSrc], Step1 + VMULL qRow0a, dRow0a, dACoeff + VMULL qRow0b, dRow1a, dACoeff + VLD1 dRow1b, [pSrc], SrcStepMinus1 + VMLAL qRow0a, dRow0b, dBCoeff + VMLAL qRow0b, dRow1b, dBCoeff + VLD1 dRow0a, [pSrc], Step1 + VMLAL qRow0a, dRow1a, dCCoeff + VMLAL qRow0b, dRow0a, dCCoeff + VLD1 dRow0b, [pSrc], SrcStepMinus1 + SUBS iHeight, iHeight, #2 + VMLAL qRow0b, dRow0b, dDCoeff + VMLAL qRow0a, dRow1b, dDCoeff + + VQRSHRN dOutRow1, qOutRow1, #6 + VQRSHRN dOutRow0, qOutRow0, #6 + + VST1 dOutRow0U16[0], [pDst], iDstStep + VST1 dOutRow1U16[0], [pDst], iDstStep + + BGT WidthIs2MVIsNotZero + MOV return, #OMX_Sts_NoErr + M_EXIT + +;// If fractionl mv is (0, 0) +WidthIs8MVIsZero + SUB pSrc, pSrc, iSrcStep + +WidthIs8LoopMVIsZero + VLD1 dRow0a, [pSrc], iSrcStep + SUBS iHeight, iHeight, #2 + VLD1 dRow0b, [pSrc], iSrcStep + VST1 dOut0U64, [pDst], iDstStep + VST1 dOut1U64, [pDst], iDstStep + BGT WidthIs8LoopMVIsZero + + MOV return, #OMX_Sts_NoErr + M_EXIT + +WidthIs4MVIsZero + VLD1 dRow0b, 
[pSrc], iSrcStep + + SUBS iHeight, iHeight, #2 + + VST1 dOut00U32[0], [pDst], iDstStep + VLD1 dRow0a, [pSrc], iSrcStep + VST1 dOut01U32[0], [pDst], iDstStep + + BGT WidthIs4MVIsZero + MOV return, #OMX_Sts_NoErr + M_EXIT + +WidthIs2MVIsZero + VLD1 dRow0b, [pSrc], iSrcStep + SUBS iHeight, iHeight, #2 + + VST1 dOut0U16[0], [pDst], iDstStep + VLD1 dRow0a, [pSrc], iSrcStep + VST1 dOut1U16[0], [pDst], iDstStep + + BGT WidthIs2MVIsZero + MOV return, #OMX_Sts_NoErr + M_END + + ENDIF ;// CortexA8 + + END + +;//----------------------------------------------------------------------------------------------- +;// armVCM4P10_Interpolate_Chroma_asm ends +;//----------------------------------------------------------------------------------------------- + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_QuantTables_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_QuantTables_s.s new file mode 100755 index 0000000000000000000000000000000000000000..7e2642babd5b35a6465cd67569140d14783e4e62 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_QuantTables_s.s @@ -0,0 +1,74 @@ +;// +;// +;// File Name: armVCM4P10_QuantTables_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// Description: +;// This file contains quantization tables +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + + EXPORT armVCM4P10_MFMatrixQPModTable + EXPORT armVCM4P10_QPDivIntraTable + EXPORT armVCM4P10_QPDivPlusOneTable + +;//-------------------------------------------------------------- +;// This table contains armVCM4P10_MFMatrix [iQP % 6][0] entires, +;// for values of iQP from 0 to 51 (inclusive). 
+;//-------------------------------------------------------------- + + M_TABLE armVCM4P10_MFMatrixQPModTable + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + +;//--------------------------------------------------------------- +;// This table contains ARM_M4P10_Q_OFFSET + 1 + (iQP / 6) values, +;// for values of iQP from 0 to 51 (inclusive). +;//--------------------------------------------------------------- + + M_TABLE armVCM4P10_QPDivPlusOneTable + DCB 16, 16, 16, 16, 16, 16 + DCB 17, 17, 17, 17, 17, 17 + DCB 18, 18, 18, 18, 18, 18 + DCB 19, 19, 19, 19, 19, 19 + DCB 20, 20, 20, 20, 20, 20 + DCB 21, 21, 21, 21, 21, 21 + DCB 22, 22, 22, 22, 22, 22 + DCB 23, 23, 23, 23, 23, 23 + DCB 24, 24, 24, 24, 24, 24 + +;//------------------------------------------------------------------ +;// This table contains (1 << QbitsPlusOne) / 3 Values (Intra case) , +;// for values of iQP from 0 to 51 (inclusive). 
+;//------------------------------------------------------------------ + + M_TABLE armVCM4P10_QPDivIntraTable, 2 + DCD 21845, 21845, 21845, 21845, 21845, 21845 + DCD 43690, 43690, 43690, 43690, 43690, 43690 + DCD 87381, 87381, 87381, 87381, 87381, 87381 + DCD 174762, 174762, 174762, 174762, 174762, 174762 + DCD 349525, 349525, 349525, 349525, 349525, 349525 + DCD 699050, 699050, 699050, 699050, 699050, 699050 + DCD 1398101, 1398101, 1398101, 1398101, 1398101, 1398101 + DCD 2796202, 2796202, 2796202, 2796202, 2796202, 2796202 + DCD 5592405, 5592405, 5592405, 5592405, 5592405, 5592405 + + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s new file mode 100755 index 0000000000000000000000000000000000000000..ee9c339652f53fafd90fb722b96f620f62af7f5d --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s @@ -0,0 +1,186 @@ +;// +;// +;// File Name: armVCM4P10_TransformResidual4x4_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+;// +;// +;// +;// Description: +;// Transform Residual 4x4 Coefficients +;// +;// + + +;// Include standard headers + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + +;// Import symbols required from other files +;// (For example tables) + + + + +;// Set debugging level +;//DEBUG_ON SETL {TRUE} + + + +;// Guarding implementation by the processor name + + + + + + + + +;// Guarding implementation by the processor name + + IF CortexA8 + +;// ARM Registers + +;//Input Registers +pDst RN 0 +pSrc RN 1 + + +;// Neon Registers + +;// Packed Input pixels +dIn0 DN D0.S16 +dIn1 DN D1.S16 +dIn2 DN D2.S16 +dIn3 DN D3.S16 + +;// Intermediate calculations +dZero DN D4.S16 +de0 DN D5.S16 +de1 DN D6.S16 +de2 DN D7.S16 +de3 DN D8.S16 +dIn1RS DN D7.S16 +dIn3RS DN D8.S16 +df0 DN D0.S16 +df1 DN D1.S16 +df2 DN D2.S16 +df3 DN D3.S16 +qf01 QN Q0.32 +qf23 QN Q1.32 +dg0 DN D5.S16 +dg1 DN D6.S16 +dg2 DN D7.S16 +dg3 DN D8.S16 +df1RS DN D7.S16 +df3RS DN D8.S16 + +;// Output pixels +dh0 DN D0.S16 +dh1 DN D1.S16 +dh2 DN D2.S16 +dh3 DN D3.S16 + + + ;// Allocate stack memory required by the function + + + ;// Write function header + M_START armVCM4P10_TransformResidual4x4, ,d8 + + ;****************************************************************** + ;// The strategy used in implementing the transform is as follows:* + ;// Load the 4x4 block into 8 registers * + ;// Transpose the 4x4 matrix * + ;// Perform the row operations (on columns) using SIMD * + ;// Transpose the 4x4 result matrix * + ;// Perform the coloumn operations * + ;// Store the 4x4 block at one go * + ;****************************************************************** + + ;// Load all the 4x4 pixels in transposed form + + VLD4 {dIn0,dIn1,dIn2,dIn3},[pSrc] + + VMOV dZero,#0 ;// Used to right shift by 1 + + + ;**************************************** + ;// Row Operations (Performed on columns) + ;**************************************** + + + VADD de0,dIn0,dIn2 ;// e0 = d0 + d2 + VSUB de1,dIn0,dIn2 ;// e1 = d0 
- d2 + VHADD dIn1RS,dIn1,dZero ;// d1>>1 (halving add with dZero, a register holding 0) + VHADD dIn3RS,dIn3,dZero ;// d3>>1 + VSUB de2,dIn1RS,dIn3 ;// e2 = (d1>>1) - d3 + VADD de3,dIn1,dIn3RS ;// e3 = d1 + (d3>>1) + VADD df0,de0,de3 ;// f0 = e0 + e3 + VADD df1,de1,de2 ;// f1 = e1 + e2 + VSUB df2,de1,de2 ;// f2 = e1 - e2 + VSUB df3,de0,de3 ;// f3 = e0 - e3 + + + + ;***************************************************************** + ;// Transpose the resultant matrix + ;***************************************************************** + + VTRN df0,df1 + VTRN df2,df3 + VTRN qf01,qf23 + + + ;******************************* + ;// Column Operations + ;******************************* + + + VADD dg0,df0,df2 ;// g0 = f0 + f2 + VSUB dg1,df0,df2 ;// g1 = f0 - f2 + VHADD df1RS,df1,dZero ;// f1>>1 (halving add with dZero, a register holding 0) + VHADD df3RS,df3,dZero ;// f3>>1 + VSUB dg2,df1RS,df3 ;// g2 = (f1>>1) - f3 + VADD dg3,df1,df3RS ;// g3 = f1 + (f3>>1) + VADD dh0,dg0,dg3 ;// h0 = g0 + g3 + VADD dh1,dg1,dg2 ;// h1 = g1 + g2 + VSUB dh2,dg1,dg2 ;// h2 = g1 - g2 + VSUB dh3,dg0,dg3 ;// h3 = g0 - g3 + + + ;************************************************ + ;// Calculate final value (colOp[i][j] + 32)>>6 + ;************************************************ + + VRSHR dh0,#6 + VRSHR dh1,#6 + VRSHR dh2,#6 + VRSHR dh3,#6 + + + ;*************************** + ;// Store all the 4x4 pixels + ;*************************** + + VST1 {dh0,dh1,dh2,dh3},[pDst] + + + ;// Set return value + +End + + + ;// Write function tail + M_END + + ENDIF ;//CortexA8 + + END \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s new file mode 100755 index 0000000000000000000000000000000000000000..4c52e2221976ee129f6ef1edf9faa55bcdbd3916 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s @@ -0,0 +1,92
@@ +;// +;// +;// File Name: armVCM4P10_UnpackBlock4x4_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + +;// Define the processor variants supported by this file + + M_VARIANTS ARM1136JS + + + IF ARM1136JS + +;//-------------------------------------- +;// Input Arguments and their scope/usage +;//-------------------------------------- +ppSrc RN 0 ;// Persistent variable +pDst RN 1 ;// Persistent variable + +;//-------------------------------- +;// Variables and their scope/usage +;//-------------------------------- +pSrc RN 2 ;// Persistent variables +Flag RN 3 +Value RN 4 +Value2 RN 5 +strOffset RN 6 +cstOffset RN 7 + + + M_START armVCM4P10_UnpackBlock4x4, r7 + + LDR pSrc, [ppSrc] ;// Load pSrc + MOV cstOffset, #31 ;// To be used in the loop, to compute offset + + ;//----------------------------------------------------------------------- + ; Firstly, fill all the coefficient values on the buffer by zero + ;//----------------------------------------------------------------------- + + MOV Value, #0 ;// Initialize the zero value + MOV Value2, #0 ;// Initialize the zero value + LDRB Flag, [pSrc], #1 ;// Preload before + + STRD Value, [pDst, #0] ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0 + STRD Value, [pDst, #8] ;// pDst[4] = pDst[5] = pDst[6] = pDst[7] = 0 + STRD Value, [pDst, #16] ;// pDst[8] = pDst[9] = pDst[10] = pDst[11] = 0 + STRD Value, [pDst, #24] ;// pDst[12] = pDst[13] = pDst[14] = pDst[15] = 0 + + ;//---------------------------------------------------------------------------- + ;// The loop below parses and unpacks the input stream. The C-model has + ;// a somewhat complicated logic for sign extension. But in the v6 version, + ;// that can be easily taken care by loading the data from stream as + ;// SIGNED byte/halfword. 
So, based on the first TST instruction, 8-bits or + ;// 16-bits are read. + ;// + ;// Next, to compute the offset, where the unpacked value needs to be stored, + ;// we modify the computation to perform [(Flag & 15) << 1] as [(Flag << 1) & 31] + ;// This results in a saving of one cycle. + ;//---------------------------------------------------------------------------- + +unpackLoop + TST Flag, #0x10 ;// Computing (Flag & 0x10): NE selects the 16-bit path, EQ the 8-bit path + LDRSBNE Value2,[pSrc,#1] ;// Load byte wise to avoid unaligned access (signed high byte) + LDRBNE Value, [pSrc], #2 ;// Unsigned low byte; pSrc advances by 2 + AND strOffset, cstOffset, Flag, LSL #1 ;// strOffset = (Flag << 1) & 31, i.e. (Flag & 15) << 1 + LDRSBEQ Value, [pSrc], #1 ;// Value = (OMX_S8) *pSrc++ (sign-extended byte) + ORRNE Value,Value,Value2, LSL #8 ;// Value = low byte | (signed high byte << 8) + + TST Flag, #0x20 ;// Computing (Flag & 0x20) to check, if we're done + LDRBEQ Flag, [pSrc], #1 ;// Flag = (OMX_U8) *pSrc++, for next iteration + STRH Value, [pDst, strOffset] ;// Store at offset + BEQ unpackLoop ;// Branch to the loop beginning + + STR pSrc, [ppSrc] ;// Update the bitstream pointer + M_END + + ENDIF + + + + END + \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c new file mode 100755 index 0000000000000000000000000000000000000000..40d4d5ef7f6a3c7cc1d8e52fa6f6bb043f221f49 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c @@ -0,0 +1,88 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DeblockChroma_I.c + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + * + * + * H.264 intra chroma deblock + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_DeblockChroma_I + * + * Description: + * Performs deblocking filtering on all edges of the chroma macroblock (16x16). + * + * Remarks: + * + * Parameters: + * [in] pSrcDst pointer to the input macroblock. Must be 8-byte aligned. + * [in] srcdstStep Step of the arrays; must be a multiple of 8 + * [in] pAlpha pointer to a 2x2 array of alpha thresholds, organized as follows: { external + * vertical edge, internal vertical edge, external + * horizontal edge, internal horizontal edge } + * [in] pBeta pointer to a 2x2 array of beta thresholds, organized as follows: { external + * vertical edge, internal vertical edge, external horizontal edge, + * internal horizontal edge } + * [in] pThresholds Array of size 8x2 of Thresholds (TC0) (values for the left or + * above edge of each 4x2 or 2x4 block, arranged in vertical block order + * and then in horizontal block order) + * [in] pBS array of size 16x2 of BS parameters (arranged in scan block order for vertical edges and then horizontal edges); + * valid in the range [0,4] with the following restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^1]== 4. Must be 4-byte aligned. + * [out] pSrcDst pointer to filtered output macroblock + * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - Any of the pointers pSrcDst, pAlpha, pBeta, pThresholds, or pBS is NULL. + * - pSrcDst is not 8-byte aligned. + * - either pThresholds or pBS is not 4-byte aligned. + * - pBS is out of range, i.e., one of the following conditions is true: pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && pBS[i^1]!=4) for 0<=i<=3. + * - srcdstStep is not a multiple of 8.
+ * + */ +OMXResult omxVCM4P10_DeblockChroma_I( + OMX_U8* pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8* pAlpha, + const OMX_U8* pBeta, + const OMX_U8* pThresholds, + const OMX_U8 *pBS +) +{ + OMXResult errorCode; + /* Argument checks: non-NULL pointers, 8-byte aligned pSrcDst, step multiple of 8, 4-byte aligned pThresholds/pBS. */ + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr); + armRetArgErrIf(srcdstStep & 7, OMX_Sts_BadArgErr); + armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr); + armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr); + /* Vertical edges are filtered first, using the start of each table. */ + errorCode = omxVCM4P10_FilterDeblockingChroma_VerEdge_I( + pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS); + /* Hand a vertical-pass error code to armRetArgErrIf before the horizontal pass. */ + armRetArgErrIf(errorCode != OMX_Sts_NoErr, errorCode) + /* Horizontal edges use the second half of each table: alpha/beta from index 2, thresholds from index 8, BS from index 16. */ + errorCode = omxVCM4P10_FilterDeblockingChroma_HorEdge_I( + pSrcDst, srcdstStep, pAlpha+2, pBeta+2, pThresholds+8, pBS+16); + + return errorCode; +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c new file mode 100755 index 0000000000000000000000000000000000000000..619365fcfd14e3cf0f4b2acf4a0c3c8a06682376 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c @@ -0,0 +1,91 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DeblockLuma_I.c + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + * + * + * H.264 luma deblock + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + + +/** + * Function: omxVCM4P10_DeblockLuma_I + * + * Description: + * This function performs deblock filtering of the horizontal and vertical edges of a luma macroblock + * (16x16). + * + * Remarks: + * + * Parameters: + * [in] pSrcDst pointer to the input macroblock. Must be 8-byte aligned. + * [in] srcdstStep image width; must be a multiple of 8 + * [in] pAlpha pointer to a 2x2 table of alpha thresholds, organized as follows: { external + * vertical edge, internal vertical edge, external horizontal + * edge, internal horizontal edge } + * [in] pBeta pointer to a 2x2 table of beta thresholds, organized as follows: { external + * vertical edge, internal vertical edge, external horizontal edge, + * internal horizontal edge } + * [in] pThresholds pointer to a 16x2 table of thresholds (TC0), organized as follows: (values for + * the left or above edge of each 4x4 block, arranged in vertical block order + * and then in horizontal block order). Must be 4-byte aligned. + * [in] pBS pointer to a 16x2 table of BS parameters arranged in scan block order for vertical edges and then horizontal edges; + * valid in the range [0,4] with the following restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^1]== 4. Must be 4-byte aligned. + * [out] pSrcDst pointer to filtered output macroblock. + * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - Any of the pointers pSrcDst, pAlpha, pBeta, pThresholds or pBS is NULL. + * - pSrcDst is not 8-byte aligned, or pThresholds or pBS is not 4-byte aligned. + * - srcdstStep is not a multiple of 8 + * - pBS is out of range, i.e., one of the following conditions is true: pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && pBS[i^1]!=4) for 0<=i<=3. +.
+ * + */ + +OMXResult omxVCM4P10_DeblockLuma_I( + OMX_U8* pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8* pAlpha, + const OMX_U8* pBeta, + const OMX_U8* pThresholds, + const OMX_U8 *pBS +) +{ + OMXResult errorCode; + /* Argument checks: non-NULL pointers, 8-byte aligned pSrcDst, step multiple of 8, 4-byte aligned pThresholds/pBS. */ + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr); + armRetArgErrIf(srcdstStep & 7, OMX_Sts_BadArgErr); + armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr); + armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr); + /* Vertical edges are filtered first, using the start of each table. */ + errorCode = omxVCM4P10_FilterDeblockingLuma_VerEdge_I( + pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS); + /* Hand a vertical-pass error code to armRetArgErrIf before the horizontal pass. */ + armRetArgErrIf(errorCode != OMX_Sts_NoErr, errorCode) + /* Horizontal edges use the second half of each table: alpha/beta from index 2, thresholds and BS from index 16. */ + errorCode = omxVCM4P10_FilterDeblockingLuma_HorEdge_I( + pSrcDst, srcdstStep, pAlpha+2, pBeta+2, pThresholds+16, pBS+16); + + return errorCode; +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c new file mode 100755 index 0000000000000000000000000000000000000000..4e871bfcdd839d2733663a1ad24c9a7dbb37a9e3 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c @@ -0,0 +1,62 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + * + * + * H.264 decode coefficients module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC + * + * Description: + * Performs CAVLC decoding and inverse raster scan for 2x2 block of + * ChromaDCLevel. The decoded coefficients in packed position-coefficient + * buffer are stored in increasing raster scan order, namely position order. + * + * Remarks: + * Thin wrapper: forwards its four arguments to armVCM4P10_DecodeCoeffsToPair together with the constants 17 and 4. + * Parameters: + * [in] ppBitStream Double pointer to current byte in bit stream + * buffer + * [in] pOffset Pointer to current bit position in the byte + * pointed to by *ppBitStream + * [out] ppBitStream *ppBitStream is updated after each block is decoded + * [out] pOffset *pOffset is updated after each block is decoded + * [out] pNumCoeff Pointer to the number of nonzero coefficients + * in this block + * [out] ppPosCoefbuf Double pointer to destination residual + * coefficient-position pair buffer + * + * Return Value: + * Standard omxError result. See enumeration for possible result codes.
+ * + */ + +OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC ( + const OMX_U8** ppBitStream, + OMX_S32* pOffset, + OMX_U8* pNumCoeff, + OMX_U8** ppPosCoefbuf + ) + +{ + return armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff, + ppPosCoefbuf, 17, 4); /* 17 and 4 fill the slots the 4x4 wrapper passes as sVLCSelect and sMaxNumCoeff; 4 matches the 2x2 block; presumably 17 selects the chroma-DC table - confirm in armVCM4P10_DecodeCoeffsToPair */ + +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c new file mode 100755 index 0000000000000000000000000000000000000000..b29e57663f295e36d166c14be632ed52c2e2de62 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c @@ -0,0 +1,68 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DecodeCoeffsToPairCAVLC.c + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * H.264 decode coefficients module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC + * + * Description: + * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of + * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. + * Inverse field scan is not supported. The decoded coefficients in packed + * position-coefficient buffer are stored in increasing zigzag order instead + * of position order. + * + * Remarks: + * Thin wrapper: forwards all six arguments unchanged to armVCM4P10_DecodeCoeffsToPair (signature order is sVLCSelect, then sMaxNumCoeff). + * Parameters: + * [in] ppBitStream Double pointer to current byte in bit stream buffer + * [in] pOffset Pointer to current bit position in the byte pointed + * to by *ppBitStream + * [in] sMaxNumCoeff Maximum number of non-zero coefficients in current + * block + * [in] sVLCSelect VLC table selector, obtained from number of non-zero + * AC coefficients of above and left 4x4 blocks.
It is + * equivalent to the variable nC described in H.264 standard + * table 9-5, except its value cannot be less than zero. + * [out] ppBitStream *ppBitStream is updated after each block is decoded + * [out] pOffset *pOffset is updated after each block is decoded + * [out] pNumCoeff Pointer to the number of nonzero coefficients in + * this block + * [out] ppPosCoefbuf Double pointer to destination residual + * coefficient-position pair buffer + * Return Value: + * Standard omxError result. See enumeration for possible result codes. + * + */ + +OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC( + const OMX_U8** ppBitStream, + OMX_S32* pOffset, + OMX_U8* pNumCoeff, + OMX_U8**ppPosCoefbuf, + OMX_INT sVLCSelect, + OMX_INT sMaxNumCoeff + ) +{ + return armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff, + ppPosCoefbuf, sVLCSelect, sMaxNumCoeff); +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s new file mode 100755 index 0000000000000000000000000000000000000000..485a48854bda162a7c362c06686ae797cf8b9ccc --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s @@ -0,0 +1,396 @@ +;// +;// +;// File Name: omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// +;// +;// +;// Description: +;// H.264 inverse quantize and transform module +;// +;// + + + +;// Include standard headers + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + +;// Import symbols required from other files +;// (For example tables) + + IMPORT armVCM4P10_UnpackBlock4x4 + IMPORT armVCM4P10_TransformResidual4x4 + IMPORT armVCM4P10_QPDivTable + IMPORT armVCM4P10_VMatrixU16 + IMPORT armVCM4P10_QPModuloTable + + M_VARIANTS CortexA8 + +;// Set debugging level +;//DEBUG_ON SETL {TRUE} + + +;// Static Function: armVCM4P10_DequantLumaAC4x4 + +;// Guarding implementation by the processor name + + + +;// Guarding implementation by the processor name + + + + + + +;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd + +;// Guarding implementation by the processor name + + + +;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd (r0=ppSrc, r1=pPred, r2=pDC, r3=pDst; predStep, dstStep, QP and AC are read from the stack) + +;// Guarding implementation by the processor name + + IF CortexA8 + + +;// ARM Registers + +;//Input Registers +ppSrc RN 0 +pPred RN 1 +pDC RN 2 +pDst RN 3 + + +;//Output Registers +result RN 0 + +;//Local Scratch Registers + +;//Registers used in armVCM4P10_DequantLumaAC4x4 +pQPdiv RN 10 +pQPmod RN 11 +pVRow RN 2 +QPmod RN 12 +shift RN 14 +index0 RN 1 +index1 RN 10 + +;//Registers used in DequantTransformResidualFromPairAndAdd +pDelta RN 4 +pDeltaTmp RN 6 +AC RN 5 ;//Load from stack +pPredTemp RN 7 +pDCTemp RN 8 +pDstTemp RN 9 +pDeltaArg1 RN 1 +pDeltaArg0 RN 0 +QP RN 1 ;//Load from stack +DCval RN 10 +predstep RN 1 +dstStep RN 10 +PredVal1 RN 3 +PredVal2 RN 5 + + + + +;// Neon Registers + +;// Registers used in armVCM4P10_DequantLumaAC4x4 + +dVmatrix DN D6.8 +dindexRow0 DN D7.32 +dindexRow1 DN D9.32 +dByteIndexRow0 DN D7.8 +dByteIndexRow1 DN D9.8 +dVRow0 DN D8.8 +dVRow1 DN D4.8 +dVRow0U16 DN D8.U16 +dVRow1U16 DN D4.U16 +dVRow2U16 DN D8.U16 +dVRow3U16 DN D4.U16 + +dShift DN D5.U16 +dSrcRow0 DN D0.I16 +dSrcRow1 DN D1.I16 +dSrcRow2 DN D2.I16 +dSrcRow3 DN D3.I16 +dDqntRow0 DN D0.I16 +dDqntRow1 DN D1.I16 +dDqntRow2 DN
D2.I16 +dDqntRow3 DN D3.I16 + +;// Registers used in TransformResidual4x4 + +;// Packed Input pixels +dIn0 DN D0.S16 +dIn1 DN D1.S16 +dIn2 DN D2.S16 +dIn3 DN D3.S16 +qIn01 QN Q0.32 +qIn23 QN Q1.32 + +;// Intermediate calculations +dZero DN D4.S16 +de0 DN D5.S16 +de1 DN D6.S16 +de2 DN D7.S16 +de3 DN D8.S16 +dIn1RS DN D7.S16 +dIn3RS DN D8.S16 +df0 DN D0.S16 +df1 DN D1.S16 +df2 DN D2.S16 +df3 DN D3.S16 +qf01 QN Q0.32 +qf23 QN Q1.32 +dg0 DN D5.S16 +dg1 DN D6.S16 +dg2 DN D7.S16 +dg3 DN D8.S16 +df1RS DN D7.S16 +df3RS DN D8.S16 + +;// Output pixels +dh0 DN D0.S16 +dh1 DN D1.S16 +dh2 DN D2.S16 +dh3 DN D3.S16 + +;// Registers used in DequantTransformResidualFromPairAndAdd + +dDeltaRow0 DN D0.S16 +dDeltaRow1 DN D1.S16 +dDeltaRow2 DN D2.S16 +dDeltaRow3 DN D3.S16 +qDeltaRow01 QN Q0.S16 +qDeltaRow23 QN Q1.S16 + +dPredValRow01 DN D4.U8 +dPredValRow23 DN D5.U8 + +qSumRow01 QN Q3.S16 +qSumRow23 QN Q4.S16 +dDstRow01 DN D0.U8 +dDstRow23 DN D1.U8 +dDstRow0 DN D0.32[0] +dDstRow1 DN D0.32[1] +dDstRow2 DN D1.32[0] +dDstRow3 DN D1.32[1] + + + ;// Allocate stack memory required by the function + M_ALLOC8 pBuffer, 32 + + + ;// Write function header + M_START omxVCM4P10_DequantTransformResidualFromPairAndAdd,r11,d9 + + ;// Define stack arguments + M_ARG predStepOnStack, 4 + M_ARG dstStepOnStack,4 + M_ARG QPOnStack, 4 + M_ARG ACOnStack,4 + + + M_ADR pDelta,pBuffer + M_LDR AC,ACOnStack + + + ;// Save registers r1,r2,r3 before function call + MOV pPredTemp,pPred + MOV pDCTemp,pDC + MOV pDstTemp,pDst + + CMP AC,#0 + BEQ DCcase ;// AC == 0: skip unpack/dequant/transform and take the DC-only path + MOV pDeltaArg1,pDelta ;// Set up r1 for armVCM4P10_UnpackBlock4x4 + + BL armVCM4P10_UnpackBlock4x4 + + ;//-------------------------------------------------------- + ;// armVCM4P10_DequantLumaAC4x4 : static function inlined + ;//-------------------------------------------------------- + + ;//BL armVCM4P10_DequantLumaAC4x4 + M_LDR QP,QPOnStack ;// Set up r1 for armVCM4P10_DequantLumaAC4x4 + + LDR pQPmod,=armVCM4P10_QPModuloTable + LDR pQPdiv,=armVCM4P10_QPDivTable + LDR
pVRow,=armVCM4P10_VMatrixU16 + + + LDRSB QPmod,[pQPmod,QP] ;// (QP%6) * 6 + LDRSB shift,[pQPdiv,QP] ;// Shift = QP / 6 + + LDR index1,=0x03020504 + LDR index0,=0x05040100 ;// Indexes into dVmatrix + ADD pVRow,pVRow,QPmod + VDUP dindexRow0,index0 + VDUP dindexRow1,index1 + VDUP dShift,shift + + ;// Load all 4x4 pVRow[] values + VLD1 dVmatrix,[pVRow] ;// dVmatrix = [0d|0c|0b|0a] + + + VTBL dVRow0,dVmatrix,dByteIndexRow0 ;// row0 = row2 = [pVRow[2] | pVRow[0] | pVRow[2] | pVRow[0]] + VTBL dVRow1,dVmatrix,dByteIndexRow1 ;// row1 = row3 = [pVRow[1] | pVRow[2] | pVRow[1] | pVRow[2]] + CMP pDCTemp,#0 ;// flags are consumed by LDRSHNE and VMOVNE below (pDC may be NULL on this path) + ;// Load all the 4x4 'src' values + VLD1 { dSrcRow0,dSrcRow1,dSrcRow2,dSrcRow3 },[pDelta] + + VSHL dVRow0U16,dVRow0U16,dShift + VSHL dVRow1U16,dVRow1U16,dShift + LDRSHNE DCval,[pDCTemp] + + + ;// Multiply src[] with pVRow[] + VMUL dDqntRow0,dSrcRow0,dVRow0U16 + VMUL dDqntRow1,dSrcRow1,dVRow1U16 + VMUL dDqntRow2,dSrcRow2,dVRow2U16 + VMUL dDqntRow3,dSrcRow3,dVRow3U16 + + + + ;//------------------------------------------------------------- + ;// TransformResidual4x4 : Inlined to avoid Load/Stores + ;//------------------------------------------------------------- + + + ;//BL armVCM4P10_TransformResidual4x4 + ;//STRHNE DCval,[pDelta] + VMOVNE dIn0[0],DCval + + + + ;//***************************************************************** + ;// Transpose the input pixels : perform Row ops as Col ops + ;//***************************************************************** + + VTRN dIn0,dIn1 + VTRN dIn2,dIn3 + VTRN qIn01,qIn23 + + + VMOV dZero,#0 ;// Used to right shift by 1 + + + ;//**************************************** + ;// Row Operations (Performed on columns) + ;//**************************************** + + + VADD de0,dIn0,dIn2 ;// e0 = d0 + d2 + VSUB de1,dIn0,dIn2 ;// e1 = d0 - d2 + VHADD dIn1RS,dIn1,dZero ;// (d1>>1): halving add with dZero, a register holding 0 + VHADD dIn3RS,dIn3,dZero ;// (d3>>1) + VSUB de2,dIn1RS,dIn3 ;// e2 = (d1>>1) - d3 + VADD de3,dIn1,dIn3RS ;// e3 = d1 + (d3>>1) + VADD
df0,de0,de3 ;// f0 = e0 + e3 + VADD df1,de1,de2 ;// f1 = e1 + e2 + VSUB df2,de1,de2 ;// f2 = e1 - e2 + VSUB df3,de0,de3 ;// f3 = e0 - e3 + + + + ;//***************************************************************** + ;// Transpose the resultant matrix + ;//***************************************************************** + + VTRN df0,df1 + VTRN df2,df3 + VTRN qf01,qf23 + + + ;//******************************* + ;// Column Operations + ;//******************************* + + + VADD dg0,df0,df2 ;// g0 = f0 + f2 + VSUB dg1,df0,df2 ;// g1 = f0 - f2 + VHADD df1RS,df1,dZero ;// (f1>>1): halving add with dZero, a register holding 0 + VHADD df3RS,df3,dZero ;// (f3>>1) + VSUB dg2,df1RS,df3 ;// g2 = (f1>>1) - f3 + VADD dg3,df1,df3RS ;// g3 = f1 + (f3>>1) + VADD dh0,dg0,dg3 ;// h0 = g0 + g3 + VADD dh1,dg1,dg2 ;// h1 = g1 + g2 + VSUB dh2,dg1,dg2 ;// h2 = g1 - g2 + VSUB dh3,dg0,dg3 ;// h3 = g0 - g3 + + + ;//************************************************ + ;// Calculate final value (colOp[i][j] + 32)>>6 + ;//************************************************ + + VRSHR dh0,#6 + VRSHR dh1,#6 + VRSHR dh2,#6 + VRSHR dh3,#6 + + + B OutDCcase + + +DCcase + ;// Calculate the Transformed DCvalue : (DCval+32)>>6 + LDRSH DCval,[pDCTemp] ;// NOTE(review): unconditional load - assumes pDC is non-NULL whenever AC == 0; confirm against callers + ADD DCval,DCval,#32 + ASR DCval,DCval,#6 + + VDUP dDeltaRow0, DCval ;// pDelta[0] = pDelta[1] = pDelta[2] = pDelta[3] = DCval + VDUP dDeltaRow1, DCval ;// pDelta[4] = pDelta[5] = pDelta[6] = pDelta[7] = DCval + VDUP dDeltaRow2, DCval ;// pDelta[8] = pDelta[9] = pDelta[10] = pDelta[11] = DCval + VDUP dDeltaRow3, DCval ;// pDelta[12] = pDelta[13] = pDelta[14] = pDelta[15] = DCval + + +OutDCcase + M_LDR predstep,predStepOnStack + M_LDR dstStep,dstStepOnStack + + LDR PredVal1,[pPredTemp],predstep + LDR PredVal2,[pPredTemp],predstep + VMOV dPredValRow01,PredVal1,PredVal2 + + LDR PredVal1,[pPredTemp],predstep + LDR PredVal2,[pPredTemp] + VMOV dPredValRow23,PredVal1,PredVal2 + + + VADDW qSumRow01,qDeltaRow01,dPredValRow01 + VADDW qSumRow23,qDeltaRow23,dPredValRow23 + VQMOVUN dDstRow01,qSumRow01 + VQMOVUN dDstRow23,qSumRow23 + + + VST1
dDstRow0,[pDstTemp],dstStep + VST1 dDstRow1,[pDstTemp],dstStep + VST1 dDstRow2,[pDstTemp],dstStep + VST1 dDstRow3,[pDstTemp] + + ;// Set return value + MOV result,#OMX_Sts_NoErr + +End + + + ;// Write function tail + + M_END + + ENDIF ;//CORTEXA8 + + + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s new file mode 100644 index 0000000000000000000000000000000000000000..4606197919bb4a8d94de7ac696ff1675ee6ded92 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s @@ -0,0 +1,202 @@ +;// +;// +;// File Name: omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + + IF CortexA8 + + IMPORT armVCM4P10_DeblockingChromabSGE4_unsafe + IMPORT armVCM4P10_DeblockingChromabSLT4_unsafe + +LOOP_COUNT EQU 0x40000000 ;// doubled by ADDS XY,XY,XY each pass; reaches zero after two passes of LoopY +MASK_3 EQU 0x03030303 +MASK_4 EQU 0x04040404 + +;// Function arguments + +pSrcDst RN 0 +srcdstStep RN 1 +pAlpha RN 2 +pBeta RN 3 + +pThresholds RN 5 +pBS RN 4 +bS3210 RN 6 + +;// Loop + +XY RN 7 + +;// Pixels +dP_0 DN D4.U8 +dP_1 DN D5.U8 +dP_2 DN D6.U8 +dQ_0 DN D8.U8 +dQ_1 DN D9.U8 +dQ_2 DN D10.U8 + +;// Filtering Decision +dAlpha DN D0.U8 +dBeta DN D2.U8 + +dFilt DN D16.U8 +dAqflg DN D12.U8 +dApflg DN D17.U8 + +dAp0q0 DN D13.U8 +dAp1p0 DN D12.U8 +dAq1q0 DN D18.U8 +dAp2p0 DN D19.U8 +dAq2q0 DN D17.U8 + +qBS3210 QN Q13.U16 +dBS3210 DN D26 +dMask_bs DN D27 +dFilt_bs DN D26.U16 + +;// bSLT4 +dMask_0 DN D14.U8 +dMask_1 DN D15.U8 +dMask_4 DN D1.U16 + +Mask_4 RN 8 +Mask_3 RN 9 + +dTemp DN D19.U8 + +;// Result +dP_0t DN D13.U8 +dQ_0t DN D31.U8 + +dP_0n DN D29.U8 +dQ_0n
DN D24.U8 + + + ;// Function header + M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r9, d15 + + ;//Arguments on the stack + M_ARG ppThresholds, 4 + M_ARG ppBS, 4 + + ;// d0-dAlpha_0 + ;// d2-dBeta_0 + + ;load alpha1,beta1 somewhere to avoid more loads - NOTE(review): in this file dAlpha/dBeta are reloaded only on the NoFilterBS0 path; presumably the _unsafe helpers reload them on the filtering path - confirm + VLD1 {dAlpha[]}, [pAlpha]! + SUB pSrcDst, pSrcDst, srcdstStep, LSL #1 ;// with the next SUB: pSrcDst -= 3*srcdstStep, the row loaded into dP_2 + SUB pSrcDst, pSrcDst, srcdstStep + VLD1 {dBeta[]}, [pBeta]! + + M_LDR pBS, ppBS + M_LDR pThresholds, ppThresholds + + LDR Mask_3, =MASK_3 + LDR Mask_4, =MASK_4 + + VMOV dMask_0, #0 + VMOV dMask_1, #1 + VMOV dMask_4, #4 + + LDR XY, =LOOP_COUNT + + ;// p0-p2 - d4-d6 + ;// q0-q2 - d8-d10 +LoopY + LDR bS3210, [pBS], #8 + + VLD1 dP_2, [pSrcDst], srcdstStep + ;1 + VLD1 dP_1, [pSrcDst], srcdstStep + CMP bS3210, #0 + VLD1 dP_0, [pSrcDst], srcdstStep + ;1 + VLD1 dQ_0, [pSrcDst], srcdstStep + VABD dAp2p0, dP_2, dP_0 + VLD1 dQ_1, [pSrcDst], srcdstStep + VABD dAp0q0, dP_0, dQ_0 + VLD1 dQ_2, [pSrcDst], srcdstStep + BEQ NoFilterBS0 + + VABD dAp1p0, dP_1, dP_0 + VABD dAq1q0, dQ_1, dQ_0 + + VCGT dFilt, dAlpha, dAp0q0 + VMOV.U32 dBS3210[0], bS3210 + VMAX dAp1p0, dAq1q0, dAp1p0 + VMOVL qBS3210, dBS3210.U8 + VABD dAq2q0, dQ_2, dQ_0 + VCGT dMask_bs.S16, dBS3210.S16, #0 + + VCGT dAp1p0, dBeta, dAp1p0 + VCGT dAp2p0, dBeta, dAp2p0 + + VAND dFilt, dMask_bs.U8 + + TST bS3210, Mask_3 + + VCGT dAq2q0, dBeta, dAq2q0 + VAND dFilt, dFilt, dAp1p0 + + VAND dAqflg, dFilt, dAq2q0 + VAND dApflg, dFilt, dAp2p0 + + ;// bS < 4 Filtering + BLNE armVCM4P10_DeblockingChromabSLT4_unsafe + + TST bS3210, Mask_4 + + SUB pSrcDst, pSrcDst, srcdstStep, LSL #2 + VTST dFilt_bs, dFilt_bs, dMask_4 + + ;// bS == 4 Filtering + BLNE armVCM4P10_DeblockingChromabSGE4_unsafe + + VBIT dP_0n, dP_0t, dFilt_bs + VBIT dQ_0n, dQ_0t, dFilt_bs + + VBIF dP_0n, dP_0, dFilt + VBIF dQ_0n, dQ_0, dFilt + + ;// Result Storage + VST1 dP_0n, [pSrcDst], srcdstStep + ADDS XY, XY, XY + VST1 dQ_0n, [pSrcDst], srcdstStep + + BNE LoopY + + MOV r0, #OMX_Sts_NoErr + + M_EXIT + +NoFilterBS0 + + VLD1 {dAlpha[]},
[pAlpha] + SUB pSrcDst, pSrcDst, srcdstStep, LSL #1 + ADDS XY, XY, XY + VLD1 {dBeta[]}, [pBeta] + ADD pThresholds, pThresholds, #4 + BNE LoopY + + MOV r0, #OMX_Sts_NoErr + M_END + + ENDIF + + + END + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s new file mode 100644 index 0000000000000000000000000000000000000000..18e6c1d25e01d26172c068a7414160ed6eff7e14 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s @@ -0,0 +1,282 @@ +;// +;// +;// File Name: omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + + IF CortexA8 + + IMPORT armVCM4P10_DeblockingChromabSGE4_unsafe + IMPORT armVCM4P10_DeblockingChromabSLT4_unsafe + +LOOP_COUNT EQU 0x40000000 ;// doubled by ADDS XY,XY,XY each pass; reaches zero after two passes of LoopY +MASK_3 EQU 0x03030303 +MASK_4 EQU 0x04040404 + +;// Function arguments + +pSrcDst RN 0 +srcdstStep RN 1 +pAlpha RN 2 +pBeta RN 3 + +pThresholds RN 5 +pBS RN 4 +bS3210 RN 6 +pSrcDst_P RN 10 +pSrcDst_Q RN 12 + +pTmp RN 10 +pTmp2 RN 12 +step RN 14 + +;// Loop + +XY RN 7 + +;// Rows input +dRow0 DN D7.U8 +dRow1 DN D8.U8 +dRow2 DN D5.U8 +dRow3 DN D10.U8 +dRow4 DN D6.U8 +dRow5 DN D9.U8 +dRow6 DN D4.U8 +dRow7 DN D11.U8 + + +;// Pixels +dP_0 DN D4.U8 +dP_1 DN D5.U8 +dP_2 DN D6.U8 +dQ_0 DN D8.U8 +dQ_1 DN D9.U8 +dQ_2 DN D10.U8 + +;// Filtering Decision +dAlpha DN D0.U8 +dBeta DN D2.U8 + +dFilt DN D16.U8 +dAqflg DN D12.U8 +dApflg DN D17.U8 + +dAp0q0 DN D13.U8 +dAp1p0 DN D12.U8 +dAq1q0 DN D18.U8 +dAp2p0 DN D19.U8 +dAq2q0 DN D17.U8 + +qBS3210 QN Q13.U16 +dBS3210 DN D26 +dMask_bs DN D27 +dFilt_bs DN D26.U16 + +;// bSLT4 +dMask_0 DN
D14.U8 +dMask_1 DN D15.U8 +dMask_4 DN D1.U16 + +Mask_4 RN 8 +Mask_3 RN 9 + +dTemp DN D19.U8 + +;// Result +dP_0t DN D13.U8 +dQ_0t DN D31.U8 + +dP_0n DN D29.U8 +dQ_0n DN D24.U8 + + + ;// Function header + M_START omxVCM4P10_FilterDeblockingChroma_VerEdge_I, r12, d15 + + ;//Arguments on the stack + M_ARG ppThresholds, 4 + M_ARG ppBS, 4 + + ;// d0-dAlpha_0 + ;// d2-dBeta_0 + + ;load alpha1,beta1 somewhere to avoid more loads - NOTE(review): in this file dAlpha/dBeta are reloaded only on the NoFilterBS0 path; presumably the _unsafe helpers reload them on the filtering path - confirm + VLD1 {dAlpha[]}, [pAlpha]! + SUB pSrcDst, pSrcDst, #4 ;// start 4 bytes left of the edge so each 8-byte row load covers p3..q3 + VLD1 {dBeta[]}, [pBeta]! + + M_LDR pBS, ppBS + M_LDR pThresholds, ppThresholds + + LDR Mask_4, =MASK_4 + LDR Mask_3, =MASK_3 + + ;dMask_0-14 + ;dMask_1-15 + ;dMask_4-1 + + VMOV dMask_0, #0 + VMOV dMask_1, #1 + VMOV dMask_4, #4 + + LDR XY, =LOOP_COUNT + + ;// p0-p3 - d4-d7 + ;// q0-q3 - d8-d11 + + +LoopY + LDR bS3210, [pBS], #8 + ADD pTmp, pSrcDst, srcdstStep + ADD step, srcdstStep, srcdstStep + + ;1 + VLD1 dRow0, [pSrcDst], step + ;1 + VLD1 dRow1, [pTmp], step + VLD1 dRow2, [pSrcDst], step + VLD1 dRow3, [pTmp], step + VLD1 dRow4, [pSrcDst], step + VLD1 dRow5, [pTmp], step + VLD1 dRow6, [pSrcDst], step + VLD1 dRow7, [pTmp], step + + + ;// dRow0 = [q3r0 q2r0 q1r0 q0r0 p0r0 p1r0 p2r0 p3r0] + ;// dRow1 = [q3r1 q2r1 q1r1 q0r1 p0r1 p1r1 p2r1 p3r1] + ;// dRow2 = [q3r2 q2r2 q1r2 q0r2 p0r2 p1r2 p2r2 p3r2] + ;// dRow3 = [q3r3 q2r3 q1r3 q0r3 p0r3 p1r3 p2r3 p3r3] + ;// dRow4 = [q3r4 q2r4 q1r4 q0r4 p0r4 p1r4 p2r4 p3r4] + ;// dRow5 = [q3r5 q2r5 q1r5 q0r5 p0r5 p1r5 p2r5 p3r5] + ;// dRow6 = [q3r6 q2r6 q1r6 q0r6 p0r6 p1r6 p2r6 p3r6] + ;// dRow7 = [q3r7 q2r7 q1r7 q0r7 p0r7 p1r7 p2r7 p3r7] + + ;// 8x8 Transpose + VZIP.8 dRow0, dRow1 + VZIP.8 dRow2, dRow3 + VZIP.8 dRow4, dRow5 + VZIP.8 dRow6, dRow7 + + VZIP.16 dRow0, dRow2 + VZIP.16 dRow1, dRow3 + VZIP.16 dRow4, dRow6 + VZIP.16 dRow5, dRow7 + + VZIP.32 dRow0, dRow4 + VZIP.32 dRow2, dRow6 + VZIP.32 dRow3, dRow7 + VZIP.32 dRow1, dRow5 + + + ;Realign the pointers + + CMP bS3210, #0 + VABD dAp2p0, dP_2, dP_0 + VABD dAp0q0, dP_0, dQ_0 + BEQ NoFilterBS0 + + VABD
dAp1p0, dP_1, dP_0 + VABD dAq1q0, dQ_1, dQ_0 + + VMOV.U32 dBS3210[0], bS3210 + VCGT dFilt, dAlpha, dAp0q0 + VMAX dAp1p0, dAq1q0, dAp1p0 + VMOVL qBS3210, dBS3210.U8 + VABD dAq2q0, dQ_2, dQ_0 + VCGT dMask_bs.S16, dBS3210.S16, #0 + + VCGT dAp1p0, dBeta, dAp1p0 + VCGT dAp2p0, dBeta, dAp2p0 + VAND dFilt, dMask_bs.U8 + + TST bS3210, Mask_3 + + VCGT dAq2q0, dBeta, dAq2q0 + VAND dFilt, dFilt, dAp1p0 + + VAND dAqflg, dFilt, dAq2q0 + VAND dApflg, dFilt, dAp2p0 + + ;// bS < 4 Filtering + BLNE armVCM4P10_DeblockingChromabSLT4_unsafe + + TST bS3210, Mask_4 + + SUB pSrcDst, pSrcDst, srcdstStep, LSL #3 + VTST dFilt_bs, dFilt_bs, dMask_4 + + ;// bS == 4 Filtering + BLNE armVCM4P10_DeblockingChromabSGE4_unsafe + + VBIT dP_0n, dP_0t, dFilt_bs + VBIT dQ_0n, dQ_0t, dFilt_bs + + ;// Result Storage + ADD pSrcDst_P, pSrcDst, #3 + VBIF dP_0n, dP_0, dFilt + + ADD pTmp2, pSrcDst_P, srcdstStep + ADD step, srcdstStep, srcdstStep + VBIF dQ_0n, dQ_0, dFilt + + ADDS XY, XY, XY ;// sets the Z flag tested by BNE LoopY after the stores below + + VST1 {dP_0n[0]}, [pSrcDst_P], step + VST1 {dP_0n[1]}, [pTmp2], step + VST1 {dP_0n[2]}, [pSrcDst_P], step + VST1 {dP_0n[3]}, [pTmp2], step + VST1 {dP_0n[4]}, [pSrcDst_P], step + VST1 {dP_0n[5]}, [pTmp2], step + VST1 {dP_0n[6]}, [pSrcDst_P], step + VST1 {dP_0n[7]}, [pTmp2], step + + ADD pSrcDst_Q, pSrcDst, #4 + ADD pTmp, pSrcDst_Q, srcdstStep + + VST1 {dQ_0n[0]}, [pSrcDst_Q], step + VST1 {dQ_0n[1]}, [pTmp], step + VST1 {dQ_0n[2]}, [pSrcDst_Q], step + VST1 {dQ_0n[3]}, [pTmp], step + VST1 {dQ_0n[4]}, [pSrcDst_Q], step + VST1 {dQ_0n[5]}, [pTmp], step + VST1 {dQ_0n[6]}, [pSrcDst_Q], step + VST1 {dQ_0n[7]}, [pTmp], step + + ADD pSrcDst, pSrcDst, #4 + + BNE LoopY + + MOV r0, #OMX_Sts_NoErr + + M_EXIT + +NoFilterBS0 + VLD1 {dAlpha[]}, [pAlpha] + ADD pSrcDst, pSrcDst, #4 + SUB pSrcDst, pSrcDst, srcdstStep, LSL #3 + ADDS XY, XY, XY + VLD1 {dBeta[]}, [pBeta] + ADD pThresholds, pThresholds, #4 + BNE LoopY + + MOV r0, #OMX_Sts_NoErr + + M_END + + ENDIF + + + END + + diff --git
a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s new file mode 100755 index 0000000000000000000000000000000000000000..0c3f4f28810e9b4e65368f83dfb93deaeb679d1b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s @@ -0,0 +1,288 @@ +;// +;// +;// File Name: omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + + IMPORT armVCM4P10_DeblockingLumabSLT4_unsafe + IMPORT armVCM4P10_DeblockingLumabSGE4_unsafe + + IF CortexA8 + +LOOP_COUNT EQU 0x55000000 ;// doubled by ADDS XY,XY,XY each pass: carry set ends LoopX (every 2nd pass), zero ends LoopY (after 8 passes) + + +;// Function arguments + +pSrcDst RN 0 +srcdstStep RN 1 +pAlpha RN 2 +pBeta RN 3 + +pThresholds RN 5 +pBS RN 4 +bS10 RN 12 + +pAlpha_0 RN 2 +pBeta_0 RN 3 + +pAlpha_1 RN 7 +pBeta_1 RN 8 + + + +;// Loop + +XY RN 9 + +pTmp RN 6 +step RN 10 + +;// Pixels +dP_0 DN D4.U8 +dP_1 DN D5.U8 +dP_2 DN D6.U8 +dP_3 DN D7.U8 +dQ_0 DN D8.U8 +dQ_1 DN D9.U8 +dQ_2 DN D10.U8 +dQ_3 DN D11.U8 + + +;// Filtering Decision +dAlpha DN D0.U8 +dBeta DN D2.U8 + +dFilt DN D16.U8 +dAqflg DN D12.U8 +dApflg DN D17.U8 + +dAp0q0 DN D13.U8 +dAp1p0 DN D12.U8 +dAq1q0 DN D18.U8 +dAp2p0 DN D19.U8 +dAq2q0 DN D17.U8 + +;// bSLT4 +dTC0 DN D18.U8 +dTC1 DN D19.U8 +dTC01 DN D18.U8 + +dTCs DN D31.S8 +dTC DN D31.U8 + +dMask_0 DN D14.U8 +dMask_1 DN D15.U8 + +Mask_0 RN 11 + +dTemp DN D19.U8 + +;// Computing P0,Q0 +qDq0p0 QN Q10.S16 +qDp1q1 QN Q11.S16 +qDelta QN Q10.S16 ; reuse qDq0p0 +dDelta DN D20.S8 + + +;// Computing P1,Q1 +dRp0q0 DN D24.U8 + +dMaxP DN D23.U8 +dMinP DN D22.U8 + +dMaxQ DN D19.U8 +dMinQ DN D21.U8 + +dDeltaP DN D26.U8 +dDeltaQ DN D27.U8 + +qP_0n QN Q14.S16 +qQ_0n QN
Q12.S16 + +dQ_0n DN D24.U8 +dQ_1n DN D25.U8 +dP_0n DN D29.U8 +dP_1n DN D30.U8 + +;// bSGE4 + +qSp0q0 QN Q10.U16 + +qSp2q1 QN Q11.U16 +qSp0q0p1 QN Q12.U16 +qSp3p2 QN Q13.U16 +dHSp0q1 DN D28.U8 + +qSq2p1 QN Q11.U16 +qSp0q0q1 QN Q12.U16 +qSq3q2 QN Q13.U16 ;!! +dHSq0p1 DN D28.U8 ;!! + +qTemp1 QN Q11.U16 ;!!;qSp2q1 +qTemp2 QN Q12.U16 ;!!;qSp0q0p1 + +dP_0t DN D28.U8 ;!!;dHSp0q1 +dQ_0t DN D22.U8 ;!!;Temp1 + +dP_0n DN D29.U8 +dP_1n DN D30.U8 +dP_2n DN D31.U8 + +dQ_0n DN D24.U8 ;!!;Temp2 +dQ_1n DN D25.U8 ;!!;Temp2 +dQ_2n DN D28.U8 ;!!;dQ_0t + + + ;// Function header + M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11, d15 + + ;//Arguments on the stack + M_ARG ppThresholds, 4 + M_ARG ppBS, 4 + + ;// d0-dAlpha_0 + ;// d2-dBeta_0 + + ADD pAlpha_1, pAlpha_0, #1 + ADD pBeta_1, pBeta_0, #1 + + VLD1 {dAlpha[]}, [pAlpha_0] + SUB pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// back up 4 rows: the first row loaded below is dP_3 + VLD1 {dBeta[]}, [pBeta_0] + + M_LDR pBS, ppBS + M_LDR pThresholds, ppThresholds + + MOV Mask_0,#0 + + ;dMask_0-14 + ;dMask_1-15 + + VMOV dMask_0, #0 + VMOV dMask_1, #1 + + ADD step, srcdstStep, srcdstStep + + LDR XY,=LOOP_COUNT + + ;// p0-p3 - d4-d7 + ;// q0-q3 - d8-d11 +LoopY +LoopX + LDRH bS10, [pBS], #2 + ADD pTmp, pSrcDst, srcdstStep + CMP bS10, #0 + BEQ NoFilterBS0 + + VLD1 dP_3, [pSrcDst], step + VLD1 dP_2, [pTmp], step + VLD1 dP_1, [pSrcDst], step + VLD1 dP_0, [pTmp], step + VLD1 dQ_0, [pSrcDst], step + VABD dAp1p0, dP_0, dP_1 + VLD1 dQ_1, [pTmp] + VABD dAp0q0, dQ_0, dP_0 + VLD1 dQ_2, [pSrcDst], srcdstStep + + VABD dAq1q0, dQ_1, dQ_0 + VABD dAp2p0, dP_2, dP_0 + VCGT dFilt, dAlpha, dAp0q0 + + TST bS10, #0xff ;// low bS byte zero -> VMOVEQ below clears dFilt[0] + VMAX dAp1p0, dAq1q0, dAp1p0 + VABD dAq2q0, dQ_2, dQ_0 + + VMOVEQ.U32 dFilt[0], Mask_0 + TST bS10, #0xff00 ;// high bS byte zero -> VMOVEQ below clears dFilt[1] + + VCGT dAp2p0, dBeta, dAp2p0 + VCGT dAp1p0, dBeta, dAp1p0 + + VMOVEQ.U32 dFilt[1], Mask_0 + + VCGT dAq2q0, dBeta, dAq2q0 + VLD1 dQ_3, [pSrcDst] + VAND dFilt, dFilt, dAp1p0 + TST bS10, #4 ;// bit 2 of bS10 set -> BNE bSGE4 below + + VAND dAqflg, dFilt, dAq2q0 + VAND dApflg, dFilt, dAp2p0 + + BNE bSGE4 +bSLT4 + ;// bS < 4 Filtering + SUB
pSrcDst, pSrcDst, srcdstStep, LSL #2 + SUB pSrcDst, pSrcDst, srcdstStep + + BL armVCM4P10_DeblockingLumabSLT4_unsafe + + ;// Result Storage + VST1 dP_1n, [pSrcDst], srcdstStep + VST1 dP_0n, [pSrcDst], srcdstStep + SUB pTmp, pSrcDst, srcdstStep, LSL #2 + VST1 dQ_0n, [pSrcDst], srcdstStep + ADDS XY, XY, XY + VST1 dQ_1n, [pSrcDst] + ADD pSrcDst, pTmp, #8 + + BCC LoopX + B ExitLoopY + +NoFilterBS0 + ADD pSrcDst, pSrcDst, #8 + ADDS XY, XY, XY + ADD pThresholds, pThresholds, #2 + BCC LoopX + B ExitLoopY +bSGE4 + ;// bS >= 4 Filtering + SUB pSrcDst, pSrcDst, srcdstStep, LSL #2 + SUB pSrcDst, pSrcDst, srcdstStep, LSL #1 + BL armVCM4P10_DeblockingLumabSGE4_unsafe + + ;// Result Storage + VST1 dP_2n, [pSrcDst], srcdstStep + VST1 dP_1n, [pSrcDst], srcdstStep + VST1 dP_0n, [pSrcDst], srcdstStep + SUB pTmp, pSrcDst, srcdstStep, LSL #2 + VST1 dQ_0n, [pSrcDst], srcdstStep + ADDS XY,XY,XY + VST1 dQ_1n, [pSrcDst], srcdstStep + ADD pThresholds, pThresholds, #2 + VST1 dQ_2n, [pSrcDst] + + ADD pSrcDst, pTmp, #8 + BCC LoopX + +ExitLoopY + + SUB pSrcDst, pSrcDst, #16 ;// undo the two 8-byte LoopX advances + VLD1 {dAlpha[]}, [pAlpha_1] + ADD pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// move down 4 rows for the next LoopY pass + VLD1 {dBeta[]}, [pBeta_1] + BNE LoopY ;// Z flag is still the one set by the last ADDS XY,XY,XY + + MOV r0, #OMX_Sts_NoErr + + M_END + + ENDIF + + + + + END + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s new file mode 100755 index 0000000000000000000000000000000000000000..e6fbb34225800bd48ddba80bbcc005e220d73e98 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s @@ -0,0 +1,436 @@ +;// +;// +;// File Name: omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + + IMPORT armVCM4P10_DeblockingLumabSLT4_unsafe + IMPORT armVCM4P10_DeblockingLumabSGE4_unsafe + + IF CortexA8 + +LOOP_COUNT EQU 0x11000000 + + +;// Function arguments + +pSrcDst RN 0 +srcdstStep RN 1 +pAlpha RN 2 +pBeta RN 3 + +pThresholds RN 5 +pBS RN 4 +bS10 RN 12 + +pAlpha_0 RN 2 +pBeta_0 RN 3 + +pAlpha_1 RN 7 +pBeta_1 RN 8 + +pTmp RN 10 +pTmpStep RN 11 + +;// Loop + +XY RN 9 + +;// Rows input +dRow0 DN D7.U8 +dRow1 DN D8.U8 +dRow2 DN D5.U8 +dRow3 DN D10.U8 +dRow4 DN D6.U8 +dRow5 DN D9.U8 +dRow6 DN D4.U8 +dRow7 DN D11.U8 + +;// dRow0 - dP_3, dRow1 - dQ_0, dRow2 - dP_1, dRow3 - dQ_2 +;// dRow4 - dP_2, dRow5 - dQ_1, dRow6 - dP_0, dRow7 - dQ_3 + +;// Rows output +dRown0 DN D7.U8 +dRown1 DN D24.U8 +dRown2 DN D30.U8 +dRown3 DN D10.U8 +dRown4 DN D6.U8 +dRown5 DN D25.U8 +dRown6 DN D29.U8 +dRown7 DN D11.U8 + +;// dP_0n DN D29.U8 +;// dP_1n DN D30.U8 +;// dP_2n DN D31.U8 +;// +;// dQ_0n DN D24.U8 ;!!;Temp2 +;// dQ_1n DN D25.U8 ;!!;Temp2 +;// dQ_2n DN D28.U8 ;!!;dQ_0t +;// +;// dRown0 - dP_3, dRown1 - dQ_0n +;// dRown2 - dP_1n, dRown3 - dQ_2 +;// dRown4 - dP_2, dRown5 - dQ_1n +;// dRown6 - dP_0n, dRown7 - dQ_3 + +dRow0n DN D7.U8 +dRow1n DN D24.U8 +dRow2n DN D30.U8 +dRow3n DN D28.U8 +dRow4n DN D31.U8 +dRow5n DN D25.U8 +dRow6n DN D29.U8 +dRow7n DN D11.U8 + +;// dRow0n - dP_3, dRow1n - dQ_0n, dRow2n - dP_1n, dRow3n - dQ_2n +;// dRow4n - dP_2, dRow5n - dQ_1n, dRow6n - dP_0n, dRow7n - dQ_3 + +;// Pixels +dP_0 DN D4.U8 +dP_1 DN D5.U8 +dP_2 DN D6.U8 +dP_3 DN D7.U8 +dQ_0 DN D8.U8 +dQ_1 DN D9.U8 +dQ_2 DN D10.U8 +dQ_3 DN D11.U8 + + +;// Filtering Decision +dAlpha DN D0.U8 +dBeta DN D2.U8 + +dFilt DN D16.U8 +dAqflg DN D12.U8 +dApflg DN D17.U8 + +dAp0q0 DN D13.U8 +dAp1p0 DN D12.U8 +dAq1q0 DN D18.U8 +dAp2p0 DN D19.U8 +dAq2q0 DN D17.U8 + +;// bSLT4 +dTC0 DN D18.U8 +dTC1 DN D19.U8 +dTC01 DN D18.U8 + +dTCs DN D31.S8 +dTC DN D31.U8 + +dMask_0 DN D14.U8 +dMask_1 DN D15.U8 + +Mask_0 RN 6 + +dTemp DN 
D19.U8 + +;// Computing P0,Q0 +qDq0p0 QN Q10.S16 +qDp1q1 QN Q11.S16 +qDelta QN Q10.S16 ; reuse qDq0p0 +dDelta DN D20.S8 + + +;// Computing P1,Q1 +dRp0q0 DN D24.U8 + +dMaxP DN D23.U8 +dMinP DN D22.U8 + +dMaxQ DN D19.U8 +dMinQ DN D21.U8 + +dDeltaP DN D26.U8 +dDeltaQ DN D27.U8 + +qP_0n QN Q14.S16 +qQ_0n QN Q12.S16 + +dQ_0n DN D24.U8 +dQ_1n DN D25.U8 +dP_0n DN D29.U8 +dP_1n DN D30.U8 + +;// bSGE4 + +qSp0q0 QN Q10.U16 + +qSp2q1 QN Q11.U16 +qSp0q0p1 QN Q12.U16 +qSp3p2 QN Q13.U16 +dHSp0q1 DN D28.U8 + +qSq2p1 QN Q11.U16 +qSp0q0q1 QN Q12.U16 +qSq3q2 QN Q13.U16 ;!! +dHSq0p1 DN D28.U8 ;!! + +qTemp1 QN Q11.U16 ;!!;qSp2q1 +qTemp2 QN Q12.U16 ;!!;qSp0q0p1 + +dP_0t DN D28.U8 ;!!;dHSp0q1 +dQ_0t DN D22.U8 ;!!;Temp1 + +dP_0n DN D29.U8 +dP_1n DN D30.U8 +dP_2n DN D31.U8 + +dQ_0n DN D24.U8 ;!!;Temp2 +dQ_1n DN D25.U8 ;!!;Temp2 +dQ_2n DN D28.U8 ;!!;dQ_0t + + + ;// Function header + M_START omxVCM4P10_FilterDeblockingLuma_VerEdge_I, r11, d15 + + ;//Arguments on the stack + M_ARG ppThresholds, 4 + M_ARG ppBS, 4 + + ;// d0-dAlpha_0 + ;// d2-dBeta_0 + + ADD pAlpha_1, pAlpha_0, #1 + ADD pBeta_1, pBeta_0, #1 + + VLD1 {dAlpha[]}, [pAlpha_0] + SUB pSrcDst, pSrcDst, #4 + VLD1 {dBeta[]}, [pBeta_0] + + M_LDR pBS, ppBS + M_LDR pThresholds, ppThresholds + + MOV Mask_0,#0 + + ;dMask_0-14 + ;dMask_1-15 + + VMOV dMask_0, #0 + VMOV dMask_1, #1 + + LDR XY,=LOOP_COUNT + + ADD pTmpStep, srcdstStep, srcdstStep + + ;// p0-p3 - d4-d7 + ;// q0-q3 - d8-d11 +LoopY +LoopX + LDRH bS10, [pBS], #4 + + CMP bS10, #0 + BEQ NoFilterBS0 + + ;// Load 8 rows of data + ADD pTmp, pSrcDst, srcdstStep + VLD1 dRow0, [pSrcDst], pTmpStep + VLD1 dRow1, [pTmp], pTmpStep + VLD1 dRow2, [pSrcDst], pTmpStep + VZIP.8 dRow0, dRow1 + VLD1 dRow3, [pTmp], pTmpStep + VLD1 dRow4, [pSrcDst], pTmpStep + VZIP.8 dRow2, dRow3 + VLD1 dRow5, [pTmp], pTmpStep + VLD1 dRow6, [pSrcDst], pTmpStep + VLD1 dRow7, [pTmp], pTmpStep + VZIP.8 dRow4, dRow5 + VZIP.16 dRow1, dRow3 + + + ;// dRow0 = [q3r0 q2r0 q1r0 q0r0 p0r0 p1r0 p2r0 p3r0] + ;// dRow1 = [q3r1 q2r1 
q1r1 q0r1 p0r1 p1r1 p2r1 p3r1] + ;// dRow2 = [q3r2 q2r2 q1r2 q0r2 p0r2 p1r2 p2r2 p3r2] + ;// dRow3 = [q3r3 q2r3 q1r3 q0r3 p0r3 p1r3 p2r3 p3r3] + ;// dRow4 = [q3r4 q2r4 q1r4 q0r4 p0r4 p1r4 p2r4 p3r4] + ;// dRow5 = [q3r5 q2r5 q1r5 q0r5 p0r5 p1r5 p2r5 p3r5] + ;// dRow6 = [q3r6 q2r6 q1r6 q0r6 p0r6 p1r6 p2r6 p3r6] + ;// dRow7 = [q3r7 q2r7 q1r7 q0r7 p0r7 p1r7 p2r7 p3r7] + + ;// 8x8 Transpose + + VZIP.8 dRow6, dRow7 + + SUB pSrcDst, pSrcDst, srcdstStep, LSL #3 + VZIP.16 dRow0, dRow2 + VZIP.16 dRow5, dRow7 + + + VZIP.16 dRow4, dRow6 + VZIP.32 dRow1, dRow5 + VZIP.32 dRow2, dRow6 + VZIP.32 dRow3, dRow7 + VZIP.32 dRow0, dRow4 + + + ;// dRow0 - dP_3, dRow1 - dQ_0, dRow2 - dP_1, dRow3 - dQ_2 + ;// dRow4 - dP_2, dRow5 - dQ_1, dRow6 - dP_0, dRow7 - dQ_3 + + ;// dQ_0 = [q0r7 q0r6 q0r5 q0r4 q0r3 q0r2 q0r1 q0r0] + ;// dQ_1 = [q1r7 q1r6 q1r5 q1r4 q1r3 q1r2 q1r1 q1r0] + ;// dQ_2 = [q2r7 q2r6 q2r5 q2r4 q2r3 q2r2 q2r1 q2r0] + ;// dQ_3 = [q3r7 q3r6 q3r5 q3r4 q3r3 q3r2 q3r1 q3r0] + + ;// dP_0 = [p0r7 p0r6 p0r5 p0r4 p0r3 p0r2 p0r1 p0r0] + ;// dP_1 = [p1r7 p1r6 p1r5 p1r4 p1r3 p1r2 p1r1 p1r0] + ;// dP_2 = [p2r7 p2r6 p2r5 p2r4 p2r3 p2r2 p2r1 p2r0] + ;// dP_3 = [p3r7 p3r6 p3r5 p3r4 p3r3 p3r2 p3r1 p3r0] + + VABD dAp0q0, dP_0, dQ_0 + VABD dAp1p0, dP_1, dP_0 + + VABD dAq1q0, dQ_1, dQ_0 + VABD dAp2p0, dP_2, dP_0 + + TST bS10, #0xff + VCGT dFilt, dAlpha, dAp0q0 + + VMAX dAp1p0, dAq1q0, dAp1p0 + VABD dAq2q0, dQ_2, dQ_0 + + VMOVEQ.U32 dFilt[0], Mask_0 + TST bS10, #0xff00 + + VCGT dAp2p0, dBeta, dAp2p0 + VCGT dAp1p0, dBeta, dAp1p0 + + VMOVEQ.U32 dFilt[1], Mask_0 + + VCGT dAq2q0, dBeta, dAq2q0 + VAND dFilt, dFilt, dAp1p0 + TST bS10, #4 + + VAND dAqflg, dFilt, dAq2q0 + VAND dApflg, dFilt, dAp2p0 + + BNE bSGE4 +bSLT4 + ;// bS < 4 Filtering + + BL armVCM4P10_DeblockingLumabSLT4_unsafe + + ;// Transpose + + VZIP.8 dP_3, dP_2 + VZIP.8 dP_1n, dP_0n + VZIP.8 dQ_0n, dQ_1n + VZIP.8 dQ_2, dQ_3 + + + VZIP.16 dP_3, dP_1n + ADD pTmp, pSrcDst, srcdstStep + VZIP.16 dQ_0n, dQ_2 + VZIP.16 dQ_1n, dQ_3 + VZIP.16 dP_2, 
dP_0n + + VZIP.32 dP_3, dQ_0n + VZIP.32 dP_1n, dQ_2 + VZIP.32 dP_2, dQ_1n + VZIP.32 dP_0n, dQ_3 + + ;// dRown0 - dP_3, dRown1 - dQ_0n + ;// dRown2 - dP_1n, dRown3 - dQ_2 + ;// dRown4 - dP_2, dRown5 - dQ_1n + ;// dRown6 - dP_0n, dRown7 - dQ_3 + + VST1 dRown0, [pSrcDst], pTmpStep + VST1 dRown1, [pTmp], pTmpStep + VST1 dRown2, [pSrcDst], pTmpStep + VST1 dRown3, [pTmp], pTmpStep + ;1 + VST1 dRown4, [pSrcDst], pTmpStep + VST1 dRown5, [pTmp], pTmpStep + ADDS XY, XY, XY + VST1 dRown6, [pSrcDst], pTmpStep + ADD pThresholds, pThresholds, #2 + VST1 dRown7, [pTmp], srcdstStep + + SUB pSrcDst, pSrcDst, srcdstStep, LSL #3 + VLD1 {dAlpha[]}, [pAlpha_1] + ADD pSrcDst, pSrcDst, #4 + VLD1 {dBeta[]}, [pBeta_1] + + BCC LoopX + B ExitLoopY + +NoFilterBS0 + ADD pSrcDst, pSrcDst, #4 + ADDS XY, XY, XY + VLD1 {dAlpha[]}, [pAlpha_1] + ADD pThresholds, pThresholds, #4 + VLD1 {dBeta[]}, [pBeta_1] + BCC LoopX + B ExitLoopY +bSGE4 + ;// bS >= 4 Filtering + + BL armVCM4P10_DeblockingLumabSGE4_unsafe + + ;// Transpose + + VZIP.8 dP_3, dP_2n + VZIP.8 dP_1n, dP_0n + VZIP.8 dQ_0n, dQ_1n + VZIP.8 dQ_2n, dQ_3 + + VZIP.16 dP_3, dP_1n + ADD pTmp, pSrcDst, srcdstStep + VZIP.16 dQ_0n, dQ_2n + VZIP.16 dQ_1n, dQ_3 + VZIP.16 dP_2n, dP_0n + + VZIP.32 dP_3, dQ_0n + VZIP.32 dP_1n, dQ_2n + VZIP.32 dP_2n, dQ_1n + VZIP.32 dP_0n, dQ_3 + + ;// dRow0n - dP_3, dRow1n - dQ_0n, dRow2n - dP_1n, dRow3n - dQ_2n + ;// dRow4n - dP_2, dRow5n - dQ_1n, dRow6n - dP_0n, dRow7n - dQ_3 + + VST1 dRow0n, [pSrcDst], pTmpStep + VST1 dRow1n, [pTmp], pTmpStep + VST1 dRow2n, [pSrcDst], pTmpStep + VST1 dRow3n, [pTmp], pTmpStep + VST1 dRow4n, [pSrcDst], pTmpStep + VST1 dRow5n, [pTmp], pTmpStep + ADDS XY,XY,XY + VST1 dRow6n, [pSrcDst], pTmpStep + ADD pThresholds, pThresholds, #4 + VST1 dRow7n, [pTmp], pTmpStep + + SUB pSrcDst, pSrcDst, srcdstStep, LSL #3 + VLD1 {dAlpha[]}, [pAlpha_1] + ADD pSrcDst, pSrcDst, #4 + VLD1 {dBeta[]}, [pBeta_1] + + BCC LoopX + +ExitLoopY + SUB pBS, pBS, #14 + SUB pThresholds, pThresholds, #14 + SUB pSrcDst, 
pSrcDst, #16 + VLD1 {dAlpha[]}, [pAlpha_0] + ADD pSrcDst, pSrcDst, srcdstStep, LSL #3 + VLD1 {dBeta[]}, [pBeta_0] + BNE LoopY + + MOV r0, #OMX_Sts_NoErr + + M_END + + ENDIF + + + END + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c new file mode 100755 index 0000000000000000000000000000000000000000..3ce41bea49183c16df54e35f22995e6777e7670f --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c @@ -0,0 +1,79 @@ +/** + * + * File Name: omxVCM4P10_InterpolateChroma.c + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This function will calculate 1/8 Pixel interpolation for Chroma Block + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + + +/** + * Function: omxVCM4P10_InterpolateChroma, + * + * Description: + * Performs 1/8-pixel interpolation for inter chroma MB. + * + * Remarks: + * + * Parameters: + * [in] pSrc Pointer to the source reference frame buffer + * [in] srcStep Reference frame step in byte + * [in] dstStep Destination frame step in byte. Must be multiple of roi.width. + * [in] dx Fractional part of horizontal motion vector component + * in 1/8 pixel unit;valid in the range [0,7] + * [in] dy Fractional part of vertical motion vector component + * in 1/8 pixel unit;valid in the range [0,7] + * [in] roi Dimension of the interpolation region;the parameters roi.width and roi.height must + * be equal to either 2, 4, or 8. + * [out] pDst Pointer to the destination frame buffer. 
+ * if roi.width==2, 2-byte alignment required + * if roi.width==4, 4-byte alignment required + * if roi.width==8, 8-byte alignment required + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns OMX_Sts_BadArgErr: + * pSrc or pDst is NULL. + * srcStep or dstStep < 8. + * dx or dy is out of range [0-7]. + * roi.width or roi.height is out of range {2,4,8}. + * roi.width is equal to 2, but pDst is not 2-byte aligned. + * roi.width is equal to 4, but pDst is not 4-byte aligned. + * roi.width is equal to 8, but pDst is not 8 byte aligned. + * srcStep or dstStep is not a multiple of 8. + * + */ + +OMXResult omxVCM4P10_InterpolateChroma ( + const OMX_U8* pSrc, + OMX_S32 srcStep, + OMX_U8* pDst, + OMX_S32 dstStep, + OMX_S32 dx, + OMX_S32 dy, + OMXSize roi + ) +{ + /* Thin wrapper: forwards straight to armVCM4P10_Interpolate_Chroma and returns its result. No argument validation is done here. NOTE(review): confirm whether the callee enforces the OMX_Sts_BadArgErr cases listed in the header comment, or whether callers must guarantee them. The callee takes width and height before dx and dy, and the const qualifier of pSrc is cast away for the call. */ return armVCM4P10_Interpolate_Chroma + ((OMX_U8*)pSrc, srcStep, pDst, dstStep, roi.width, roi.height, dx, dy); +} + + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s new file mode 100755 index 0000000000000000000000000000000000000000..942ebc6d03d5f468ccafb2cde2b65d1fb76da6cc --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s @@ -0,0 +1,553 @@ +;// +;// +;// File Name: omxVCM4P10_InterpolateLuma_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + +;// Function: +;// omxVCM4P10_InterpolateLuma +;// +;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly. 
+;// Performs quarter pel interpolation of inter luma MB. +;// It's assumed that the frame is already padded when calling this function. +;// Parameters: +;// [in] pSrc Pointer to the source reference frame buffer +;// [in] srcStep Reference frame step in byte +;// [in] dstStep Destination frame step in byte. Must be multiple of roi.width +;// [in] dx Fractional part of horizontal motion vector +;// component in 1/4 pixel unit; valid in the range [0,3] +;// [in] dy Fractional part of vertical motion vector +;// component in 1/4 pixel unit; valid in the range [0,3] +;// [in] roi Dimension of the interpolation region;the parameters roi.width and roi.height must +;// be equal to either 4, 8, or 16. +;// [out] pDst Pointer to the destination frame buffer. +;// if roi.width==4, 4-byte alignment required +;// if roi.width==8, 8-byte alignment required +;// if roi.width==16, 16-byte alignment required +;// +;// Return Value: +;// If the function runs without error, it returns OMX_Sts_NoErr. +;// It is assumed that the following conditions are satisfied before calling this function: +;// pSrc and pDst are not NULL. +;// srcStep and dstStep are >= roi.width. +;// dx and dy are in the range [0-3]. +;// roi.width and roi.height are in {4, 8, 16}. +;// If roi.width is equal to 4, pDst is 4 byte aligned. +;// If roi.width is equal to 8, pDst is 8 byte aligned. +;// If roi.width is equal to 16, pDst is 16 byte aligned. +;// srcStep and dstStep are multiples of 8. 
+;// +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + + EXPORT omxVCM4P10_InterpolateLuma + + + IF CortexA8 + IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe + IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + ENDIF + + + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 +iHeight RN 4 +iWidth RN 5 + +;// Declare other intermediate registers +idx RN 6 +idy RN 7 +index RN 6 +Temp RN 12 +pArgs RN 11 + + + IF CortexA8 + + ;// + ;// Interpolation of luma is implemented by processing block of pixels, size 4x4 at a time. + ;// + M_ALLOC4 ppArgs, 16 + + ;// Function header + M_START omxVCM4P10_InterpolateLuma, r11, d15 + +pSrcBK RN 8 + +;// Declare Neon registers +dCoeff5 DN 30.S16 +dCoeff20 DN 31.S16 + +;// Registers used for implementing Horizontal interpolation +dSrc0c DN 14.U8 +dSrc1c DN 16.U8 +dSrc2c DN 18.U8 +dSrc3c DN 20.U8 +dSrc0d DN 15.U8 +dSrc1d DN 17.U8 +dSrc2d DN 19.U8 +dSrc3d DN 21.U8 +dAccH0 DN 22.U8 +dAccH1 DN 24.U8 +dAccH2 DN 26.U8 +dAccH3 DN 28.U8 +dResultH0 DN 22.U32 +dResultH1 DN 24.U32 +dResultH2 DN 26.U32 +dResultH3 DN 28.U32 + +;// Registers used for implementing Vertical interpolation +dSrc0 DN 9.U8 +dSrc1 DN 10.U8 +dSrc2 DN 11.U8 +dSrc3 DN 12.U8 +dSrc4 DN 13.U8 +dAccV0 DN 0.U8 +dAccV1 DN 2.U8 +dAccV2 DN 4.U8 +dAccV3 DN 6.U8 +dResultV0 DN 0.U32 +dResultV1 DN 2.U32 +dResultV2 DN 4.U32 +dResultV3 DN 6.U32 + +;// Registers used for implementing Diagonal interpolation +dTAcc0 DN 0.U8 +dTAcc1 DN 2.U8 +dTAcc2 DN 4.U8 +dTAcc3 DN 6.U8 +dTRes0 DN 0.32 +dTRes1 DN 2.32 +dTRes2 DN 4.32 +dTRes3 DN 6.32 +dTResult0 DN 14.U8 +dTResult1 DN 16.U8 +dTResult2 DN 18.U8 +dTResult3 DN 20.U8 +dTempP0 DN 18.S16 +dTempP1 DN 19.S16 +dTempQ0 DN 20.S16 +dTempQ1 DN 21.S16 +dTempR0 DN 22.S16 +dTempR1 DN 23.S16 +dTempS0 DN 24.S16 +dTempS1 DN 25.S16 +qTempP01 QN 9.S16 +qTempQ01 QN 10.S16 
+qTempR01 QN 11.S16 +qTempS01 QN 12.S16 + +;// Intermediate values for averaging +qRes2 QN 7.S16 +qRes3 QN 8.S16 +qRes4 QN 9.S16 +qRes5 QN 10.S16 +qRes6 QN 11.S16 + +;// For implementing copy +dDst0 DN 9.32 +dDst1 DN 10.32 +dDst2 DN 11.32 +dDst3 DN 12.32 + + ;// Define stack arguments + M_ARG ptridx, 4 + M_ARG ptridy, 4 + M_ARG ptrWidth, 4 + M_ARG ptrHeight, 4 + + ;// Load the stack arguments: idx, idy and the roi width and height + M_LDR idx, ptridx + M_LDR idy, ptridy + M_LDR iWidth, ptrWidth + M_LDR iHeight, ptrHeight + + ADD index, idx, idy, LSL #2 ;// [index] = [idy][idx], i.e. index = idx + 4*idy + M_ADR pArgs, ppArgs + + ;// Move coefficients Neon registers + VMOV dCoeff20, #20 + VMOV dCoeff5, #5 + +Block4x4WidthLoop +Block4x4HeightLoop + + STM pArgs, {pSrc,srcStep,pDst,dstStep} ;// saved for this 4x4 block; reloaded with LDM at Block4x4LoopEnd + + ;// switch table using motion vector as index + ADD pc, pc, index, LSL #2 ;// in ARM state pc reads as this instruction's address + 8, so index 0 selects B Case_0; the B Case_f directly below only fills the skipped slot + B Case_f + B Case_0 + B Case_1 + B Case_2 + B Case_3 + B Case_4 + B Case_5 + B Case_6 + B Case_7 + B Case_8 + B Case_9 + B Case_a + B Case_b + B Case_c + B Case_d + B Case_e + B Case_f + +Case_0 + ;// Case G + M_PRINTF "Case 0 \n" + + ;// Loads a 4x4 block of .8 and stores as .32 + ADD Temp, pSrc, srcStep, LSL #1 + VLD1 dSrc0, [pSrc], srcStep + VLD1 dSrc2, [Temp], srcStep + VLD1 dSrc1, [pSrc] + VLD1 dSrc3, [Temp] + + ADD Temp, pDst, dstStep, LSL #1 + VST1 dDst0[0], [pDst], dstStep + VST1 dDst2[0], [Temp], dstStep + VST1 dDst1[0], [pDst] + VST1 dDst3[0], [Temp] + M_ADR pArgs, ppArgs + B Block4x4LoopEnd +Case_1 + ;// Case a + M_PRINTF "Case 1 \n" + + SUB pSrc, pSrc, #2 + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + VRHADD dAccH0, dAccH0, dSrc0c + VRHADD dAccH2, dAccH2, dSrc2c + VRHADD dAccH1, dAccH1, dSrc1c + VRHADD dAccH3, dAccH3, dSrc3c + ADD Temp, pDst, dstStep, LSL #1 + VST1 dResultH0[0], [pDst], dstStep + VST1 dResultH2[0], [Temp], dstStep + VST1 dResultH1[0], [pDst] + VST1 dResultH3[0], [Temp] + M_ADR pArgs, ppArgs + B Block4x4LoopEnd +Case_2 + ;// Case b + M_PRINTF "Case 2 \n" + + SUB pSrc, pSrc, #2 + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + 
ADD Temp, pDst, dstStep, LSL #1 + VST1 dResultH0[0], [pDst], dstStep + VST1 dResultH2[0], [Temp], dstStep + VST1 dResultH1[0], [pDst] + VST1 dResultH3[0], [Temp] + M_ADR pArgs, ppArgs + B Block4x4LoopEnd +Case_3 + ;// Case c + M_PRINTF "Case 3 \n" + + SUB pSrc, pSrc, #2 + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + VRHADD dAccH0, dAccH0, dSrc0d + VRHADD dAccH2, dAccH2, dSrc2d + VRHADD dAccH1, dAccH1, dSrc1d + VRHADD dAccH3, dAccH3, dSrc3d + ADD Temp, pDst, dstStep, LSL #1 + VST1 dResultH0[0], [pDst], dstStep + VST1 dResultH2[0], [Temp], dstStep + VST1 dResultH1[0], [pDst] + VST1 dResultH3[0], [Temp] + M_ADR pArgs, ppArgs + B Block4x4LoopEnd +Case_4 + ;// Case d + M_PRINTF "Case 4 \n" + + SUB pSrc, pSrc, srcStep, LSL #1 + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + VRHADD dAccV0, dAccV0, dSrc0 + VRHADD dAccV2, dAccV2, dSrc2 + VRHADD dAccV1, dAccV1, dSrc1 + VRHADD dAccV3, dAccV3, dSrc3 + ADD Temp, pDst, dstStep, LSL #1 + VST1 dResultV0[0], [pDst], dstStep + VST1 dResultV2[0], [Temp], dstStep + VST1 dResultV1[0], [pDst] + VST1 dResultV3[0], [Temp] + M_ADR pArgs, ppArgs + B Block4x4LoopEnd +Case_5 + ;// Case e + M_PRINTF "Case 5 \n" + + MOV pSrcBK, pSrc + SUB pSrc, pSrc, srcStep, LSL #1 + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + SUB pSrc, pSrcBK, #2 + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + VRHADD dAccH0, dAccH0, dAccV0 + VRHADD dAccH2, dAccH2, dAccV2 + VRHADD dAccH1, dAccH1, dAccV1 + VRHADD dAccH3, dAccH3, dAccV3 + ADD Temp, pDst, dstStep, LSL #1 + VST1 dResultH0[0], [pDst], dstStep + VST1 dResultH2[0], [Temp], dstStep + VST1 dResultH1[0], [pDst] + VST1 dResultH3[0], [Temp] + + M_ADR pArgs, ppArgs + B Block4x4LoopEnd +Case_6 + ;// Case f + M_PRINTF "Case 6 \n" + + SUB pSrc, pSrc, srcStep, LSL #1 + SUB pSrc, pSrc, #2 + BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + VQRSHRUN dTResult0, qRes2, #5 + VQRSHRUN dTResult1, qRes3, #5 + VQRSHRUN dTResult2, qRes4, #5 + VQRSHRUN dTResult3, qRes5, #5 + VRHADD dTAcc0, dTAcc0, dTResult0 + 
VRHADD dTAcc2, dTAcc2, dTResult2 + VRHADD dTAcc1, dTAcc1, dTResult1 + VRHADD dTAcc3, dTAcc3, dTResult3 + ADD Temp, pDst, dstStep, LSL #1 + VST1 dTRes0[0], [pDst], dstStep + VST1 dTRes2[0], [Temp], dstStep + VST1 dTRes1[0], [pDst] + VST1 dTRes3[0], [Temp] + + M_ADR pArgs, ppArgs + B Block4x4LoopEnd +Case_7 + ;// Case g + M_PRINTF "Case 7 \n" + MOV pSrcBK, pSrc + ADD pSrc, pSrc, #1 + SUB pSrc, pSrc, srcStep, LSL #1 + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + SUB pSrc, pSrcBK, #2 + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + VRHADD dAccH0, dAccH0, dAccV0 + VRHADD dAccH2, dAccH2, dAccV2 + VRHADD dAccH1, dAccH1, dAccV1 + VRHADD dAccH3, dAccH3, dAccV3 + ADD Temp, pDst, dstStep, LSL #1 + VST1 dResultH0[0], [pDst], dstStep + VST1 dResultH2[0], [Temp], dstStep + VST1 dResultH1[0], [pDst] + VST1 dResultH3[0], [Temp] + + M_ADR pArgs, ppArgs + B Block4x4LoopEnd +Case_8 + ;// Case h + M_PRINTF "Case 8 \n" + + SUB pSrc, pSrc, srcStep, LSL #1 + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + ADD Temp, pDst, dstStep, LSL #1 + VST1 dResultV0[0], [pDst], dstStep + VST1 dResultV2[0], [Temp], dstStep + VST1 dResultV1[0], [pDst] + VST1 dResultV3[0], [Temp] + M_ADR pArgs, ppArgs + B Block4x4LoopEnd +Case_9 + ;// Case i + M_PRINTF "Case 9 \n" + SUB pSrc, pSrc, srcStep, LSL #1 + SUB pSrc, pSrc, #2 + BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe + VEXT dTempP0, dTempP0, dTempP1, #2 + VEXT dTempQ0, dTempQ0, dTempQ1, #2 + VEXT dTempR0, dTempR0, dTempR1, #2 + VEXT dTempS0, dTempS0, dTempS1, #2 + + VQRSHRUN dTResult0, qTempP01, #5 + VQRSHRUN dTResult1, qTempQ01, #5 + VQRSHRUN dTResult2, qTempR01, #5 + VQRSHRUN dTResult3, qTempS01, #5 + + VRHADD dTAcc0, dTAcc0, dTResult0 + VRHADD dTAcc2, dTAcc2, dTResult2 + VRHADD dTAcc1, dTAcc1, dTResult1 + VRHADD dTAcc3, dTAcc3, dTResult3 + ADD Temp, pDst, dstStep, LSL #1 + VST1 dTRes0[0], [pDst], dstStep + VST1 dTRes2[0], [Temp], dstStep + VST1 dTRes1[0], [pDst] + VST1 dTRes3[0], [Temp] + M_ADR pArgs, ppArgs + B Block4x4LoopEnd 
+Case_a + ;// Case j + M_PRINTF "Case a \n" + + SUB pSrc, pSrc, srcStep, LSL #1 + SUB pSrc, pSrc, #2 + BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + ADD Temp, pDst, dstStep, LSL #1 + VST1 dTRes0[0], [pDst], dstStep + VST1 dTRes2[0], [Temp], dstStep + VST1 dTRes1[0], [pDst] + VST1 dTRes3[0], [Temp] + M_ADR pArgs, ppArgs + B Block4x4LoopEnd +Case_b + ;// Case k + M_PRINTF "Case b \n" + SUB pSrc, pSrc, srcStep, LSL #1 + SUB pSrc, pSrc, #2 + BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe + VEXT dTempP0, dTempP0, dTempP1, #3 + VEXT dTempQ0, dTempQ0, dTempQ1, #3 + VEXT dTempR0, dTempR0, dTempR1, #3 + VEXT dTempS0, dTempS0, dTempS1, #3 + + VQRSHRUN dTResult0, qTempP01, #5 + VQRSHRUN dTResult1, qTempQ01, #5 + VQRSHRUN dTResult2, qTempR01, #5 + VQRSHRUN dTResult3, qTempS01, #5 + + VRHADD dTAcc0, dTAcc0, dTResult0 + VRHADD dTAcc2, dTAcc2, dTResult2 + VRHADD dTAcc1, dTAcc1, dTResult1 + VRHADD dTAcc3, dTAcc3, dTResult3 + ADD Temp, pDst, dstStep, LSL #1 + VST1 dTRes0[0], [pDst], dstStep + VST1 dTRes2[0], [Temp], dstStep + VST1 dTRes1[0], [pDst] + VST1 dTRes3[0], [Temp] + M_ADR pArgs, ppArgs + B Block4x4LoopEnd +Case_c + ;// Case n + M_PRINTF "Case c \n" + + SUB pSrc, pSrc, srcStep, LSL #1 + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + VRHADD dAccV0, dAccV0, dSrc1 + VRHADD dAccV2, dAccV2, dSrc3 + VRHADD dAccV1, dAccV1, dSrc2 + VRHADD dAccV3, dAccV3, dSrc4 + ADD Temp, pDst, dstStep, LSL #1 + VST1 dResultV0[0], [pDst], dstStep + VST1 dResultV2[0], [Temp], dstStep + VST1 dResultV1[0], [pDst] + VST1 dResultV3[0], [Temp] + M_ADR pArgs, ppArgs + B Block4x4LoopEnd +Case_d + ;// Case p + M_PRINTF "Case d \n" + + MOV pSrcBK, pSrc + SUB pSrc, pSrc, srcStep, LSL #1 + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + ADD pSrc, pSrcBK, srcStep + SUB pSrc, pSrc, #2 + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + VRHADD dAccH0, dAccH0, dAccV0 + VRHADD dAccH2, dAccH2, dAccV2 + VRHADD dAccH1, dAccH1, dAccV1 + VRHADD dAccH3, dAccH3, dAccV3 + ADD Temp, pDst, dstStep, 
LSL #1 + VST1 dResultH0[0], [pDst], dstStep + VST1 dResultH2[0], [Temp], dstStep + VST1 dResultH1[0], [pDst] + VST1 dResultH3[0], [Temp] + M_ADR pArgs, ppArgs + B Block4x4LoopEnd +Case_e + ;// Case q + M_PRINTF "Case e \n" + + SUB pSrc, pSrc, srcStep, LSL #1 + SUB pSrc, pSrc, #2 + BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + VQRSHRUN dTResult0, qRes3, #5 + VQRSHRUN dTResult1, qRes4, #5 + VQRSHRUN dTResult2, qRes5, #5 + VQRSHRUN dTResult3, qRes6, #5 + + VRHADD dTAcc0, dTAcc0, dTResult0 + VRHADD dTAcc2, dTAcc2, dTResult2 + VRHADD dTAcc1, dTAcc1, dTResult1 + VRHADD dTAcc3, dTAcc3, dTResult3 + ADD Temp, pDst, dstStep, LSL #1 + VST1 dTRes0[0], [pDst], dstStep + VST1 dTRes2[0], [Temp], dstStep + VST1 dTRes1[0], [pDst] + VST1 dTRes3[0], [Temp] + M_ADR pArgs, ppArgs + B Block4x4LoopEnd +Case_f + ;// Case r + M_PRINTF "Case f \n" + MOV pSrcBK, pSrc + ADD pSrc, pSrc, #1 + SUB pSrc, pSrc, srcStep, LSL #1 + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + ADD pSrc, pSrcBK, srcStep + SUB pSrc, pSrc, #2 + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + VRHADD dAccH0, dAccH0, dAccV0 + VRHADD dAccH2, dAccH2, dAccV2 + VRHADD dAccH1, dAccH1, dAccV1 + VRHADD dAccH3, dAccH3, dAccV3 + ADD Temp, pDst, dstStep, LSL #1 + VST1 dResultH0[0], [pDst], dstStep + VST1 dResultH2[0], [Temp], dstStep + VST1 dResultH1[0], [pDst] + VST1 dResultH3[0], [Temp] + M_ADR pArgs, ppArgs + + +Block4x4LoopEnd + + ;// Width Loop + ;//M_ADR pArgs, ppArgs + LDM pArgs, {pSrc,srcStep,pDst,dstStep} ;// Load arguments + SUBS iWidth, iWidth, #4 + ADD pSrc, pSrc, #4 + ADD pDst, pDst, #4 + BGT Block4x4WidthLoop + + ;// Height Loop + SUBS iHeight, iHeight, #4 + M_LDR iWidth, ptrWidth + M_ADR pArgs, ppArgs + ADD pSrc, pSrc, srcStep, LSL #2 + ADD pDst, pDst, dstStep, LSL #2 + SUB pSrc, pSrc, iWidth + SUB pDst, pDst, iWidth + BGT Block4x4HeightLoop + +EndOfInterpolation + MOV r0, #0 + M_END + + ENDIF + ;// End of CortexA8 + + END + diff --git 
a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s new file mode 100755 index 0000000000000000000000000000000000000000..3a607058adb936f9d477f23f95c47681b2f642be --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s @@ -0,0 +1,436 @@ +;// +;// +;// File Name: omxVCM4P10_PredictIntraChroma_8x8_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + EXPORT armVCM4P10_pIndexTable8x8 + +;// Define the processor variants supported by this file + + M_VARIANTS CortexA8 + + AREA table, DATA +;//------------------------------------------------------- +;// This table for implementing switch case of C in asm by +;// the mehtod of two levels of indexing. 
+;//------------------------------------------------------- + + M_TABLE armVCM4P10_pIndexTable8x8 + DCD OMX_VC_CHROMA_DC, OMX_VC_CHROMA_HOR + DCD OMX_VC_CHROMA_VERT, OMX_VC_CHROMA_PLANE + + M_TABLE armVCM4P10_MultiplierTableChroma8x8,1 + DCW 3, 2, 1,4 + DCW -3,-2,-1,0 + DCW 1, 2, 3,4 + + + + IF CortexA8 + +;//-------------------------------------------- +;// Scratch variable +;//-------------------------------------------- + +pc RN 15 +return RN 0 +pTable RN 8 + +;//-------------------------------------------- +;// Input Arguments +;//-------------------------------------------- +pSrcLeft RN 0 ;// input pointer +pSrcAbove RN 1 ;// input pointer +pSrcAboveLeft RN 2 ;// input pointer +pDst RN 3 ;// output pointer +leftStep RN 4 ;// input variable +dstStep RN 5 ;// input variable +predMode RN 6 ;// input variable +availability RN 7 ;// input variable +pMultiplierTable RN 2 + +pTmp RN 9 +step RN 10 + +;//--------------------- +;// Neon Registers +;//--------------------- + +;// OMX_VC_CHROMA_HOR + +dLeftVal0 DN D0.8 +dLeftVal1 DN D1.8 +dLeftVal2 DN D2.8 +dLeftVal3 DN D3.8 +dLeftVal4 DN D4.8 +dLeftVal5 DN D5.8 +dLeftVal6 DN D6.8 +dLeftVal7 DN D7.8 + +;// OMX_VC_CHROMA_VERT + +dAboveVal DN D0.U8 + +;// OMX_VC_CHROMA_DC + +dLeftVal DN D1.U8 +dSumAboveValU16 DN D2.U16 +dSumAboveValU32 DN D3.U32 +dSumAboveValU8 DN D3.U8 +dSumLeftValU16 DN D2.U16 +dSumLeftValU32 DN D1.U32 +dSumLeftValU8 DN D1.U8 +dSumAboveLeft DN D2.U32 +dSumAboveLeftU8 DN D2.U8 +dIndexRow0U8 DN D5.U8 +dIndexRow0 DN D5.U64 +dIndexRow4U8 DN D6.U8 +dIndexRow4 DN D6.U64 +dDstRow0 DN D0.U8 +dDstRow4 DN D4.U8 +dConst128U8 DN D0.U8 + +;// OMX_VC_CHROMA_PLANE + +dRevAboveVal DN D3.U8 +dRevAboveValU64 DN D3.U64 +dAboveLeftVal DN D2.U8 +qAbove7minus0 QN Q3.S16 +qAboveDiff QN Q2.S16 +dIndex DN D8.U8 +dDiffAboveU8 DN D9.U8 +dDiffAboveS16 DN D9.S16 +dAboveDiff0U8 DN D4.U8 +dAboveDiff0U64 DN D4.U64 +dAbove7minus0U8 DN D6.U8 +dMultiplier DN D10.S16 +dHorPred DN D11.S16 +dRevLeftVal DN D3.U8 +dRevLeftValU64 DN D3.U64 
+qLeft7minus0 QN Q7.S16 +qLeftDiff QN Q6.S16 +dDiffLeftU8 DN D16.U8 +dDiffLeftS16 DN D16.S16 +dLeftDiff0U8 DN D12.U8 +dLeftDiff0U64 DN D12.U64 +dLeft7minus0U8 DN D14.U8 +dVerPred DN D3.S16 +dHVValS16 DN D3.S16 +dHVValS32 DN D3.S32 +dHVTempS32 DN D2.S32 +qA QN Q0.S16 +qB QN Q2.S16 +qC QN Q3.S16 +qMultiplier QN Q5.S16 +dMultiplier0 DN D10.S16 +dMultiplier1 DN D11.S16 +qC0 QN Q0.S16 +qC1 QN Q1.S16 +qC2 QN Q4.S16 +qC3 QN Q5.S16 +qC4 QN Q6.S16 +qC5 QN Q7.S16 +qC6 QN Q8.S16 +qC7 QN Q9.S16 +qSum0 QN Q0.S16 +qSum1 QN Q1.S16 +qSum2 QN Q4.S16 +qSum3 QN Q5.S16 +qSum4 QN Q6.S16 +qSum5 QN Q7.S16 +qSum6 QN Q8.S16 +qSum7 QN Q9.S16 +dSum0 DN D0.U8 +dSum1 DN D1.U8 +dSum2 DN D2.U8 +dSum3 DN D3.U8 +dSum4 DN D4.U8 +dSum5 DN D5.U8 +dSum6 DN D6.U8 +dSum7 DN D7.U8 + +;//----------------------------------------------------------------------------------------------- +;// omxVCM4P10_PredictIntraChroma_8x8 starts +;//----------------------------------------------------------------------------------------------- + + ;// Write function header. The routine dispatches on predMode to one of the OMX_VC_CHROMA_DC / _HOR / _VERT / _PLANE labels in this file and writes eight 8-byte rows of prediction to pDst. + M_START omxVCM4P10_PredictIntraChroma_8x8, r10, d15 + + ;// Define stack arguments + M_ARG LeftStep, 4 + M_ARG DstStep, 4 + M_ARG PredMode, 4 + M_ARG Availability, 4 + + LDR pTable,=armVCM4P10_pIndexTable8x8 ;// Load index table for switch case + + ;// Load argument from the stack + M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg + M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg + M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg + M_LDR availability, Availability ;// Arg availability loaded from stack to reg + + + LDR pc, [pTable, predMode, LSL #2] ;// Branch to the case based on predMode: the predMode-th word of armVCM4P10_pIndexTable8x8 is loaded into pc + +OMX_VC_CHROMA_DC + + TST availability, #OMX_VC_LEFT + BEQ DCChroma8x8LeftNotAvailable + + ADD pTmp, pSrcLeft, leftStep ;// pTmp starts on row 1 and walks the odd rows + ADD step, leftStep, leftStep ;// step = 2*leftStep, so pSrcLeft walks the even rows + + ;// Load Left Edge + VLD1 {dLeftVal[0]},[pSrcLeft],step ;// pSrcLeft[0*leftStep] + VLD1 {dLeftVal[1]},[pTmp],step ;// pSrcLeft[1*leftStep] + VLD1 
{dLeftVal[2]},[pSrcLeft],step ;// pSrcLeft[2*leftStep] + VLD1 {dLeftVal[3]},[pTmp],step ;// pSrcLeft[3*leftStep] + VLD1 {dLeftVal[4]},[pSrcLeft],step ;// pSrcLeft[4*leftStep] + VLD1 {dLeftVal[5]},[pTmp],step ;// pSrcLeft[5*leftStep] + VLD1 {dLeftVal[6]},[pSrcLeft],step ;// pSrcLeft[6*leftStep] + VLD1 {dLeftVal[7]},[pTmp] ;// pSrcLeft[7*leftStep] + + TST availability, #OMX_VC_UPPER + BEQ DCChroma8x8LeftOnlyAvailable + + ;// Load Upper Edge also + VLD1 dAboveVal,[pSrcAbove] ;// pSrcAbove[0 to 7] + + MOV return, #OMX_Sts_NoErr ;// returnNoError + + VPADDL dSumAboveValU16, dAboveVal ;// pSrcAbove[ 6+7 | 4+5 | 2+3 | 0+1 ] + VPADDL dSumAboveValU32, dSumAboveValU16 ;// pSrcAbove[ 4+5+6+7 | 0+1+2+3 ] + + VPADDL dSumLeftValU16, dLeftVal ;// pSrcLeft[ 6+7 | 4+5 | 2+3 | 0+1 ] + VPADDL dSumLeftValU32, dSumLeftValU16 ;// pSrcLeft[ 4+5+6+7 | 0+1+2+3 ] + + VADD dSumAboveLeft,dSumAboveValU32,dSumLeftValU32 + VRSHR dSumAboveLeft,dSumAboveLeft,#3 ;// Sum = (Sum + 4) >> 3 + VRSHR dSumAboveValU32,dSumAboveValU32,#2 ;// Sum = (Sum + 2) >> 2 + VRSHR dSumLeftValU32,dSumLeftValU32,#2 ;// Sum = (Sum + 2) >> 2 + + VMOV dIndexRow0U8,#0x0c + VMOV dIndexRow4U8,#0x04 + VSHL dIndexRow0,dIndexRow0,#32 ;// index0 = 0x0c0c0c0c00000000 + VSHR dIndexRow4,dIndexRow4,#32 ;// index4 = 0x0000000004040404 + VADD dIndexRow4U8,dIndexRow4U8,dIndexRow0U8 ;// index4 = 0x0c0c0c0c04040404 + VTBL dDstRow0,{dSumAboveLeftU8,dSumAboveValU8},dIndexRow0U8 + VTBL dDstRow4,{dSumLeftValU8,dSumAboveLeftU8},dIndexRow4U8 + +DCChroma8x8LeftStore + ADD pTmp, pDst, dstStep + ADD step, dstStep, dstStep + + VST1 dDstRow0,[pDst],step ;// pDst[0*dstStep+x] :0<= x <= 7 + VST1 dDstRow0,[pTmp],step ;// pDst[1*dstStep+x] :0<= x <= 7 + VST1 dDstRow0,[pDst],step ;// pDst[2*dstStep+x] :0<= x <= 7 + VST1 dDstRow0,[pTmp],step ;// pDst[3*dstStep+x] :0<= x <= 7 + VST1 dDstRow4,[pDst],step ;// pDst[4*dstStep+x] :0<= x <= 7 + VST1 dDstRow4,[pTmp],step ;// pDst[5*dstStep+x] :0<= x <= 7 + VST1 dDstRow4,[pDst],step ;// pDst[6*dstStep+x] :0<= x <= 
7 + VST1 dDstRow4,[pTmp] ;// pDst[7*dstStep+x] :0<= x <= 7 + + M_EXIT + + +DCChroma8x8LeftOnlyAvailable + + MOV return, #OMX_Sts_NoErr + + VPADDL dSumLeftValU16, dLeftVal ;// pSrcLeft[ 6+7 | 4+5 | 2+3 | 0+1 ] + VPADDL dSumLeftValU32, dSumLeftValU16 ;// pSrcLeft[ 4+5+6+7 | 0+1+2+3 ] + VRSHR dSumLeftValU32,dSumLeftValU32,#2 ;// Sum = (Sum + 2) >> 2 + + VDUP dDstRow0,dSumLeftValU8[0] + VDUP dDstRow4,dSumLeftValU8[4] + + B DCChroma8x8LeftStore + + +DCChroma8x8LeftNotAvailable + + TST availability, #OMX_VC_UPPER + BEQ DCChroma8x8NoneAvailable + + ;// Load Upper Edge + VLD1 dAboveVal,[pSrcAbove] ;// pSrcAbove[0 to 7] + MOV return, #OMX_Sts_NoErr ;// returnNoError + + VPADDL dSumAboveValU16, dAboveVal ;// pSrcAbove[ 6+7 | 4+5 | 2+3 | 0+1 ] + VPADDL dSumAboveValU32, dSumAboveValU16 ;// pSrcAbove[ 4+5+6+7 | 0+1+2+3 ] + VRSHR dSumAboveValU32,dSumAboveValU32,#2 ;// Sum = (Sum + 2) >> 2 + VMOV dIndexRow0U8,#0x04 + VSHL dIndexRow0,dIndexRow0,#32 ;// index = 0x0404040400000000 + VTBL dDstRow0,{dSumAboveValU8},dIndexRow0U8 + + B DCChroma8x8UpperStore + + +DCChroma8x8NoneAvailable + + VMOV dConst128U8,#0x80 ;// 0x8080808080808080 if(count == 0) + MOV return, #OMX_Sts_NoErr ;// returnNoError + +DCChroma8x8UpperStore + + ADD pTmp, pDst, dstStep + ADD step, dstStep, dstStep + + VST1 dDstRow0,[pDst],step ;// pDst[0*dstStep+x] :0<= x <= 7 + VST1 dDstRow0,[pTmp],step ;// pDst[1*dstStep+x] :0<= x <= 7 + VST1 dDstRow0,[pDst],step ;// pDst[2*dstStep+x] :0<= x <= 7 + VST1 dDstRow0,[pTmp],step ;// pDst[3*dstStep+x] :0<= x <= 7 + VST1 dDstRow0,[pDst],step ;// pDst[4*dstStep+x] :0<= x <= 7 + VST1 dDstRow0,[pTmp],step ;// pDst[5*dstStep+x] :0<= x <= 7 + VST1 dDstRow0,[pDst],step ;// pDst[6*dstStep+x] :0<= x <= 7 + VST1 dDstRow0,[pTmp] ;// pDst[7*dstStep+x] :0<= x <= 7 + + M_EXIT + + +OMX_VC_CHROMA_VERT + + VLD1 dAboveVal,[pSrcAbove] ;// pSrcAbove[x] :0<= x <= 7 + MOV return, #OMX_Sts_NoErr + + B DCChroma8x8UpperStore + + +OMX_VC_CHROMA_HOR + + ADD pTmp, pSrcLeft, leftStep + ADD step, leftStep, 
leftStep + + VLD1 {dLeftVal0[]},[pSrcLeft],step ;// pSrcLeft[0*leftStep] + VLD1 {dLeftVal1[]},[pTmp],step ;// pSrcLeft[1*leftStep] + VLD1 {dLeftVal2[]},[pSrcLeft],step ;// pSrcLeft[2*leftStep] + VLD1 {dLeftVal3[]},[pTmp],step ;// pSrcLeft[3*leftStep] + VLD1 {dLeftVal4[]},[pSrcLeft],step ;// pSrcLeft[4*leftStep] + VLD1 {dLeftVal5[]},[pTmp],step ;// pSrcLeft[5*leftStep] + VLD1 {dLeftVal6[]},[pSrcLeft],step ;// pSrcLeft[6*leftStep] + VLD1 {dLeftVal7[]},[pTmp] ;// pSrcLeft[7*leftStep] + + B DCChroma8x8PlaneStore + + +OMX_VC_CHROMA_PLANE + ADD pTmp, pSrcLeft, leftStep + ADD step, leftStep, leftStep + + VLD1 dAboveVal,[pSrcAbove] ;// pSrcAbove[x] :0<= x <= 7 + VLD1 dAboveLeftVal[0],[pSrcAboveLeft] + + VLD1 {dLeftVal[0]},[pSrcLeft],step ;// pSrcLeft[0*leftStep] + VLD1 {dLeftVal[1]},[pTmp],step ;// pSrcLeft[1*leftStep] + VLD1 {dLeftVal[2]},[pSrcLeft],step ;// pSrcLeft[2*leftStep] + VLD1 {dLeftVal[3]},[pTmp],step ;// pSrcLeft[3*leftStep] + VLD1 {dLeftVal[4]},[pSrcLeft],step ;// pSrcLeft[4*leftStep] + VLD1 {dLeftVal[5]},[pTmp],step ;// pSrcLeft[5*leftStep] + VLD1 {dLeftVal[6]},[pSrcLeft],step ;// pSrcLeft[6*leftStep] + VLD1 {dLeftVal[7]},[pTmp] ;// pSrcLeft[7*leftStep] + + + VREV64 dRevAboveVal,dAboveVal ;// Reverse order of bytes = pSrcAbove[0:1:2:3:4:5:6:7] + VSUBL qAbove7minus0,dRevAboveVal,dAboveLeftVal ;// qAbove7minus0[0] = pSrcAbove[7] - pSrcAboveLeft[0] + VSHR dRevAboveValU64,dRevAboveValU64,#8 ;// pSrcAbove[X:0:1:2:3:4:5:6] + VSUBL qAboveDiff,dRevAboveVal,dAboveVal ;// pSrcAbove[6] - pSrcAbove[0] + ;// pSrcAbove[5] - pSrcAbove[1] + ;// pSrcAbove[4] - pSrcAbove[2] + + VREV64 dRevLeftVal,dLeftVal ;// Reverse order of bytes = pSrcLeft[0:1:2:3:4:5:6:7] + VSUBL qLeft7minus0,dRevLeftVal,dAboveLeftVal ;// qAbove7minus0[0] = pSrcLeft[7] - pSrcAboveLeft[0] + VSHR dRevLeftValU64,dRevLeftValU64,#8 ;// pSrcLeft[X:0:1:2:3:4:5:6] + VSUBL qLeftDiff,dRevLeftVal,dLeftVal ;// pSrcLeft[6] - pSrcLeft[0] + ;// pSrcLeft[5] - pSrcLeft[1] + ;// pSrcLeft[4] - pSrcLeft[2] + + LDR 
pMultiplierTable,=armVCM4P10_MultiplierTableChroma8x8 ;// Used to calculate Hval & Vval + VSHL dAboveDiff0U64,dAboveDiff0U64,#16 + VEXT dDiffAboveU8,dAboveDiff0U8,dAbove7minus0U8,#2 ;// pSrcAbove[ 7-0 | 4-2 | 5-1 | 6-0 ] + VLD1 dMultiplier,[pMultiplierTable]! + VSHL dLeftDiff0U64,dLeftDiff0U64,#16 + VEXT dDiffLeftU8,dLeftDiff0U8,dLeft7minus0U8,#2 ;// pSrcLeft[ 7-0 | 4-2 | 5-1 | 6-0 ] + + + VMUL dHorPred,dDiffAboveS16,dMultiplier ;// pSrcAbove[ 4*(7-0) | 1*(4-2) | 2*(5-1) | 3*(6-0) ] + VMUL dVerPred,dDiffLeftS16,dMultiplier + VPADD dHVValS16,dHorPred,dVerPred + + + VPADDL dHVValS32,dHVValS16 ;// [V|H] in 32 bits each + VSHL dHVTempS32,dHVValS32,#4 ;// 17*H = 16*H + H = (H<<4)+H + VADD dHVValS32,dHVValS32,dHVTempS32 ;// [ 17*V | 17*H ]in 32 bits each + VLD1 {dMultiplier0,dMultiplier1},[pMultiplierTable] ;// qMultiplier = [ 4|3|2|1|0|-1|-2|-3 ] + VRSHR dHVValS32,dHVValS32,#5 ;// [c|b] in 16bits each + VADDL qA,dAboveVal,dLeftVal + VDUP qA,qA[7] + VSHL qA,qA,#4 ;// [a|a|a|a|a|a|a|a] + VDUP qB,dHVValS16[0] ;// [b|b|b|b|b|b|b|b] + VDUP qC,dHVValS16[2] ;// [c|c|c|c|c|c|c|c] + + + VMUL qB,qB,qMultiplier + VMUL qC,qC,qMultiplier + VADD qB,qB,qA + + VDUP qC0,qC[0] + VDUP qC1,qC[1] + VDUP qC2,qC[2] + VDUP qC3,qC[3] + VDUP qC4,qC[4] + VDUP qC5,qC[5] + VDUP qC6,qC[6] + VDUP qC7,qC[7] + + VADD qSum0,qB,qC0 + VADD qSum1,qB,qC1 + VADD qSum2,qB,qC2 + VADD qSum3,qB,qC3 + VADD qSum4,qB,qC4 + VADD qSum5,qB,qC5 + VADD qSum6,qB,qC6 + VADD qSum7,qB,qC7 + + VQRSHRUN dSum0,qSum0,#5 ;// (OMX_U8)armClip(0,255,(Sum+16)>>5) + VQRSHRUN dSum1,qSum1,#5 + VQRSHRUN dSum2,qSum2,#5 + VQRSHRUN dSum3,qSum3,#5 + VQRSHRUN dSum4,qSum4,#5 + VQRSHRUN dSum5,qSum5,#5 + VQRSHRUN dSum6,qSum6,#5 + VQRSHRUN dSum7,qSum7,#5 + +DCChroma8x8PlaneStore + ADD pTmp, pDst, dstStep + ADD step, dstStep, dstStep + + VST1 dSum0,[pDst],step ;// pDst[0*dstStep+x] :0<= x <= 7 + VST1 dSum1,[pTmp],step ;// pDst[1*dstStep+x] :0<= x <= 7 + VST1 dSum2,[pDst],step ;// pDst[2*dstStep+x] :0<= x <= 7 + VST1 dSum3,[pTmp],step ;// 
pDst[3*dstStep+x] :0<= x <= 7 + VST1 dSum4,[pDst],step ;// pDst[4*dstStep+x] :0<= x <= 7 + VST1 dSum5,[pTmp],step ;// pDst[5*dstStep+x] :0<= x <= 7 + VST1 dSum6,[pDst],step ;// pDst[6*dstStep+x] :0<= x <= 7 + VST1 dSum7,[pTmp] ;// pDst[7*dstStep+x] :0<= x <= 7 + + MOV return, #OMX_Sts_NoErr + M_END + + ENDIF ;// CortexA8 + + END +;//----------------------------------------------------------------------------------------------- +;// omxVCM4P10_PredictIntraChroma_8x8 ends +;//----------------------------------------------------------------------------------------------- diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s new file mode 100755 index 0000000000000000000000000000000000000000..e9c0eee49b62fe4797c303bb37df568cc57b9b9a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s @@ -0,0 +1,424 @@ +;// +;// +;// File Name: omxVCM4P10_PredictIntra_16x16_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + + +;//------------------------------------------------------- +;// This table for implementing switch case of C in asm by +;// the mehtod of two levels of indexing. 
+;//------------------------------------------------------- + + M_TABLE armVCM4P10_pIndexTable16x16 + DCD OMX_VC_16X16_VERT, OMX_VC_16X16_HOR + DCD OMX_VC_16X16_DC, OMX_VC_16X16_PLANE + + + IF CortexA8 + + M_TABLE armVCM4P10_MultiplierTable16x16,1 + DCW 7, 6, 5, 4, 3, 2, 1, 8 + DCW 0, 1, 2, 3, 4, 5, 6, 7 + DCW 8, 9, 10, 11, 12, 13, 14, 15 + +;//-------------------------------------------- +;// Constants +;//-------------------------------------------- +BLK_SIZE EQU 0x10 +MUL_CONST0 EQU 0x01010101 +MUL_CONST1 EQU 0x00060004 +MUL_CONST2 EQU 0x00070005 +MUL_CONST3 EQU 0x00030001 +MASK_CONST EQU 0x00FF00FF + +;//-------------------------------------------- +;// Scratch variable +;//-------------------------------------------- +y RN 12 +pc RN 15 + +return RN 0 +pTable RN 9 +count RN 11 +pMultTable RN 9 +; ---------------------------------------------- +; Neon registers +; ---------------------------------------------- +qAbove QN Q0.U8 +qLeft QN Q1.U8 +qSum8 QN Q0.U16 +dSum80 DN D0.U16 +dSum81 DN D1.U16 +dSum4 DN D0.U16 +dSum2 DN D0.U32 +dSum1 DN D0.U64 +qOut QN Q3.U8 +dSumLeft DN D6.U64 +dSumAbove DN D7.U64 +dSum DN D8.U64 +dSum0 DN D8.U8[0] + +qH QN Q11.S32 +qV QN Q12.S32 +qA QN Q11.S16 +qB QN Q6.S16 +qC QN Q7.S16 + +qB0 QN Q5.S16 +qB1 QN Q6.S16 +dA1 DN D23.S16 + +dH0 DN D22.S32 +dH1 DN D23.S32 +dV0 DN D24.S32 +dV1 DN D25.S32 + +qHV QN Q11.S64 +qHV0 QN Q11.S32 +qHV1 QN Q12.S64 + +dHV00 DN D22.S32 +dHV01 DN D23.S32 + +dHV0 DN D22.S16[0] +dHV1 DN D23.S16[0] +dHV10 DN D24.S64 +dHV11 DN D25.S64 + +qSum0 QN Q0.S16 +qSum1 QN Q1.S16 + +dOut0 DN D6.U8 +dOut1 DN D7.U8 + +dLeft0 DN D2.U8 +dLeft1 DN D3.U8 +qConst QN Q13.S16 + +dAbove0 DN D0.U8 +dAbove1 DN D1.U8 + +dRevLeft64 DN D12.U64 +dRevLeft DN D12.U8 +dRevAbove64 DN D5.U64 +dRevAbove DN D5.U8 +qLeftDiff QN Q8.S16 +dLeftDiff1 DN D17.S16 +dLeftDiff64 DN D17.S64 +qDiffLeft QN Q8.S16 +qDiffAbove QN Q4.S16 +dAboveDiff1 DN D9.S16 +dAboveDiff64 DN D9.S64 +qAboveDiff QN Q4.S16 + +dAboveLeft DN D4.U8 + +dDiffLeft0 DN D16.S16 
+dDiffLeft1 DN D17.S16 +dDiffAbove0 DN D8.S16 +dDiffAbove1 DN D9.S16 + +qLeft15minus0 QN Q7.S16 +dLeft15minus0 DN D14.S16 +qAbove15minus0 QN Q3.S16 +dAbove15minus0 DN D6.S16 + +qMultiplier QN Q10.S16 +qMultiplier0 QN Q10.S16 +qMultiplier1 QN Q12.S16 +dMultiplier0 DN D20.S16 +dMultiplier1 DN D21.S16 + +dBPlusCMult7 DN D1.S64 +dBPlusCMult7S16 DN D1.S16 + +qTmp QN Q0.U8 + +;//-------------------------------------------- +;// Declare input registers +;//-------------------------------------------- +pSrcLeft RN 0 ;// input pointer +pSrcAbove RN 1 ;// input pointer +pSrcAboveLeft RN 2 ;// input pointer +pDst RN 3 ;// output pointer +leftStep RN 4 ;// input variable +dstStep RN 5 ;// input variable +predMode RN 6 ;// input variable +availability RN 7 ;// input variable + +pTmp RN 8 +step RN 10 +pTmp2 RN 11 + +;//----------------------------------------------------------------------------------------------- +;// omxVCM4P10_PredictIntra_16x16 starts +;//----------------------------------------------------------------------------------------------- + + ;// Write function header + M_START omxVCM4P10_PredictIntra_16x16, r11, d15 + + ;// Define stack arguments + M_ARG LeftStep, 4 + M_ARG DstStep, 4 + M_ARG PredMode, 4 + M_ARG Availability, 4 + + ;// M_STALL ARM1136JS=4 + + LDR pTable,=armVCM4P10_pIndexTable16x16 ;// Load index table for switch case + + ;// Load argument from the stack + M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg + M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg + M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg + M_LDR availability, Availability ;// Arg availability loaded from stack to reg + + MOV y, #BLK_SIZE ;// Outer Loop Count + LDR pc, [pTable, predMode, LSL #2] ;// Branch to the case based on preMode + +OMX_VC_16X16_VERT + VLD1 qAbove, [pSrcAbove] + ADD pTmp, pDst, dstStep + ADD step, dstStep, dstStep + VST1 qAbove, [pDst], step + VST1 qAbove, [pTmp], step + VST1 qAbove, [pDst], step + VST1 
qAbove, [pTmp], step + VST1 qAbove, [pDst], step + VST1 qAbove, [pTmp], step + VST1 qAbove, [pDst], step + VST1 qAbove, [pTmp], step + VST1 qAbove, [pDst], step + VST1 qAbove, [pTmp], step + VST1 qAbove, [pDst], step + VST1 qAbove, [pTmp], step + VST1 qAbove, [pDst], step + VST1 qAbove, [pTmp], step + VST1 qAbove, [pDst] + VST1 qAbove, [pTmp] + MOV return, #OMX_Sts_NoErr ;// returnNoError + M_EXIT + +OMX_VC_16X16_HOR + ADD pTmp, pSrcLeft, leftStep + ADD leftStep, leftStep, leftStep + ADD pTmp2, pDst, dstStep + ADD dstStep, dstStep, dstStep +LoopHor + VLD1 {qLeft[]}, [pSrcLeft], leftStep + VLD1 {qTmp[]}, [pTmp], leftStep + SUBS y, y, #8 + VST1 qLeft, [pDst], dstStep + VST1 qTmp, [pTmp2], dstStep + VLD1 {qLeft[]}, [pSrcLeft], leftStep + VLD1 {qTmp[]}, [pTmp], leftStep + VST1 qLeft, [pDst], dstStep + VST1 qTmp, [pTmp2], dstStep + VLD1 {qLeft[]}, [pSrcLeft], leftStep + VLD1 {qTmp[]}, [pTmp], leftStep + VST1 qLeft, [pDst], dstStep + VST1 qTmp, [pTmp2], dstStep + VLD1 {qLeft[]}, [pSrcLeft], leftStep + VLD1 {qTmp[]}, [pTmp], leftStep + VST1 qLeft, [pDst], dstStep + VST1 qTmp, [pTmp2], dstStep + + BNE LoopHor ;// Two passes, 8 rows per pass (y counts 16 -> 0 in steps of 8) + MOV return, #OMX_Sts_NoErr + M_EXIT + +OMX_VC_16X16_DC + MOV count, #0 ;// count = number of available edges (left, upper) + TST availability, #OMX_VC_LEFT + BEQ UpperOrNoneAvailable ;// Skip the left-edge sum if left is not available + + ADD pTmp, pSrcLeft, leftStep + ADD step, leftStep, leftStep + + VLD1 {qLeft[0]}, [pSrcLeft],step + VLD1 {qLeft[1]}, [pTmp],step + VLD1 {qLeft[2]}, [pSrcLeft],step + VLD1 {qLeft[3]}, [pTmp],step + VLD1 {qLeft[4]}, [pSrcLeft],step + VLD1 {qLeft[5]}, [pTmp],step + VLD1 {qLeft[6]}, [pSrcLeft],step + VLD1 {qLeft[7]}, [pTmp],step + VLD1 {qLeft[8]}, [pSrcLeft],step + VLD1 {qLeft[9]}, [pTmp],step + VLD1 {qLeft[10]},[pSrcLeft],step + VLD1 {qLeft[11]},[pTmp],step + VLD1 {qLeft[12]},[pSrcLeft],step + VLD1 {qLeft[13]},[pTmp],step + VLD1 {qLeft[14]},[pSrcLeft],step + VLD1 {qLeft[15]},[pTmp] + + VPADDL qSum8, qLeft + ADD count, count, #1 + VPADD dSum4, dSum80, dSum81 + VPADDL dSum2,
dSum4 + VPADDL dSumLeft, dSum2 + VRSHR dSum, dSumLeft, #4 + +UpperOrNoneAvailable + TST availability, #OMX_VC_UPPER ;// if(availability & #OMX_VC_UPPER) + BEQ BothOrNoneAvailable ;// Skip the upper-edge sum if upper is not available + VLD1 qAbove, [pSrcAbove] + ADD count, count, #1 ;// if upper inc count by 1 + VPADDL qSum8, qAbove + VPADD dSum4, dSum80, dSum81 + VPADDL dSum2, dSum4 + VPADDL dSumAbove, dSum2 + VRSHR dSum, dSumAbove, #4 + +BothOrNoneAvailable + CMP count, #2 ;// check if both available + BNE NoneAvailable + VADD dSum, dSumAbove, dSumLeft + VRSHR dSum, dSum, #5 + + +NoneAvailable + VDUP qOut, dSum0 + CMP count, #0 ;// check if none available + ADD pTmp, pDst, dstStep + ADD step, dstStep, dstStep + BNE LoopDC + VMOV qOut, #128 +LoopDC + VST1 qOut, [pDst], step + VST1 qOut, [pTmp], step + VST1 qOut, [pDst], step + VST1 qOut, [pTmp], step + VST1 qOut, [pDst], step + VST1 qOut, [pTmp], step + VST1 qOut, [pDst], step + VST1 qOut, [pTmp], step + VST1 qOut, [pDst], step + VST1 qOut, [pTmp], step + VST1 qOut, [pDst], step + VST1 qOut, [pTmp], step + VST1 qOut, [pDst], step + VST1 qOut, [pTmp], step + VST1 qOut, [pDst], step + VST1 qOut, [pTmp], step + MOV return, #OMX_Sts_NoErr + M_EXIT + +OMX_VC_16X16_PLANE + LDR pMultTable, =armVCM4P10_MultiplierTable16x16 + VLD1 qAbove, [pSrcAbove] ;// pSrcAbove[x] :0<= x <= 15 + VLD1 dAboveLeft[0],[pSrcAboveLeft] + ADD pTmp, pSrcLeft, leftStep + ADD step, leftStep, leftStep + VLD1 {qLeft[0]}, [pSrcLeft],step + VLD1 {qLeft[1]}, [pTmp],step + VLD1 {qLeft[2]}, [pSrcLeft],step + VLD1 {qLeft[3]}, [pTmp],step + VLD1 {qLeft[4]}, [pSrcLeft],step + VLD1 {qLeft[5]}, [pTmp],step + VLD1 {qLeft[6]}, [pSrcLeft],step + VLD1 {qLeft[7]}, [pTmp],step + VLD1 {qLeft[8]}, [pSrcLeft],step + VLD1 {qLeft[9]}, [pTmp],step + VLD1 {qLeft[10]}, [pSrcLeft],step + VLD1 {qLeft[11]}, [pTmp],step + VLD1 {qLeft[12]}, [pSrcLeft],step + VLD1 {qLeft[13]}, [pTmp],step + VLD1 {qLeft[14]}, [pSrcLeft],step + VLD1 {qLeft[15]}, [pTmp] + + VREV64 dRevAbove, dAbove1 ;//
pSrcAbove[15:14:13:12:11:10:9:8] + VSUBL qAbove15minus0, dRevAbove, dAboveLeft ;// qAbove15minus0[0] = pSrcAbove[15] - pSrcAboveLeft[0] + VSHR dRevAbove64, dRevAbove64, #8 ;// pSrcAbove[14:13:12:11:10:9:8:X] + VSUBL qAboveDiff, dRevAbove, dAbove0 + + VSHL dAboveDiff64, dAboveDiff64, #16 + VEXT dDiffAbove1, dAboveDiff1, dAbove15minus0, #1 + + VREV64 dRevLeft,dLeft1 ;// pSrcLeft[15:14:13:12:11:10:9:8] + VSUBL qLeft15minus0,dRevLeft, dAboveLeft ;// qLeft15minus0[0] = pSrcLeft[15] - pSrcAboveLeft[0] + VSHR dRevLeft64, dRevLeft64, #8 ;// pSrcLeft[14:13:12:11:10:9:8:X] + VSUBL qLeftDiff,dRevLeft, dLeft0 + + ;// Multiplier = [8|1|2|...|6|7] + VLD1 qMultiplier, [pMultTable]! + + VSHL dLeftDiff64, dLeftDiff64, #16 + VEXT dDiffLeft1, dLeftDiff1, dLeft15minus0, #1 + + VMULL qH,dDiffAbove0, dMultiplier0 + VMULL qV,dDiffLeft0, dMultiplier0 + VMLAL qH,dDiffAbove1, dMultiplier1 + VMLAL qV,dDiffLeft1, dMultiplier1 + + VPADD dHV00,dH1,dH0 + VPADD dHV01,dV1,dV0 + VPADDL qHV, qHV0 + VSHL qHV1,qHV,#2 + VADD qHV,qHV,qHV1 + + ;// HV = [c = ((5*V+32)>>6) | b = ((5*H+32)>>6)] + VRSHR qHV,qHV,#6 + + ;// HV1 = [c*7|b*7] + VSHL qHV1,qHV,#3 + VSUB qHV1,qHV1,qHV + + ;// Multiplier0 = [0|1|2|...|7] + VLD1 qMultiplier0, [pMultTable]!
+ VDUP qB, dHV0 + VDUP qC, dHV1 + + VADDL qA,dAbove1,dLeft1 + VSHL qA,qA, #4 + VDUP qA,dA1[3] + VADD dBPlusCMult7, dHV10, dHV11 + + ;// Multiplier1 = [8|9|10|...|15] + VLD1 qMultiplier1, [pMultTable] + ;// Const = a - 7*(b+c) + VDUP qConst, dBPlusCMult7S16[0] + VSUB qConst, qA, qConst + + ;// B0 = [0*b|1*b|2*b|3*b|......|7*b] + VMUL qB0,qB,qMultiplier0 + + ;// B1 = [8*b|9*b|10*b|11*b|....|15*b] + VMUL qB1,qB,qMultiplier1 + + VADD qSum0, qB0, qConst + VADD qSum1, qB1, qConst + + ;// Loops for 16 times +LoopPlane + ;// (b*x + c*y + C)>>5 + VQRSHRUN dOut0, qSum0,#5 + VQRSHRUN dOut1, qSum1,#5 + SUBS y, y, #1 + VST1 qOut,[pDst],dstStep + VADD qSum0,qSum0,qC + VADD qSum1,qSum1,qC + BNE LoopPlane + + MOV return, #OMX_Sts_NoErr + + M_END + + ENDIF ;// CortexA8 + + END +;----------------------------------------------------------------------------------------------- +; omxVCM4P10_PredictIntra_16x16 ends +;----------------------------------------------------------------------------------------------- diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s new file mode 100755 index 0000000000000000000000000000000000000000..39eb8a40d100ebaed6505d039ba46f6457ace542 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s @@ -0,0 +1,531 @@ +;// +;// +;// File Name: omxVCM4P10_PredictIntra_4x4_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + +;// Define the processor variants supported by this file + + M_VARIANTS CortexA8 + +;//------------------------------------------------------- +;// This table for implementing switch case of C in asm by +;// the method of two levels of indexing.
+;//------------------------------------------------------- + + M_TABLE armVCM4P10_pSwitchTable4x4 + DCD OMX_VC_4x4_VERT, OMX_VC_4x4_HOR + DCD OMX_VC_4x4_DC, OMX_VC_4x4_DIAG_DL + DCD OMX_VC_4x4_DIAG_DR, OMX_VC_4x4_VR + DCD OMX_VC_4x4_HD, OMX_VC_4x4_VL + DCD OMX_VC_4x4_HU + + + IF CortexA8 + +;//-------------------------------------------- +;// Scratch variable +;//-------------------------------------------- +return RN 0 +pTable RN 8 +pc RN 15 + +;//-------------------------------------------- +;// Declare input registers +;//-------------------------------------------- +pSrcLeft RN 0 ;// input pointer +pSrcAbove RN 1 ;// input pointer +pSrcAboveLeft RN 2 ;// input pointer +pDst RN 3 ;// output pointer +leftStep RN 4 ;// input variable +dstStep RN 5 ;// input variable +predMode RN 6 ;// input variable +availability RN 7 ;// input variable +pDst1 RN 1 +pDst2 RN 4 +pDst3 RN 6 + +pSrcTmp RN 9 +srcStep RN 10 +pDstTmp RN 11 +dstep RN 12 + +;//------------------- +;// Neon registers +;//------------------- + +;// OMX_VC_4x4_VERT +dAboveU32 DN D0.U32 + +;// OMX_VC_4x4_HOR +dLeftVal0 DN D0.8 +dLeftVal1 DN D1.8 +dLeftVal2 DN D2.8 +dLeftVal3 DN D3.8 +dLeftVal0U32 DN D0.U32 +dLeftVal1U32 DN D1.U32 +dLeftVal2U32 DN D2.U32 +dLeftVal3U32 DN D3.U32 + +;// OMX_VC_4x4_DC +dLeftVal DN D0.U8 +dLeftValU32 DN D0.U32 +dSumAboveLeftU16 DN D1.U16 +dSumAboveLeftU32 DN D1.U32 +dSumAboveLeftU64 DN D1.U64 +dSumAboveLeftU8 DN D1.U8 +dSum DN D0.U8 + +dSumLeftValU16 DN D1.U16 +dSumLeftValU32 DN D1.U32 +dSumLeftValU64 DN D1.U64 +dSumLeftValU8 DN D1.U8 + +dAboveVal DN D0.U8 +dSumAboveValU16 DN D1.U16 +dSumAboveValU32 DN D1.U32 +dSumAboveValU64 DN D1.U64 +dSumAboveValU8 DN D1.U8 +dConst128U8 DN D0.U8 + + +;//OMX_VC_4x4_DIAG_DL + +dAbove DN D0.U8 +dU7 DN D2.U8 +dU3 DN D2.U8 +dAbove0 DN D3.U8 +dAbove1 DN D4.U8 +dAbove2 DN D5.U8 +dTmp DN D6.U8 +dTmp0 DN D7.U8 +dTmp1 DN D8.U8 +dTmp2 DN D9.U8 +dTmp3 DN D10.U8 +dTmpU32 DN D6.U32 + + +;//OMX_VC_4x4_DIAG_DR +dLeft DN D1.U8 +dUL DN D2.U8 +
+;//OMX_VC_4x4_VR +dLeft0 DN D1.U8 +dLeft1 DN D2.U8 +dEven0 DN D3.U8 +dEven1 DN D4.U8 +dEven2 DN D5.U8 +dOdd0 DN D6.U8 +dOdd1 DN D11.U8 +dOdd2 DN D12.U8 +dTmp3U32 DN D10.U32 +dTmp2U32 DN D9.U32 + + +;//OMX_VC_4x4_HD +dTmp1U64 DN D8.U64 +dTmp0U64 DN D7.U64 +dTmpU64 DN D6.U64 +dTmpU32 DN D6.U32 +dTmp1U32 DN D8.U32 + +;//OMX_VC_4x4_HU +dL3 DN D2.U8 +dLeftHU0 DN D3.U8 +dLeftHU1 DN D4.U8 +dLeftHU2 DN D5.U8 +dTmp0U32 DN D7.U32 + + + + +;//----------------------------------------------------------------------------------------------- +;// omxVCM4P10_PredictIntra_4x4 starts +;//----------------------------------------------------------------------------------------------- + + ;// Write function header + M_START omxVCM4P10_PredictIntra_4x4, r12,d12 + + ;// Define stack arguments + M_ARG LeftStep, 4 + M_ARG DstStep, 4 + M_ARG PredMode, 4 + M_ARG Availability, 4 + + + LDR pTable,=armVCM4P10_pSwitchTable4x4 ;// Load index table for switch case + + ;// Load argument from the stack + M_LDRD predMode,availability,PredMode ;// Arg predMode & availability loaded from stack to reg + M_LDRD leftStep,dstStep,LeftStep ;// Arg leftStep & dstStep loaded from stack to reg + + + LDR pc, [pTable, predMode, LSL #2] ;// Branch to the case based on predMode + + +OMX_VC_4x4_HOR + + ADD pSrcTmp, pSrcLeft, leftStep + ADD srcStep, leftStep, leftStep + ;// Load Left Edge + VLD1 {dLeftVal0[]},[pSrcLeft],srcStep ;// pSrcLeft[0*leftStep] + VLD1 {dLeftVal1[]},[pSrcTmp],srcStep ;// pSrcLeft[1*leftStep] + VLD1 {dLeftVal2[]},[pSrcLeft] ;// pSrcLeft[2*leftStep] + VLD1 {dLeftVal3[]},[pSrcTmp] ;// pSrcLeft[3*leftStep] + + ADD pDstTmp, pDst, dstStep + ADD dstep, dstStep, dstStep + + VST1 dLeftVal0U32[0],[pDst],dstep ;// pDst[0*dstStep+x] :0<= x <= 3 + VST1 dLeftVal1U32[0],[pDstTmp],dstep ;// pDst[1*dstStep+x] :0<= x <= 3 + VST1 dLeftVal2U32[0],[pDst] ;// pDst[2*dstStep+x] :0<= x <= 3 + VST1 dLeftVal3U32[0],[pDstTmp] ;// pDst[3*dstStep+x] :0<= x <= 3 + + B ExitPredict4x4 ;// Branch to exit code +
+OMX_VC_4x4_VERT + + ;// Load Upper Edge + VLD1 dAboveU32[0],[pSrcAbove] + ADD pDstTmp, pDst, dstStep + ADD dstep, dstStep, dstStep + +DCPredict4x4VertStore + + VST1 dAboveU32[0],[pDst],dstep + VST1 dAboveU32[0],[pDstTmp],dstep + VST1 dAboveU32[0],[pDst] + VST1 dAboveU32[0],[pDstTmp] + + B ExitPredict4x4 ;// Branch to exit code + +OMX_VC_4x4_DC + + + TST availability, #OMX_VC_LEFT + BEQ DCPredict4x4LeftNotAvailable + + ADD pSrcTmp, pSrcLeft, leftStep + ADD srcStep, leftStep, leftStep + ;// Load Left Edge + VLD1 {dLeftVal[0]},[pSrcLeft],srcStep ;// pSrcLeft[0*leftStep] + VLD1 {dLeftVal[1]},[pSrcTmp],srcStep ;// pSrcLeft[1*leftStep] + VLD1 {dLeftVal[2]},[pSrcLeft] ;// pSrcLeft[2*leftStep] + VLD1 {dLeftVal[3]},[pSrcTmp] ;// pSrcLeft[3*leftStep] + + TST availability, #OMX_VC_UPPER + BEQ DCPredict4x4LeftOnlyAvailable + + ;// Load Upper Edge also + VLD1 dLeftValU32[1],[pSrcAbove] ;// pSrcAbove[0 to 3] + MOV return, #OMX_Sts_NoErr + + VPADDL dSumAboveLeftU16, dLeftVal ;// [pSrcAbove[2+3 | 0+1] | pSrcLeft[2+3 | 0+1]] + VPADDL dSumAboveLeftU32, dSumAboveLeftU16 ;// [pSrcAbove[2+3+0+1] | pSrcLeft[2+3+0+1]] + VPADDL dSumAboveLeftU64, dSumAboveLeftU32 ;// [pSrcAbove[2+3+0+1] + pSrcLeft[2+3+0+1]] + VRSHR dSumAboveLeftU64,dSumAboveLeftU64,#3 ;// Sum = (Sum + 4) >> 3 + ADD pDstTmp, pDst, dstStep + ADD dstep, dstStep, dstStep + VDUP dSum,dSumAboveLeftU8[0] + + B DCPredict4x4VertStore + +DCPredict4x4LeftOnlyAvailable + + MOV return, #OMX_Sts_NoErr ;// returnNoError + + VPADDL dSumLeftValU16, dLeftVal ;// [ XX | pSrcLeft[2+3 | 0+1]] + VPADDL dSumLeftValU32, dSumLeftValU16 ;// [ XXXX | pSrcLeft[2+3+0+1]] + + VRSHR dSumLeftValU32,dSumLeftValU32,#2 ;// Sum = (Sum + 2) >> 2 + ADD pDstTmp, pDst, dstStep + ADD dstep, dstStep, dstStep + VDUP dSum,dSumLeftValU8[0] + + B DCPredict4x4VertStore + +DCPredict4x4LeftNotAvailable + + TST availability, #OMX_VC_UPPER + BEQ DCPredict4x4NoneAvailable + + ;// Load Upper Edge + VLD1 dAboveU32[0],[pSrcAbove] ;// pSrcAbove[0 to 3] + MOV return, 
#OMX_Sts_NoErr + + VPADDL dSumAboveValU16, dAboveVal ;// [ XX | pSrcAbove[2+3 | 0+1]] + VPADDL dSumAboveValU32, dSumAboveValU16 ;// [ XXXX | pSrcAbove[2+3+0+1]] + + VRSHR dSumAboveValU32,dSumAboveValU32,#2 ;// Sum = (Sum + 2) >> 2 + ADD pDstTmp, pDst, dstStep + ADD dstep, dstStep, dstStep + VDUP dSum,dSumAboveValU8[0] + + B DCPredict4x4VertStore + +DCPredict4x4NoneAvailable + + VMOV dConst128U8,#0x80 ;// 0x8080808080808080 if(count == 0) + MOV return, #OMX_Sts_NoErr + + ADD pDstTmp, pDst, dstStep + ADD dstep, dstStep, dstStep + B DCPredict4x4VertStore + + + +OMX_VC_4x4_DIAG_DL + + TST availability, #OMX_VC_UPPER_RIGHT + BEQ DiagDLUpperRightNotAvailable + + VLD1 dAbove0,[pSrcAbove] ;// [U7|U6|U5|U4|U3|U2|U1|U0] + VDUP dU7, dAbove0[7] ;// [U7|U7|U7|U7|U7|U7|U7|U7] + VEXT dAbove1, dAbove0, dU7, #1 ;// [U7|U7|U6|U5|U4|U3|U2|U1] + VEXT dAbove2, dAbove0, dU7, #2 ;// [U7|U7|U7|U6|U5|U4|U3|U2] + B DiagDLPredict4x4Store + +DiagDLUpperRightNotAvailable + VLD1 dAboveU32[1],[pSrcAbove] ;// [U3|U2|U1|U0|-|-|-|-] + VDUP dU3, dAbove[7] ;// [U3 U3 U3 U3 U3 U3 U3 U3] + + VEXT dAbove0, dAbove, dU3, #4 ;// [U3 U3 U3 U3 U3 U2 U1 U0] + VEXT dAbove1, dAbove, dU3, #5 ;// [U3 U3 U3 U3 U3 U3 U2 U1] + VEXT dAbove2, dAbove, dU3, #6 ;// [U3 U3 U3 U3 U3 U3 U3 U2] + +DiagDLPredict4x4Store + + VHADD dTmp, dAbove0, dAbove2 + VRHADD dTmp, dTmp, dAbove1 ;// (a+2*b+c+2)>>2 + + + VST1 dTmpU32[0],[pDst],dstStep + VEXT dTmp,dTmp,dTmp,#1 + VST1 dTmpU32[0],[pDst],dstStep + VEXT dTmp,dTmp,dTmp,#1 + VST1 dTmpU32[0],[pDst],dstStep + VEXT dTmp,dTmp,dTmp,#1 + VST1 dTmpU32[0],[pDst] + + B ExitPredict4x4 ;// Branch to exit code + + +OMX_VC_4x4_DIAG_DR + + + ;// Load U0,U1,U2,U3 + + VLD1 dAboveU32[0],[pSrcAbove] ;// [X|X|X|X|U3|U2|U1|U0] + + ;// Load UL,L0,L1,L2,L3 ;// dLeft = [UL|L0|L1|L2|L3|X|X|X] + VLD1 {dLeft[7]},[pSrcAboveLeft] + ADD pSrcTmp, pSrcLeft, leftStep + ADD srcStep, leftStep, leftStep + ADD pDst1,pDst,dstStep + + VLD1 {dLeft[6]},[pSrcLeft],srcStep ;// pSrcLeft[0*leftStep] + VLD1 
{dLeft[5]},[pSrcTmp],srcStep ;// pSrcLeft[1*leftStep] + VLD1 {dLeft[4]},[pSrcLeft] ;// pSrcLeft[2*leftStep] + VLD1 {dLeft[3]},[pSrcTmp] ;// pSrcLeft[3*leftStep] + + + VEXT dAbove0,dLeft,dAbove,#3 ;// [U2|U1|U0|UL|L0|L1|L2|L3] + ADD pDst2,pDst1,dstStep + VEXT dAbove1,dLeft,dAbove,#4 ;// [U3|U2|U1|U0|UL|L0|L1|L2] + ADD pDst3,pDst2,dstStep + VEXT dAbove2,dLeft,dAbove,#5 ;// [ X|U3|U2|U1|U0|UL|L0|L1] + + VHADD dTmp, dAbove0, dAbove2 + VRHADD dTmp, dTmp, dAbove1 ;// (a+2*b+c+2)>>2 + + + VST1 dTmpU32[0],[pDst3] ;// Store pTmp[0],[1],[2],[3] @ pDst3 + VEXT dTmp,dTmp,dTmp,#1 + VST1 dTmpU32[0],[pDst2] ;// Store pTmp[1],[2],[3],[4] @ pDst2 + VEXT dTmp,dTmp,dTmp,#1 + VST1 dTmpU32[0],[pDst1] ;// Store pTmp[2],[3],[4],[5] @ pDst1 + VEXT dTmp,dTmp,dTmp,#1 + VST1 dTmpU32[0],[pDst] ;// Store pTmp[3],[4],[5],[6] @ pDst + + B ExitPredict4x4 ;// Branch to exit code + +OMX_VC_4x4_VR + + + ;// Load UL,U0,U1,U2,U3 + VLD1 dAboveU32[0],[pSrcAbove] + VLD1 dAbove[7],[pSrcAboveLeft] ;// [UL|X|X|X|U3|U2|U1|U0] + + ;// Load L0,L1,L2 ;// dLeft0 = [L0|L2|X|X|X|X|X|X] + ;// dLeft1 = [L1| X|X|X|X|X|X|X] + VLD1 {dLeft0[7]},[pSrcLeft],leftStep ;// pSrcLeft[0*leftStep] + VLD1 {dLeft1[7]},[pSrcLeft],leftStep ;// pSrcLeft[1*leftStep] + VLD1 {dLeft0[6]},[pSrcLeft] ;// pSrcLeft[2*leftStep] + + + VEXT dOdd2,dAbove,dAbove,#7 ;// [ x x x U3 U2 U1 U0 UL ] + VEXT dEven0,dLeft0,dOdd2,#6 ;// [ x x x U1 U0 UL L0 L2 ] + VEXT dEven1,dLeft1,dOdd2,#7 ;// [ x x x U2 U1 U0 UL L1 ] + VEXT dEven2,dLeft0,dAbove,#7 ;// [ x x x U3 U2 U1 U0 L0 ] + VEXT dOdd0,dLeft1,dAbove,#7 ;// [ x x x U3 U2 U1 U0 L1 ] + VEXT dOdd1,dLeft0,dOdd2,#7 ;// [ x x x U2 U1 U0 UL L0 ] + + VHADD dTmp1, dOdd0, dOdd2 + VRHADD dTmp1, dTmp1, dOdd1 ;// Tmp[ x x x 9 7 5 3 1 ] + + VHADD dTmp0, dEven0, dEven2 + VRHADD dTmp0, dTmp0, dEven1 ;// Tmp[ x x x 8 6 4 2 0 ] + + + VEXT dTmp3,dTmp1,dTmp1,#1 ;// Tmp[ x x x x 9 7 5 3 ] + ADD pDstTmp, pDst, dstStep + ADD dstep, dstStep, dstStep + VEXT dTmp2,dTmp0,dTmp0,#1 ;// Tmp[ x x x x 8 6 4 2 ] + + + VST1 
dTmp3U32[0],[pDst],dstep ;// Tmp[9],[7],[5],[3] + VST1 dTmp2U32[0],[pDstTmp],dstep ;// Tmp[8],[6],[4],[2] + VST1 dTmp1U32[0],[pDst],dstep ;// Tmp[7],[5],[3],[1] + VST1 dTmp0U32[0],[pDstTmp] ;// Tmp[6],[4],[2],[0] + + B ExitPredict4x4 ;// Branch to exit code + +OMX_VC_4x4_HD + + + ;// Load U0,U1,U2,U3 + VLD1 dAbove,[pSrcAbove] ;//dAboveLeftVal = [U7|U6|U5|U4|U3|U2|U1|U0] + + ;// Load UL,L0,L1,L2,L3 ;// dLeft = [UL|L0|L1|L2|L3|X|X|X] + VLD1 {dLeft[7]},[pSrcAboveLeft] + ADD pSrcTmp, pSrcLeft, leftStep + ADD srcStep, leftStep, leftStep + + VLD1 {dLeft[6]},[pSrcLeft],srcStep ;// pSrcLeft[0*leftStep] + VLD1 {dLeft[5]},[pSrcTmp],srcStep ;// pSrcLeft[1*leftStep] + VLD1 {dLeft[4]},[pSrcLeft] ;// pSrcLeft[2*leftStep] + VLD1 {dLeft[3]},[pSrcTmp] ;// pSrcLeft[3*leftStep] + + VEXT dAbove0,dLeft,dAbove,#3 ;// [ U2|U1|U0|UL|L0|L1|L2|L3 ] + VEXT dAbove1,dLeft,dAbove,#2 ;// [ U1|U0|UL|L0|L1|L2|L3|X ] + VEXT dAbove2,dLeft,dAbove,#1 ;// [ U0|UL|L0|L1|L2|L3|X|X ] + + VHADD dTmp0, dAbove0, dAbove2 + VRHADD dTmp0, dTmp0, dAbove1 ;// Tmp[ 0 | 1 | 2 | 4 | 6 | 8 | X | X ] + + + VRHADD dTmp1, dAbove1, dAbove0 ;// (a+b+1)>>1 + VSHL dTmp1U64,dTmp1U64,#24 ;// Tmp[ 3|5| 7 |9 | X | X | X | X ] + + + VSHL dTmpU64,dTmp0U64,#16 ;// Tmp[ 2|4|6|8| X | X | X | X ] + VZIP dTmp1,dTmp ;// dTmp = [ 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 ] + VEXT dTmp0,dTmp0,dTmp0,#6 ;// Tmp[ X| X| X| X| X| X| 0 | 1 ] + VEXT dTmp1,dTmp,dTmp0,#2 ;// Tmp[ 0 | 1 | 2 | 3 | 4 | 5 | 6 |7 ] + + ADD pDstTmp, pDst, dstStep + ADD dstep, dstStep, dstStep + + VST1 dTmp1U32[1],[pDst],dstep ;// Store pTmp[0|1|2|3] + VST1 dTmpU32[1],[pDstTmp],dstep ;// Store pTmp[2|3|4|5] + VST1 dTmp1U32[0],[pDst] ;// Store pTmp[4|5|6|7] + VST1 dTmpU32[0],[pDstTmp] ;// Store pTmp[6|7|8|9] + + B ExitPredict4x4 ;// Branch to exit code + +OMX_VC_4x4_VL + + + TST availability, #OMX_VC_UPPER_RIGHT + BEQ DiagVLUpperRightNotAvailable + + VLD1 dAbove0,[pSrcAbove] ;// [U7|U6|U5|U4|U3|U2|U1|U0] + VEXT dAbove1,dAbove0,dAbove0,#1 ;// [ X|U7|U6|U5|U4|U3|U2|U1] + VEXT 
dAbove2,dAbove1,dAbove1,#1 ;// [ X| X|U7|U6|U5|U4|U3|U2] + + B DiagVLPredict4x4Store + +DiagVLUpperRightNotAvailable + VLD1 dAboveU32[1],[pSrcAbove] ;// [U3|U2|U1|U0|-|-|-|-] + VDUP dU3, dAbove[7] ;// [U3 U3 U3 U3 U3 U3 U3 U3] + + VEXT dAbove0, dAbove, dU3, #4 ;// [U3 U3 U3 U3 U3 U2 U1 U0] + VEXT dAbove1, dAbove, dU3, #5 ;// [U3 U3 U3 U3 U3 U3 U2 U1] + VEXT dAbove2, dAbove, dU3, #6 ;// [U3 U3 U3 U3 U3 U3 U3 U2] + +DiagVLPredict4x4Store + + VRHADD dTmp0, dAbove1, dAbove0 ;// (a+b+1)>>1 + ;// Tmp[ X| X| X| 8| 6| 4| 2| 0 ] + + VHADD dTmp3, dAbove0, dAbove2 + VRHADD dTmp3, dTmp3, dAbove1 ;// (a+2*b+c+2)>>2 + ;// Tmp[ X| X| X| 9| 7| 5| 3| 1 ] + + VEXT dTmp1,dTmp0,dTmp0,#1 ;// Tmp[ X| X| X| X| 8| 6| 4| 2 ] + ADD pDstTmp, pDst, dstStep + ADD dstep, dstStep, dstStep + VEXT dTmp2,dTmp3,dTmp1,#1 ;// Tmp[ X| X| X| X| 9| 7| 5| 3 ] + + VST1 dTmp0U32[0],[pDst],dstep ;// Tmp[6],[4],[2],[0] + VST1 dTmp3U32[0],[pDstTmp],dstep ;// Tmp[7],[5],[3],[1] + VST1 dTmp1U32[0],[pDst] ;// Tmp[8],[6],[4],[2] + VST1 dTmp2U32[0],[pDstTmp] ;// Tmp[9],[7],[5],[3] + + B ExitPredict4x4 ;// Branch to exit code + +OMX_VC_4x4_HU + ADD pSrcTmp, pSrcLeft, leftStep + ADD srcStep, leftStep, leftStep + + ;// Load Left Edge ;// [L3|L2|L1|L0|X|X|X|X] + VLD1 {dLeft[4]},[pSrcLeft],srcStep ;// pSrcLeft[0*leftStep] + VLD1 {dLeft[5]},[pSrcTmp],srcStep ;// pSrcLeft[1*leftStep] + VLD1 {dLeft[6]},[pSrcLeft] ;// pSrcLeft[2*leftStep] + VLD1 {dLeft[7]},[pSrcTmp] ;// pSrcLeft[3*leftStep] + + VDUP dL3,dLeft[7] ;// [L3|L3|L3|L3|L3|L3|L3|L3] + + VEXT dLeftHU0,dLeft,dL3,#4 ;// [L3|L3|L3|L3|L3|L2|L1|L0] + VEXT dLeftHU1,dLeft,dL3,#5 ;// [L3|L3|L3|L3|L3|L3|L2|L1] + VEXT dLeftHU2,dLeft,dL3,#6 ;// [L3|L3|L3|L3|L3|L3|L3|L2] + + VHADD dTmp0, dLeftHU0, dLeftHU2 + VRHADD dTmp0, dTmp0, dLeftHU1 ;// Tmp[ L3 | L3 | L3 | L3 | L3 | 5 | 3 | 1 ] + + VRHADD dTmp1, dLeftHU1, dLeftHU0 ;// (a+b+1)>>1 + ;// Tmp[ L3 | L3 | L3 | L3 | L3 | 4 | 2 | 0 ] + + VZIP dTmp1,dTmp0 ;// dTmp1 = Tmp[7| 6| 5| 4| 3| 2| 1| 0] + ;// dTmp0 = 
[L3|L3|L3|L3|L3|L3|L3|L3] + + + VST1 dTmp1U32[0],[pDst],dstStep ;// [3|2|1|0] + VEXT dTmp1,dTmp1,dTmp1,#2 + VST1 dTmp1U32[0],[pDst],dstStep ;// [5|4|3|2] + VEXT dTmp1,dTmp1,dTmp1,#2 + VST1 dTmp1U32[0],[pDst],dstStep ;// [7|6|5|4] + VST1 dTmp0U32[0],[pDst] ;// [9|8|7|6] + + +ExitPredict4x4 + + MOV return, #OMX_Sts_NoErr + M_END + + ENDIF ;// CortexA8 + + END +;//----------------------------------------------------------------------------------------------- +;// omxVCM4P10_PredictIntra_4x4 ends +;//----------------------------------------------------------------------------------------------- diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s new file mode 100755 index 0000000000000000000000000000000000000000..e3943397c9a0702149343a18f3103f8033d5db36 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s @@ -0,0 +1,140 @@ +;// +;// +;// File Name: omxVCM4P10_TransformDequantChromaDCFromPair_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+;// +;// +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + IMPORT armVCM4P10_QPDivTable + IMPORT armVCM4P10_VMatrixQPModTable + + M_VARIANTS CortexA8 + + + + + IF CortexA8 + +;// ARM Registers +;//-------------------------------------- +;// Declare input registers +;//-------------------------------------- +ppSrc RN 0 +pDst RN 1 +QP RN 2 + +;//-------------------------------- +;// Scratch variable for Unpack2x2 +;//-------------------------------- +pSrc RN 9 +Value RN 4 +Value2 RN 5 +Flag RN 6 +strOffset RN 7 +cstOffset RN 8 + +;//-------------------------------- +;// Scratch variable +;//-------------------------------- +r0w0 RN 3 +r0w1 RN 4 + +c0w0 RN 5 +c1w0 RN 6 + +return RN 0 +pQPDivTable RN 5 +pQPModTable RN 6 +Shift RN 9 +Scale RN 2 + + + +;// Neon Registers + +dZero DN D0.U16 +dInvTrCoeff DN D0.S16 +dScale DN D1.S16 +qDqntCoeff QN Q1.S32 +dDqntCoeff DN D2.S16 + + + ;// Write function header + M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9 + + LDR pSrc, [ppSrc] ;// Load pSrc + VMOV dZero, #0 + MOV cstOffset, #31 ;// To be used in the loop, to compute offset + + ;//----------------------------------------------------------------------- + ;// Firstly, fill all the coefficient values on the buffer by zero + ;//----------------------------------------------------------------------- + + VST1 dZero,[pDst] ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0 + LDRB Flag, [pSrc], #1 ;// Preload before + + +unpackLoop + TST Flag, #0x10 ;// Computing (Flag & 0x10) + LDRSBNE Value2,[pSrc,#1] + LDRBNE Value, [pSrc], #2 ;// Load byte wise to avoid unaligned access + AND strOffset, cstOffset, Flag, LSL #1 ;// strOffset = (Flag & 15) << 1; + LDRSBEQ Value, [pSrc], #1 ;// Value = (OMX_S8) *pSrc++ (sign-extended byte) + ORRNE Value,Value,Value2, LSL #8 ;// Value = low byte | (sign-extended high byte << 8) + + TST Flag, #0x20 ;// Computing (Flag & 0x20) to check, if we're done + LDRBEQ Flag, [pSrc], #1 ;// Flag = (OMX_U8) *pSrc++, for next iteration + STRH Value, [pDst, strOffset] ;// Store at offset + BEQ
unpackLoop ;// Branch to the loop beginning + + ;//-------------------------------------------------- + ;//InvTransformDC2x2: Inlined (Implemented in ARM V6) + ;//-------------------------------------------------- + + LDMIA pDst, {r0w0, r0w1} ;// r0w0 = |c1|c0| & r0w1 = |c3|c2| + + STR pSrc, [ppSrc] ;// Update the bitstream pointer + + LDR pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer + LDR pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer + + SADDSUBX r0w0, r0w0, r0w0 ;// [ c00+c01, c00-c01 ] + SADDSUBX r0w1, r0w1, r0w1 ;// [ c10+c11, c10-c11 ] + + LDRSB Shift, [pQPDivTable, QP] ;// Shift = pQPDivTable[QP] + LDRSB Scale, [pQPModTable, QP] ;// Scale = pQPModTable[QP] + + SADD16 c0w0, r0w0, r0w1 ;// [ d00+d10, d01+d11 ] + SSUB16 c1w0, r0w0, r0w1 ;// [ d00-d10, d01-d11 ] + + ;//------------------------------------------------- + ;//DequantChromaDC2x2: Inlined (Neon Implementation) + ;//------------------------------------------------- + + LSL Scale, Scale, Shift ;// Scale = Scale << Shift + VMOV dInvTrCoeff, c0w0, c1w0 + VREV32 dInvTrCoeff,dInvTrCoeff + VDUP dScale,Scale + + VMULL qDqntCoeff,dInvTrCoeff,dScale + VSHRN dDqntCoeff,qDqntCoeff,#1 + + + VST1 dDqntCoeff,[pDst] ;// Storing all the coefficients at once + + MOV return, #OMX_Sts_NoErr + M_END + + ENDIF ;// CortexA8 + + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s new file mode 100755 index 0000000000000000000000000000000000000000..25299595344e1ad73ad75e5d3da25ec21a83cf08 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s @@ -0,0 +1,264 @@ +;// +;// +;// File Name: omxVCM4P10_TransformDequantLumaDCFromPair_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// 
Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// Description: +;// H.264 inverse quantize and transform module +;// +;// + +;// Include standard headers + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + +;// Import/Export symbols required from/to other files +;// (For example tables) + + IMPORT armVCM4P10_UnpackBlock4x4 + IMPORT armVCM4P10_QPDivTable + IMPORT armVCM4P10_VMatrixQPModTable + + M_VARIANTS CortexA8 + +;// Set debugging level +;//DEBUG_ON SETL {TRUE} + + +;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4 + + +;// Guarding implementation by the processor name + + + +;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4 + +;// Guarding implementation by the processor name + + IF CortexA8 + +;//Input Registers +pData RN 0 +QP RN 1 + + +;//Local Scratch Registers + +;// ARM Registers + +pQPDivTable RN 2 +pQPModTable RN 3 +Shift RN 4 +Scale RN 5 + +;// NEON Registers + +;// Packed Input pixels +dIn0 DN D0.S16 +dIn1 DN D1.S16 +dIn2 DN D2.S16 +dIn3 DN D3.S16 + +;// Intermediate calculations +dRowSum1 DN D4.S16 +dRowSum2 DN D5.S16 +dRowDiff1 DN D6.S16 +dRowDiff2 DN D7.S16 + +;// Row operated pixels +dRowOp0 DN D0.S16 +dRowOp1 DN D1.S16 +dRowOp2 DN D2.S16 +dRowOp3 DN D3.S16 +qRowOp01 QN Q0.32 +qRowOp23 QN Q1.32 + +;// Intermediate calculations +dColSum1 DN D4.S16 +dColSum2 DN D5.S16 +dColDiff1 DN D6.S16 +dColDiff2 DN D7.S16 + +;// Column operated pixels +dColOp0 DN D0.S16 +dColOp1 DN D1.S16 +dColOp2 DN D2.S16 +dColOp3 DN D3.S16 + +;// Temporary scratch variables + +dScale DN D5.S16 +qRound0 QN Q3.S32 +qRound1 QN Q4.S32 +qRound2 QN Q5.S32 +qRound3 QN Q6.S32 + +;// InvTransformed and Dequantized pixels +dOut0 DN D0.S16 +dOut1 DN D1.S16 +dOut2 DN D2.S16 +dOut3 DN D3.S16 + + + ;// Allocate stack memory required by the function + + + ;// Write function header + M_START armVCM4P10_InvTransformDequantLumaDC4x4,r5,d13 + +
;****************************************************************** + ;// The strategy used in implementing the transform is as follows:* + ;// Load the 4x4 block into 4 D-registers * + ;// Transpose the 4x4 matrix * + ;// Perform the row operations (on columns) using SIMD * + ;// Transpose the 4x4 result matrix * + ;// Perform the column operations * + ;****************************************************************** + + ;// Load all the 4x4 pixels in Transposed form + + VLD4 {dIn0,dIn1,dIn2,dIn3},[pData] + LDR pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer + LDR pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer + + ;**************************************** + ;// Row Operations (Performed on columns) + ;**************************************** + ;// Scale factor calculation is done using ARM instructions + ;// interleaved with NEON instructions in order to dual-issue + + VADD dRowSum1,dIn0,dIn1 + VADD dRowSum2,dIn2,dIn3 + VSUB dRowDiff1,dIn0,dIn1 + LDRSB Shift, [pQPDivTable, QP] ;// ARM CODE: Shift = pQPDivTable[QP] + VSUB dRowDiff2,dIn2,dIn3 + LDRSB Scale, [pQPModTable, QP] ;// ARM CODE: Scale = pQPModTable[QP] + VADD dRowOp0,dRowSum1,dRowSum2 + VSUB dRowOp1,dRowSum1,dRowSum2 + VSUB dRowOp2,dRowDiff1,dRowDiff2 + LSL Scale, Scale, Shift ;// ARM CODE: Scale = Scale << Shift + VADD dRowOp3,dRowDiff1,dRowDiff2 + + ;**************************************** + ;// Transpose the resultant matrix + ;**************************************** + + VTRN dRowOp0,dRowOp1 + VTRN dRowOp2,dRowOp3 + VTRN qRowOp01,qRowOp23 + + ;**************************************** + ;// Column Operations + ;**************************************** + + VADD dColSum1,dRowOp0,dRowOp1 + VADD dColSum2,dRowOp2,dRowOp3 + VSUB dColDiff1,dRowOp0,dRowOp1 + VSUB dColDiff2,dRowOp2,dRowOp3 + VADD dColOp0,dColSum1,dColSum2 + VSUB dColOp1,dColSum1,dColSum2 + VSUB dColOp2,dColDiff1,dColDiff2 + VADD dColOp3,dColDiff1,dColDiff2 + +
;//---------------------------------------------------------------------- + ;// + ;// improves on the c-reference code + ;// Both the cases i.e., Shift>=0 and Shift<0 cases are covered together + ;// We do not subtract 2 from Shift as in C reference, instead perform a + ;// Scale << Shift once in the beginning and do a right shift by a + ;// constant 2 after the Multiplication. The value of Round would be 2 + ;// + ;// By doing this we avoid the branches required and also + ;// reduce the code size substantially + ;// + ;//---------------------------------------------------------------------- + + + VDUP dScale, Scale ;// ARM -> NEON copy 'scale' to vector + + + VMOV qRound0,#2 ;// Set the Round Value + VMOV qRound1,#2 + VMOV qRound2,#2 + VMOV qRound3,#2 + + VMLAL qRound0,dColOp0,dScale ;// pDst[i] * Scale + Round + VMLAL qRound1,dColOp1,dScale + VMLAL qRound2,dColOp2,dScale + VMLAL qRound3,dColOp3,dScale + + VSHRN dOut0,qRound0,#2 ;// Right shift by 2 & (OMX_S16)Value + VSHRN dOut1,qRound1,#2 + VSHRN dOut2,qRound2,#2 + VSHRN dOut3,qRound3,#2 + + ;*************************** + ;// Store all the 4x4 pixels + ;*************************** + + VST1 {dOut0,dOut1,dOut2,dOut3}, [pData] + + + ;// Set return value + + ;// Write function tail + M_END + + ENDIF ;//CORTEXA8 + + + +;// Function: omxVCM4P10_TransformDequantLumaDCFromPair + +;//Input Registers +ppSrc RN 0 +pDst RN 1 +QPR2 RN 2 + +;//Output Registers +result RN 0 + +;//Local Scratch Registers +pDstR4 RN 4 +pDstR0 RN 0 +QPR1 RN 1 +QPR5 RN 5 + +;// Guarding implementation by the processor name + + IF CortexA8 + + ;// Allocate stack memory required by the function + + + ;// Write function header + M_START omxVCM4P10_TransformDequantLumaDCFromPair,r5 + + MOV pDstR4,pDst ;// Saving register r1 + MOV QPR5,QPR2 ;// Saving register r2 + BL armVCM4P10_UnpackBlock4x4 + + MOV pDstR0,pDstR4 ;// Setting up register r0 + MOV QPR1,QPR5 ;// Setting up register r1 + BL armVCM4P10_InvTransformDequantLumaDC4x4 + + + ;// Set return
value + MOV result,#OMX_Sts_NoErr + + ;// Write function tail + M_END + + + ENDIF ;//ARM1136JS + + + END \ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S new file mode 100644 index 0000000000000000000000000000000000000000..aca2df4879f906b628746cd393f0007a02ebcd3a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S @@ -0,0 +1,134 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global armVCM4P10_Average_4x4_Align0_unsafe + .func armVCM4P10_Average_4x4_Align0_unsafe +armVCM4P10_Average_4x4_Align0_unsafe: + PUSH {r4-r6,lr} + LDR r7, =0x80808080 + LDR r12,[r2,#0] + LDR r10,[r0],r1 + LDR lr,[r2,r3] + LDR r11,[r0],r1 + MVN r12,r12 + MVN lr,lr + UHSUB8 r5,r10,r12 + UHSUB8 r4,r11,lr + EOR r5,r5,r7 + STR r5,[r2],r3 + EOR r4,r4,r7 + STR r4,[r2],r3 + LDR r10,[r0],r1 + LDR r12,[r2,#0] + LDR r11,[r0],r1 + LDR lr,[r2,r3] + MVN r12,r12 + UHSUB8 r5,r10,r12 + MVN lr,lr + UHSUB8 r4,r11,lr + EOR r5,r5,r7 + STR r5,[r2],r3 + EOR r4,r4,r7 + STR r4,[r2],r3 + POP {r4-r6,pc} + .endfunc + + .global armVCM4P10_Average_4x4_Align2_unsafe + .func armVCM4P10_Average_4x4_Align2_unsafe +armVCM4P10_Average_4x4_Align2_unsafe: + PUSH {r4-r6,lr} + LDR r7, =0x80808080 + LDR r4,[r0,#4] + LDR r10,[r0],r1 + LDR r12,[r2,#0] + LDR lr,[r2,r3] + LDR r5,[r0,#4] + LDR r11,[r0],r1 + MVN r12,r12 + MVN lr,lr + LSR r10,r10,#16 + ORR r10,r10,r4,LSL #16 + LSR r11,r11,#16 + ORR r11,r11,r5,LSL #16 + UHSUB8 r5,r10,r12 + UHSUB8 r4,r11,lr + EOR r5,r5,r7 + STR r5,[r2],r3 + EOR r4,r4,r7 + STR r4,[r2],r3 + LDR r4,[r0,#4] + LDR r10,[r0],r1 + LDR r12,[r2,#0] + LDR lr,[r2,r3] + LDR r5,[r0,#4] + LDR r11,[r0],r1 + MVN r12,r12 + 
MVN lr,lr + LSR r10,r10,#16 + ORR r10,r10,r4,LSL #16 + LSR r11,r11,#16 + ORR r11,r11,r5,LSL #16 + UHSUB8 r5,r10,r12 + UHSUB8 r4,r11,lr + EOR r5,r5,r7 + STR r5,[r2],r3 + EOR r4,r4,r7 + STR r4,[r2],r3 + POP {r4-r6,pc} + .endfunc + + .global armVCM4P10_Average_4x4_Align3_unsafe + .func armVCM4P10_Average_4x4_Align3_unsafe +armVCM4P10_Average_4x4_Align3_unsafe: + PUSH {r4-r6,lr} + LDR r7, =0x80808080 + LDR r4,[r0,#4] + LDR r10,[r0],r1 + LDR r12,[r2,#0] + LDR lr,[r2,r3] + LDR r5,[r0,#4] + LDR r11,[r0],r1 + MVN r12,r12 + MVN lr,lr + LSR r10,r10,#24 + ORR r10,r10,r4,LSL #8 + LSR r11,r11,#24 + ORR r11,r11,r5,LSL #8 + UHSUB8 r5,r10,r12 + UHSUB8 r4,r11,lr + EOR r5,r5,r7 + STR r5,[r2],r3 + EOR r4,r4,r7 + STR r4,[r2],r3 + LDR r4,[r0,#4] + LDR r10,[r0],r1 + LDR r12,[r2,#0] + LDR lr,[r2,r3] + LDR r5,[r0,#4] + LDR r11,[r0],r1 + MVN r12,r12 + MVN lr,lr + LSR r10,r10,#24 + ORR r10,r10,r4,LSL #8 + LSR r11,r11,#24 + ORR r11,r11,r5,LSL #8 + UHSUB8 r5,r10,r12 + UHSUB8 r4,r11,lr + EOR r5,r5,r7 + STR r5,[r2],r3 + EOR r4,r4,r7 + STR r4,[r2],r3 + POP {r4-r6,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S new file mode 100644 index 0000000000000000000000000000000000000000..b9ee2214ba16e70ec7c5941eac2c8e6f73da0057 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S @@ -0,0 +1,54 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global armVCM4P10_DeblockingChromabSLT4_unsafe + .func armVCM4P10_DeblockingChromabSLT4_unsafe +armVCM4P10_DeblockingChromabSLT4_unsafe: + VLD1.32 {d18[0]},[r5]! 
+ VSUBL.U8 q11,d5,d9 + VMOV d28,d18 + VSUBL.U8 q10,d8,d4 + VSHR.S16 q11,q11,#2 + VZIP.8 d18,d28 + VBIF d18,d14,d16 + VRHADD.S16 q10,q11,q10 + VADD.I8 d31,d18,d15 + VQMOVN.S16 d20,q10 + VLD1.8 {d0[]},[r2] + VMIN.S8 d20,d20,d31 + VNEG.S8 d31,d31 + VLD1.8 {d2[]},[r3] + VMAX.S8 d20,d20,d31 + VMOVL.U8 q14,d4 + VMOVL.U8 q12,d8 + VADDW.S8 q14,q14,d20 + VSUBW.S8 q12,q12,d20 + VQMOVUN.S16 d29,q14 + VQMOVUN.S16 d24,q12 + BX lr + .endfunc + + .global armVCM4P10_DeblockingChromabSGE4_unsafe + .func armVCM4P10_DeblockingChromabSGE4_unsafe +armVCM4P10_DeblockingChromabSGE4_unsafe: + VHADD.U8 d13,d4,d9 + VHADD.U8 d31,d8,d5 + VLD1.8 {d0[]},[r2] + ADD r5,r5,#4 + VLD1.8 {d2[]},[r3] + VRHADD.U8 d13,d13,d5 + VRHADD.U8 d31,d31,d9 + BX lr + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S new file mode 100644 index 0000000000000000000000000000000000000000..47f3d44287ffd0f9660c22a45934e3e2f7c9476d --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S @@ -0,0 +1,102 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global armVCM4P10_DeblockingLumabSLT4_unsafe + .func armVCM4P10_DeblockingLumabSLT4_unsafe +armVCM4P10_DeblockingLumabSLT4_unsafe: + VSUBL.U8 q11,d5,d9 + VLD1.8 {d18[]},[r5]! + VSUBL.U8 q10,d8,d4 + VLD1.8 {d19[]},[r5]! 
+ VSHR.S16 q11,q11,#2 + VEXT.8 d18,d18,d19,#4 + VAND d19,d17,d15 + VBIF d18,d14,d16 + VRHADD.S16 q10,q11,q10 + VRHADD.U8 d24,d4,d8 + VADD.I8 d31,d18,d19 + VAND d19,d12,d15 + VQADD.U8 d23,d5,d18 + VQMOVN.S16 d20,q10 + VADD.I8 d31,d31,d19 + VQSUB.U8 d22,d5,d18 + VQADD.U8 d19,d9,d18 + VHADD.U8 d26,d24,d6 + VMIN.S8 d20,d20,d31 + VNEG.S8 d31,d31 + VQSUB.U8 d21,d9,d18 + VHADD.U8 d27,d24,d10 + VMAX.U8 d30,d26,d22 + VMAX.S8 d20,d20,d31 + VMOVL.U8 q14,d4 + VMOVL.U8 q12,d8 + VADDW.S8 q14,q14,d20 + VSUBW.S8 q12,q12,d20 + VQMOVUN.S16 d29,q14 + VQMOVUN.S16 d24,q12 + VMAX.U8 d25,d27,d21 + VMIN.U8 d30,d30,d23 + VMIN.U8 d25,d25,d19 + VBIF d29,d4,d16 + VBIF d30,d5,d17 + VBIF d24,d8,d16 + VBIF d25,d9,d12 + BX lr + .endfunc + + .global armVCM4P10_DeblockingLumabSGE4_unsafe + .func armVCM4P10_DeblockingLumabSGE4_unsafe +armVCM4P10_DeblockingLumabSGE4_unsafe: + VSHR.U8 d19,d0,#2 + VADD.I8 d19,d19,d15 + VADDL.U8 q10,d8,d4 + VADD.I8 d19,d19,d15 + VADDL.U8 q11,d6,d9 + VADDW.U8 q12,q10,d5 + VCGT.U8 d19,d19,d13 + VSHR.U16 q11,q11,#1 + VHADD.U16 q11,q12,q11 + VADDW.U8 q12,q12,d6 + VADDL.U8 q13,d7,d6 + VAND d17,d17,d19 + VHADD.U8 d28,d4,d9 + VSRA.U16 q13,q12,#1 + VAND d12,d12,d19 + VQRSHRN.U16 d29,q11,#1 + VRHADD.U8 d28,d28,d5 + VQRSHRN.U16 d30,q12,#2 + VADDL.U8 q11,d10,d5 + VADDW.U8 q12,q10,d9 + VBIF d29,d28,d17 + VQRSHRN.U16 d31,q13,#2 + VADDL.U8 q13,d11,d10 + VSHR.U16 q11,q11,#1 + VHADD.U16 q11,q12,q11 + VADDW.U8 q12,q12,d10 + VHADD.U8 d28,d8,d5 + VBIF d29,d4,d16 + VBIF d30,d5,d17 + VSRA.U16 q13,q12,#1 + VQRSHRN.U16 d25,q12,#2 + VQRSHRN.U16 d24,q11,#1 + VRHADD.U8 d22,d28,d9 + VBIF d25,d9,d12 + VBIF d31,d6,d17 + VBIF d24,d22,d12 + VQRSHRN.U16 d28,q13,#2 + VBIF d24,d8,d16 + VBIF d28,d10,d12 + BX lr + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S new file mode 100644 index 
0000000000000000000000000000000000000000..e68bd8ef3353c36d026400a5c32b2252342d315e --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S @@ -0,0 +1,272 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global armVCM4P10_DecodeCoeffsToPair + .func armVCM4P10_DecodeCoeffsToPair +armVCM4P10_DecodeCoeffsToPair: + PUSH {r4-r12,lr} + SUB sp,sp,#0x40 + LDR r10,[r0,#0] + LDR r12,[r1,#0] + LDR r6, =armVCM4P10_CAVLCCoeffTokenTables + LDR r4,[sp,#0x68] + LDRB r9,[r10,#2] + LDRB r8,[r10,#1] + LDRB r11,[r10],#3 + ADD r12,r12,#8 + LDR r6,[r6,r4,LSL #2] + ORR r9,r9,r8,LSL #8 + ORR r11,r9,r11,LSL #16 + LSLS r8,r11,r12 + MOVS r7,#0x1e + AND r7,r7,r8,LSR #27 + SUBS r12,r12,#8 +L0x44: + BCC L1 + LDRB r8,[r10],#1 +L1: + LDRH r7,[r6,r7] + ADDCC r12,r12,#8 + ADD r12,r12,#4 + ORRCS r11,r8,r11,LSL #8 + LSRS r8,r7,#1 + BCS L0x74 + LSLS r8,r11,r12 + SUBS r12,r12,#0xa + ADD r7,r7,r8,LSR #29 + BIC r7,r7,#1 + B L0x44 +L0x74: + SUB r12,r12,r7,LSR #13 + BIC r7,r8,#0xf000 + LSRS r5,r7,#2 + STRB r5,[r2,#0] + BEQ L0x344 + CMP r7,#0x44 + BGE L0x33c + STR r0,[sp,#0] + STR r1,[sp,#4] + STR r3,[sp,#8] + ANDS r1,r7,#3 + ADD r2,sp,#0xc + BEQ L0xd8 + MOV r0,r1 +L0xac: + LSLS r7,r11,r12 + SUBS r12,r12,#7 + BCC L2 + LDRB r8,[r10],#1 +L2: + ADDCC r12,r12,#8 + LSR r7,r7,#31 + ORRCS r11,r8,r11,LSL #8 + SUBS r0,r0,#1 + MOV r8,#1 + SUB r8,r8,r7,LSL #1 + STRH r8,[r2],#2 + BGT L0xac +L0xd8: + SUBS r0,r5,r1 + BEQ L0x1b8 + MOV r4,#1 + CMP r5,#0xa + MOVLE r4,#0 + CMP r1,#3 + MOVLT r1,#4 + MOVGE r1,#2 + MOVGE r4,#0 +L0xfc: + LSLS r7,r11,r12 + CLZ r7,r7 + ADD r12,r12,r7 + SUBS r12,r12,#7 + BCC L3 + LDRB r8,[r10],#1 + ORR r11,r8,r11,LSL #8 + SUBS r12,r12,#8 + BCC L3 + LDRB r8,[r10],#1 +L3: + ADDCC r12,r12,#8 + ORRCS r11,r8,r11,LSL #8 + CMP r7,#0x10 + BGE L0x33c + MOVS lr,r4 + TEQEQ r7,#0xe + MOVEQ lr,#4 + TEQ r7,#0xf + MOVEQ lr,#0xc + 
TEQEQ r4,#0 + ADDEQ r7,r7,#0xf + TEQ lr,#0 + BEQ L0x184 + LSL r3,r11,r12 + ADD r12,r12,lr + SUBS r12,r12,#8 + RSB r9,lr,#0x20 + BCC L4 + LDRB r8,[r10],#1 + ORR r11,r8,r11,LSL #8 + SUBS r12,r12,#8 + BCC L4 + LDRB r8,[r10],#1 +L4: + ADDCC r12,r12,#8 + LSR r3,r3,r9 + ORRCS r11,r8,r11,LSL #8 + LSL r7,r7,r4 + ADD r7,r3,r7 +L0x184: + ADD r7,r7,r1 + MOV r1,#2 + LSRS r8,r7,#1 + RSBCS r8,r8,#0 + STRH r8,[r2],#2 + LDR r9, =armVCM4P10_SuffixToLevel + LDRSB r8,[r9,r4] + TEQ r4,#0 + MOVEQ r4,#1 + CMP r7,r8 + ADDCS r4,r4,#1 + SUBS r0,r0,#1 + BGT L0xfc +L0x1b8: + LDR r8,[sp,#0x6c] + SUB r0,r5,#1 + SUBS r1,r8,r5 + ADD r4,sp,#0x2c + MOV lr,r5 + SUB lr,lr,#1 + BEQ L0x2b0 + TEQ r8,#4 + LDREQ r6, =(armVCM4P10_CAVLCTotalZeros2x2Tables - 4) + LDRNE r6, =(armVCM4P10_CAVLCTotalZeroTables - 4) + LDR r6,[r6,r5,LSL #2] + LSLS r8,r11,r12 + MOVS r7,#0x1e + AND r7,r7,r8,LSR #27 + SUBS r12,r12,#8 +L0x1f4: + BCC L5 + LDRB r8,[r10],#1 +L5: + LDRH r7,[r6,r7] + ADDCC r12,r12,#8 + ADD r12,r12,#4 + ORRCS r11,r8,r11,LSL #8 + LSRS r8,r7,#1 + BCS L0x224 + LSLS r8,r11,r12 + SUBS r12,r12,#0xa + ADD r7,r7,r8,LSR #29 + BIC r7,r7,#1 + B L0x1f4 +L0x224: + SUB r12,r12,r7,LSR #13 + BIC r7,r8,#0xf000 + CMP r7,#0x10 + BGE L0x33c + LDR r3, =(armVCM4P10_CAVLCRunBeforeTables - 4) + ADD r4,sp,#0x2c + MOVS r1,r7 + ADD lr,lr,r1 + BEQ L0x2b0 +L0x248: + SUBS r0,r0,#1 + LDR r6,[r3,r1,LSL #2] + BLT L0x2bc + LSLS r8,r11,r12 + MOVS r7,#0xe + AND r7,r7,r8,LSR #28 + SUBS r12,r12,#8 +L0x264: + BCC L6 + LDRB r8,[r10],#1 +L6: + LDRH r7,[r6,r7] + ADDCC r12,r12,#8 + ADD r12,r12,#3 + ORRCS r11,r8,r11,LSL #8 + LSRS r8,r7,#1 + BCS L0x294 + LSLS r8,r11,r12 + SUBS r12,r12,#9 + ADD r7,r7,r8,LSR #29 + BIC r7,r7,#1 + B L0x264 +L0x294: + SUB r12,r12,r7,LSR #13 + BIC r7,r8,#0xf000 + CMP r7,#0xf + BGE L0x33c + SUBS r1,r1,r7 + STRB r7,[r4],#1 + BGT L0x248 +L0x2b0: + SUBS r0,r0,#1 + BLT L7 + STRB r1,[r4],#1 +L7: + BGT L0x2b0 +L0x2bc: + STRB r1,[r4],#1 + LDR r8,[sp,#0x6c] + TEQ r8,#0xf + ADDEQ lr,lr,#1 + SUB r4,r4,r5 + SUB r2,r2,r5 + SUB r2,r2,r5 
+ LDR r3,[sp,#8] + LDR r0,[r3,#0] + TEQ r8,#4 + LDREQ r6, =armVCM4P10_ZigZag_2x2 + LDRNE r6, =armVCM4P10_ZigZag_4x4 +L0x2ec: + LDRB r9,[r4],#1 + LDRB r8,[r6,lr] + SUB lr,lr,#1 + SUB lr,lr,r9 + LDRSH r9,[r2],#2 + SUBS r5,r5,#1 + ORREQ r8,r8,#0x20 + ADD r1,r9,#0x80 + CMP r1,#0x100 + ORRCS r8,r8,#0x10 + TEQ r5,#0 + STRB r8,[r0],#1 + STRB r9,[r0],#1 + LSR r9,r9,#8 + BCC L8 + STRB r9,[r0],#1 +L8: + BNE L0x2ec + STR r0,[r3,#0] + LDR r0,[sp,#0] + LDR r1,[sp,#4] + B L0x344 +L0x33c: + MVN r0,#1 + B L0x35c +L0x344: + ADD r10,r10,r12,LSR #3 + AND r12,r12,#7 + SUB r10,r10,#4 + STR r12,[r1,#0] + STR r10,[r0,#0] + MOV r0,#0 +L0x35c: + ADD sp,sp,#0x40 + POP {r4-r12,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DequantTables_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DequantTables_s.S new file mode 100644 index 0000000000000000000000000000000000000000..44eb4286c5e6c8fce44519399fe91f3bae5f13f0 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DequantTables_s.S @@ -0,0 +1,103 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .section .rodata + .align 4 + + + .global armVCM4P10_QPDivTable + .global armVCM4P10_VMatrixQPModTable + .global armVCM4P10_PosToVCol4x4 + .global armVCM4P10_PosToVCol2x2 + .global armVCM4P10_VMatrix + .global armVCM4P10_QPModuloTable + .global armVCM4P10_VMatrixU16 + +armVCM4P10_PosToVCol4x4: + .byte 0, 2, 0, 2 + .byte 2, 1, 2, 1 + .byte 0, 2, 0, 2 + .byte 2, 1, 2, 1 + +armVCM4P10_PosToVCol2x2: + .byte 0, 2 + .byte 2, 1 + +armVCM4P10_VMatrix: + .byte 10, 16, 13 + .byte 11, 18, 14 + .byte 13, 20, 16 + .byte 14, 23, 18 + .byte 16, 25, 20 + .byte 18, 29, 23 + +;//------------------------------------------------------- +;// This table evaluates the expression [(INT)(QP/6)], +;// for values of QP from 0 to 51 (inclusive); 9 rows of 6 are stored, i.e. indices 0 to 53. +;//------------------------------------------------------- + +armVCM4P10_QPDivTable: + .byte 0, 0, 0, 0, 0, 0 + .byte 1, 1, 1, 1, 1, 1 + .byte 2, 2, 2, 2, 2, 2 + .byte 3, 3, 3, 3, 3, 3 + .byte 4, 4, 4, 4, 4, 4 + .byte 5, 5, 5, 5, 5, 5 + .byte 6, 6, 6, 6, 6, 6 + .byte 7, 7, 7, 7, 7, 7 + .byte 8, 8, 8, 8, 8, 8 + +;//---------------------------------------------------- +;// This table contains armVCM4P10_VMatrix[QP%6][0] entries, +;// for values of QP from 0 to 51 (inclusive); 9 rows of 6 are stored, i.e. indices 0 to 53. +;//---------------------------------------------------- + +armVCM4P10_VMatrixQPModTable: + .byte 10, 11, 13, 14, 16, 18 + .byte 10, 11, 13, 14, 16, 18 + .byte 10, 11, 13, 14, 16, 18 + .byte 10, 11, 13, 14, 16, 18 + .byte 10, 11, 13, 14, 16, 18 + .byte 10, 11, 13, 14, 16, 18 + .byte 10, 11, 13, 14, 16, 18 + .byte 10, 11, 13, 14, 16, 18 + .byte 10, 11, 13, 14, 16, 18 + +;//------------------------------------------------------- +;// This table evaluates the modulus expression [QP%6]*6, +;// for values of QP from 0 to 51 (inclusive).
+;//------------------------------------------------------- + +armVCM4P10_QPModuloTable: + .byte 0, 6, 12, 18, 24, 30 + .byte 0, 6, 12, 18, 24, 30 + .byte 0, 6, 12, 18, 24, 30 + .byte 0, 6, 12, 18, 24, 30 + .byte 0, 6, 12, 18, 24, 30 + .byte 0, 6, 12, 18, 24, 30 + .byte 0, 6, 12, 18, 24, 30 + .byte 0, 6, 12, 18, 24, 30 + .byte 0, 6, 12, 18, 24, 30 + +;//------------------------------------------------------- +;// This table contains the individual armVCM4P10_VMatrix byte values stored as +;// halfwords. This avoids unpacking inside the function +;//------------------------------------------------------- + +armVCM4P10_VMatrixU16: + .hword 10, 16, 13 + .hword 11, 18, 14 + .hword 13, 20, 16 + .hword 14, 23, 18 + .hword 16, 25, 20 + .hword 18, 29, 23 + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S new file mode 100644 index 0000000000000000000000000000000000000000..37bc69b60a00462eb0d0b7d78fea8606eb02a1bc --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S @@ -0,0 +1,123 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + .func armVCM4P10_InterpolateLuma_HorAlign9x_unsafe +armVCM4P10_InterpolateLuma_HorAlign9x_unsafe: + MOV r12,r8 + AND r7,r0,#3 + BIC r0,r0,#3 + ADD pc,pc,r7,LSL #2 + NOP + B Copy0toAligned + B Copy1toAligned + B Copy2toAligned + B Copy3toAligned +Copy0toAligned: + LDM r0,{r7,r10,r11} + SUBS r9,r9,#1 + ADD r0,r0,r1 + STM r8!,{r7,r10,r11} + BGT Copy0toAligned + B CopyEnd +Copy1toAligned: + LDM r0,{r7,r10,r11} + SUBS r9,r9,#1 + ADD r0,r0,r1 + LSR r7,r7,#8 + ORR r7,r7,r10,LSL #24 + LSR r10,r10,#8 + ORR r10,r10,r11,LSL #24 + LSR r11,r11,#8 + STM r8!,{r7,r10,r11} + BGT Copy1toAligned + B CopyEnd +Copy2toAligned: + LDM r0,{r7,r10,r11} + SUBS r9,r9,#1 + ADD r0,r0,r1 + LSR r7,r7,#16 + ORR r7,r7,r10,LSL #16 + LSR r10,r10,#16 + ORR r10,r10,r11,LSL #16 + LSR r11,r11,#16 + STM r8!,{r7,r10,r11} + BGT Copy2toAligned + B CopyEnd +Copy3toAligned: + LDM r0,{r7,r10,r11} + SUBS r9,r9,#1 + ADD r0,r0,r1 + LSR r7,r7,#24 + ORR r7,r7,r10,LSL #8 + LSR r10,r10,#24 + ORR r10,r10,r11,LSL #8 + LSR r11,r11,#24 + STM r8!,{r7,r10,r11} + BGT Copy3toAligned +CopyEnd: + MOV r0,r12 + MOV r1,#0xc + BX lr + .endfunc + + .global armVCM4P10_InterpolateLuma_VerAlign4x_unsafe + .func armVCM4P10_InterpolateLuma_VerAlign4x_unsafe +armVCM4P10_InterpolateLuma_VerAlign4x_unsafe: + AND r7,r0,#3 + BIC r0,r0,#3 + ADD pc,pc,r7,LSL #2 + NOP + B Copy0toVAligned + B Copy1toVAligned + B Copy2toVAligned + B Copy3toVAligned +Copy0toVAligned: + LDR r7,[r0],r1 + SUBS r9,r9,#1 + STR r7,[r8],#4 + BGT Copy0toVAligned + B CopyVEnd +Copy1toVAligned: + LDR r10,[r0,#4] + LDR r7,[r0],r1 + SUBS r9,r9,#1 + LSL r10,r10,#24 + ORR r7,r10,r7,LSR #8 + STR r7,[r8],#4 + BGT Copy1toVAligned + B CopyVEnd +Copy2toVAligned: + LDR r10,[r0,#4] + LDR r7,[r0],r1 + SUBS r9,r9,#1 + LSL r10,r10,#16 + ORR r7,r10,r7,LSR #16 + STR r7,[r8],#4 + BGT Copy2toVAligned + B CopyVEnd +Copy3toVAligned: + LDR r10,[r0,#4] 
+ LDR r7,[r0],r1 + SUBS r9,r9,#1 + LSL r10,r10,#8 + ORR r7,r10,r7,LSR #24 + STR r7,[r8],#4 + BGT Copy3toVAligned +CopyVEnd: + SUB r0,r8,#0x1c + MOV r1,#4 + BX lr + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S new file mode 100644 index 0000000000000000000000000000000000000000..fe92201ac57be0db0a6af5185fb78dec8ada2284 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S @@ -0,0 +1,105 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global armVCM4P10_InterpolateLuma_Copy4x4_unsafe + .func armVCM4P10_InterpolateLuma_Copy4x4_unsafe +armVCM4P10_InterpolateLuma_Copy4x4_unsafe: + PUSH {r4-r6,lr} + AND r12,r0,#3 + BIC r0,r0,#3 + ADD pc,pc,r12,LSL #2 + NOP + B Copy4x4Align0 + B Copy4x4Align1 + B Copy4x4Align2 + B Copy4x4Align3 +Copy4x4Align0: + LDR r4,[r0],r1 + LDR r5,[r0],r1 + STR r4,[r2],r3 + LDR r8,[r0],r1 + STR r5,[r2],r3 + LDR r9,[r0],r1 + STR r8,[r2],r3 + STR r9,[r2],r3 + B Copy4x4End +Copy4x4Align1: + LDR r5,[r0,#4] + LDR r4,[r0],r1 + LDR r9,[r0,#4] + LDR r8,[r0],r1 + LSR r4,r4,#8 + ORR r4,r4,r5,LSL #24 + STR r4,[r2],r3 + LSR r8,r8,#8 + ORR r8,r8,r9,LSL #24 + LDR r5,[r0,#4] + LDR r4,[r0],r1 + STR r8,[r2],r3 + LDR r9,[r0,#4] + LDR r8,[r0],r1 + LSR r4,r4,#8 + ORR r4,r4,r5,LSL #24 + STR r4,[r2],r3 + LSR r8,r8,#8 + ORR r8,r8,r9,LSL #24 + STR r8,[r2],r3 + B Copy4x4End +Copy4x4Align2: + LDR r5,[r0,#4] + LDR r4,[r0],r1 + LDR r9,[r0,#4] + LDR r8,[r0],r1 + LSR r4,r4,#16 + ORR r4,r4,r5,LSL #16 + STR r4,[r2],r3 + LSR r8,r8,#16 + ORR r8,r8,r9,LSL #16 + STR r8,[r2],r3 + LDR r5,[r0,#4] + LDR r4,[r0],r1 + LDR r9,[r0,#4] + LDR r8,[r0],r1 + LSR r4,r4,#16 + ORR r4,r4,r5,LSL 
#16 + STR r4,[r2],r3 + LSR r8,r8,#16 + ORR r8,r8,r9,LSL #16 + STR r8,[r2],r3 + B Copy4x4End +Copy4x4Align3: + LDR r5,[r0,#4] + LDR r4,[r0],r1 + LDR r9,[r0,#4] + LDR r8,[r0],r1 + LSR r4,r4,#24 + ORR r4,r4,r5,LSL #8 + STR r4,[r2],r3 + LSR r8,r8,#24 + ORR r8,r8,r9,LSL #8 + STR r8,[r2],r3 + LDR r5,[r0,#4] + LDR r4,[r0],r1 + LDR r9,[r0,#4] + LDR r8,[r0],r1 + LSR r4,r4,#24 + ORR r4,r4,r5,LSL #8 + STR r4,[r2],r3 + LSR r8,r8,#24 + ORR r8,r8,r9,LSL #8 + STR r8,[r2],r3 +Copy4x4End: + POP {r4-r6,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S new file mode 100644 index 0000000000000000000000000000000000000000..544abe82a28bdbc1a114cbd1412374b14a28c7a6 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S @@ -0,0 +1,107 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe + .func armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe +armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe: + PUSH {r4-r6,lr} + MOV lr,#4 + LDR r6, =0xfe00fe0 + LDR r12, =0xff00ff +LoopStart1: + LDR r11,[r0,#0xc] + LDR r10,[r0,#8] + LDR r5,[r0,#4] + LDR r4,[r0],r1 + UQSUB16 r11,r11,r6 + UQSUB16 r10,r10,r6 + UQSUB16 r5,r5,r6 + UQSUB16 r4,r4,r6 + USAT16 r11,#13,r11 + USAT16 r10,#13,r10 + USAT16 r5,#13,r5 + USAT16 r4,#13,r4 + AND r11,r12,r11,LSR #5 + AND r10,r12,r10,LSR #5 + AND r5,r12,r5,LSR #5 + AND r4,r12,r4,LSR #5 + ORR r11,r10,r11,LSL #8 + ORR r10,r4,r5,LSL #8 + SUBS lr,lr,#1 + STRD r10,r11,[r7],#8 + BGT LoopStart1 + SUB r0,r7,#0x20 + MOV r1,#8 + POP {r4-r6,pc} + .endfunc + + .global armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe + .func armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe +armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe: + PUSH {r4-r6,lr} + LDR r6, =0xfe00fe0 + LDR r12, =0xff00ff + MOV lr,#2 +LoopStart: + LDR r11,[r0,#0xc] + LDR r10,[r0,#8] + LDR r5,[r0,#4] + LDR r4,[r0],r1 + UQSUB16 r11,r11,r6 + UQSUB16 r10,r10,r6 + UQSUB16 r5,r5,r6 + UQSUB16 r4,r4,r6 + USAT16 r11,#13,r11 + USAT16 r10,#13,r10 + USAT16 r5,#13,r5 + USAT16 r4,#13,r4 + AND r11,r12,r11,LSR #5 + AND r10,r12,r10,LSR #5 + AND r5,r12,r5,LSR #5 + AND r4,r12,r4,LSR #5 + ORR r11,r10,r11,LSL #8 + ORR r10,r4,r5,LSL #8 + PKHBT r4,r10,r11,LSL #16 + STR r4,[r7],#8 + PKHTB r5,r11,r10,ASR #16 + STR r5,[r7],#-4 + LDR r11,[r0,#0xc] + LDR r10,[r0,#8] + LDR r5,[r0,#4] + LDR r4,[r0],r1 + UQSUB16 r11,r11,r6 + UQSUB16 r10,r10,r6 + UQSUB16 r5,r5,r6 + UQSUB16 r4,r4,r6 + USAT16 r11,#13,r11 + USAT16 r10,#13,r10 + USAT16 r5,#13,r5 + USAT16 r4,#13,r4 + AND r11,r12,r11,LSR #5 + AND r10,r12,r10,LSR #5 + AND r5,r12,r5,LSR #5 + AND r4,r12,r4,LSR #5 + ORR r11,r10,r11,LSL #8 + ORR r10,r4,r5,LSL #8 + PKHBT r4,r10,r11,LSL #16 + SUBS lr,lr,#1 + STR r4,[r7],#8 + PKHTB r5,r11,r10,ASR #16 + 
STR r5,[r7],#4 + BGT LoopStart + SUB r0,r7,#0x18 + MOV r1,#4 + POP {r4-r6,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S new file mode 100644 index 0000000000000000000000000000000000000000..a330972f43bb73c4256d6236a8435584efab5efb --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S @@ -0,0 +1,164 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + .func armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe +armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe: + PUSH {r4-r12,lr} + VLD1.8 {d0,d1},[r0],r1 + VMOV.I16 d31,#0x14 + VMOV.I16 d30,#0x5 + VEXT.8 d4,d0,d1,#1 + VEXT.8 d2,d0,d1,#2 + VEXT.8 d3,d0,d1,#3 + VEXT.8 d5,d0,d1,#4 + VEXT.8 d1,d0,d1,#5 + VADDL.U8 q1,d2,d3 + VADDL.U8 q2,d4,d5 + VADDL.U8 q5,d0,d1 + VLD1.8 {d0,d1},[r0],r1 + VMLA.I16 d10,d2,d31 + VMUL.I16 d8,d4,d30 + VEXT.8 d4,d0,d1,#1 + VEXT.8 d2,d0,d1,#2 + VEXT.8 d3,d0,d1,#3 + VEXT.8 d5,d0,d1,#4 + VEXT.8 d1,d0,d1,#5 + VADDL.U8 q1,d2,d3 + VADDL.U8 q2,d4,d5 + VADDL.U8 q6,d0,d1 + VLD1.8 {d0,d1},[r0],r1 + VSUB.I16 d10,d10,d8 + VMLA.I16 d12,d2,d31 + VMUL.I16 d8,d4,d30 + VEXT.8 d4,d0,d1,#1 + VEXT.8 d2,d0,d1,#2 + VEXT.8 d3,d0,d1,#3 + VEXT.8 d5,d0,d1,#4 + VEXT.8 d1,d0,d1,#5 + VADDL.U8 q1,d2,d3 + VADDL.U8 q2,d4,d5 + VADDL.U8 q7,d0,d1 + VLD1.8 {d0,d1},[r0],r1 + VSUB.I16 d12,d12,d8 + VMLA.I16 d14,d2,d31 + VMUL.I16 d8,d4,d30 + VEXT.8 d4,d0,d1,#1 + VEXT.8 d2,d0,d1,#2 + VEXT.8 d3,d0,d1,#3 + VEXT.8 d5,d0,d1,#4 + VEXT.8 d1,d0,d1,#5 + VADDL.U8 q1,d2,d3 + VADDL.U8 q2,d4,d5 + VADDL.U8 q8,d0,d1 + VLD1.8 {d0,d1},[r0],r1 + 
VSUB.I16 d14,d14,d8 + VMLA.I16 d16,d2,d31 + VMUL.I16 d8,d4,d30 + VEXT.8 d4,d0,d1,#1 + VEXT.8 d2,d0,d1,#2 + VEXT.8 d3,d0,d1,#3 + VEXT.8 d5,d0,d1,#4 + VEXT.8 d1,d0,d1,#5 + VADDL.U8 q1,d2,d3 + VADDL.U8 q2,d4,d5 + VADDL.U8 q9,d0,d1 + VLD1.8 {d0,d1},[r0],r1 + VSUB.I16 d16,d16,d8 + VMLA.I16 d18,d2,d31 + VMUL.I16 d8,d4,d30 + VEXT.8 d4,d0,d1,#1 + VEXT.8 d2,d0,d1,#2 + VEXT.8 d3,d0,d1,#3 + VEXT.8 d5,d0,d1,#4 + VEXT.8 d1,d0,d1,#5 + VADDL.U8 q1,d2,d3 + VADDL.U8 q2,d4,d5 + VADDL.U8 q10,d0,d1 + VLD1.8 {d0,d1},[r0],r1 + VSUB.I16 d18,d18,d8 + VMLA.I16 d20,d2,d31 + VMUL.I16 d8,d4,d30 + VEXT.8 d4,d0,d1,#1 + VEXT.8 d2,d0,d1,#2 + VEXT.8 d3,d0,d1,#3 + VEXT.8 d5,d0,d1,#4 + VEXT.8 d1,d0,d1,#5 + VADDL.U8 q1,d2,d3 + VADDL.U8 q2,d4,d5 + VADDL.U8 q11,d0,d1 + VLD1.8 {d0,d1},[r0],r1 + VSUB.I16 d20,d20,d8 + VMLA.I16 d22,d2,d31 + VMUL.I16 d8,d4,d30 + VEXT.8 d4,d0,d1,#1 + VEXT.8 d2,d0,d1,#2 + VEXT.8 d3,d0,d1,#3 + VEXT.8 d5,d0,d1,#4 + VEXT.8 d1,d0,d1,#5 + VADDL.U8 q1,d2,d3 + VADDL.U8 q2,d4,d5 + VADDL.U8 q12,d0,d1 + VLD1.8 {d0,d1},[r0],r1 + VSUB.I16 d22,d22,d8 + VMLA.I16 d24,d2,d31 + VMUL.I16 d8,d4,d30 + VEXT.8 d4,d0,d1,#1 + VEXT.8 d2,d0,d1,#2 + VEXT.8 d3,d0,d1,#3 + VEXT.8 d5,d0,d1,#4 + VEXT.8 d1,d0,d1,#5 + VADDL.U8 q1,d2,d3 + VADDL.U8 q2,d4,d5 + VADDL.U8 q13,d0,d1 + VSUB.I16 d24,d24,d8 + VMLA.I16 d26,d2,d31 + VMUL.I16 d8,d4,d30 + VMOV.I32 q15,#0x14 + VMOV.I32 q14,#0x5 + VADDL.S16 q5,d10,d20 + VADDL.S16 q1,d14,d16 + VADDL.S16 q0,d12,d18 + VSUB.I16 d26,d26,d8 + VMLA.I32 q5,q1,q15 + VMUL.I32 q4,q0,q14 + VADDL.S16 q6,d12,d22 + VADDL.S16 q1,d16,d18 + VADDL.S16 q0,d14,d20 + VMLA.I32 q6,q1,q15 + VSUB.I32 q5,q5,q4 + VMUL.I32 q4,q0,q14 + VADDL.S16 q2,d14,d24 + VADDL.S16 q1,d18,d20 + VADDL.S16 q0,d16,d22 + VMLA.I32 q2,q1,q15 + VSUB.I32 q6,q6,q4 + VMUL.I32 q4,q0,q14 + VADDL.S16 q3,d16,d26 + VADDL.S16 q1,d20,d22 + VADDL.S16 q0,d18,d24 + VMLA.I32 q3,q1,q15 + VSUB.I32 q2,q2,q4 + VMLS.I32 q3,q0,q14 + VQRSHRUN.S32 d0,q5,#10 + VQRSHRUN.S32 d2,q6,#10 + VQRSHRUN.S32 d4,q2,#10 + VQRSHRUN.S32 d6,q3,#10 + VQMOVN.U16 
d0,q0 + VQMOVN.U16 d2,q1 + VQMOVN.U16 d4,q2 + VQMOVN.U16 d6,q3 + POP {r4-r12,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S new file mode 100644 index 0000000000000000000000000000000000000000..991c33f9a180a53cd3dacdbdfc269ccbfd56384f --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S @@ -0,0 +1,119 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe + .func armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe +armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe: + PUSH {r4-r12,lr} + VLD1.8 {d0,d1},[r0],r1 + ADD r12,r0,r1,LSL #2 + VMOV.I8 d30,#0x5 + VMOV.I8 d31,#0x14 + VLD1.8 {d10,d11},[r12],r1 + VLD1.8 {d2,d3},[r0],r1 + VLD1.8 {d12,d13},[r12],r1 + VADDL.U8 q9,d0,d10 + VLD1.8 {d4,d5},[r0],r1 + VADDL.U8 q0,d1,d11 + VLD1.8 {d6,d7},[r0],r1 + VADDL.U8 q10,d2,d12 + VLD1.8 {d8,d9},[r0],r1 + VMLAL.U8 q9,d4,d31 + VLD1.8 {d14,d15},[r12],r1 + VMLAL.U8 q0,d5,d31 + VLD1.8 {d16,d17},[r12],r1 + VMLAL.U8 q9,d6,d31 + VMLAL.U8 q10,d6,d31 + VMLSL.U8 q0,d3,d30 + VADDL.U8 q11,d4,d14 + VMLSL.U8 q9,d2,d30 + VADDL.U8 q1,d3,d13 + VMLAL.U8 q0,d7,d31 + VMLAL.U8 q10,d8,d31 + VMLSL.U8 q9,d8,d30 + VMLAL.U8 q1,d7,d31 + VMLSL.U8 q0,d9,d30 + VMLAL.U8 q11,d8,d31 + VMLSL.U8 q10,d4,d30 + VMLSL.U8 q1,d5,d30 + VADDL.U8 q2,d5,d15 + VMLAL.U8 q11,d10,d31 + VMLSL.U8 q10,d10,d30 + VMLAL.U8 q1,d9,d31 + VMLAL.U8 q2,d9,d31 + VADDL.U8 q12,d6,d16 + VMLSL.U8 q11,d6,d30 + VMLSL.U8 q1,d11,d30 + VMLSL.U8 q2,d7,d30 + VADDL.U8 q3,d7,d17 + VMLAL.U8 q12,d10,d31 + VMLSL.U8 q11,d12,d30 + VMLSL.U8 q2,d13,d30 + 
VMLAL.U8 q3,d11,d31 + VMLAL.U8 q12,d12,d31 + VEXT.8 d26,d18,d19,#2 + VMLAL.U8 q2,d11,d31 + VMLAL.U8 q3,d13,d31 + VMLSL.U8 q12,d8,d30 + VEXT.8 d27,d18,d19,#4 + VMOV.I16 d31,#0x14 + VMLSL.U8 q3,d9,d30 + VMLSL.U8 q12,d14,d30 + VEXT.8 d29,d19,d0,#2 + VEXT.8 d28,d18,d19,#6 + VMLSL.U8 q3,d15,d30 + VADDL.S16 q0,d18,d29 + VADD.I16 d27,d27,d28 + VMOV.I16 d30,#0x5 + VADD.I16 d26,d26,d19 + VMLAL.S16 q0,d27,d31 + VEXT.8 d27,d20,d21,#4 + VEXT.8 d28,d20,d21,#6 + VEXT.8 d29,d21,d2,#2 + VMLSL.S16 q0,d26,d30 + VEXT.8 d26,d20,d21,#2 + VADDL.S16 q1,d20,d29 + VADD.I16 d27,d27,d28 + VADD.I16 d26,d26,d21 + VEXT.8 d28,d22,d23,#6 + VMLAL.S16 q1,d27,d31 + VEXT.8 d29,d23,d4,#2 + VEXT.8 d27,d22,d23,#4 + VEXT.8 d8,d22,d23,#2 + VADDL.S16 q2,d22,d29 + VMLSL.S16 q1,d26,d30 + VADD.I16 d27,d27,d28 + VADD.I16 d26,d8,d23 + VEXT.8 d28,d24,d25,#6 + VMLAL.S16 q2,d27,d31 + VEXT.8 d27,d24,d25,#4 + VEXT.8 d29,d25,d6,#2 + VADD.I16 d27,d27,d28 + VEXT.8 d8,d24,d25,#2 + VADDL.S16 q3,d24,d29 + VMLSL.S16 q2,d26,d30 + VMLAL.S16 q3,d27,d31 + VADD.I16 d8,d8,d25 + VMLSL.S16 q3,d8,d30 + VQRSHRUN.S32 d0,q0,#10 + VQRSHRUN.S32 d2,q1,#10 + VQRSHRUN.S32 d4,q2,#10 + VQRSHRUN.S32 d6,q3,#10 + VQMOVN.U16 d0,q0 + VQMOVN.U16 d2,q1 + VQMOVN.U16 d4,q2 + VQMOVN.U16 d6,q3 + POP {r4-r12,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S new file mode 100644 index 0000000000000000000000000000000000000000..40e141baef2277e40f33cfc775af0320ea643628 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S @@ -0,0 +1,72 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + .func armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe +armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe: /* Horizontal 6-tap filter over four source rows. In: r0 = first byte of row 0, r1 = amount added to r0 after each row load. Out: d22, d24, d26, d28 hold rows 0..3; only the low four 16-bit lanes of each sum are filtered, so only the low four result bytes are meaningful. d30 and d31 are used as multipliers but never written here: the caller must preload them (omxVCM4P10_InterpolateLuma sets d31 = 0x14 and d30 = 0x5 in every 16-bit lane before calling). On return d14/d16/d18/d20 still hold each row shifted by 2 bytes and d15/d17/d19/d21 by 3 bytes; that caller averages them with the result. Below, x[k] is the byte k positions after the row start. */ + PUSH {r4-r12,lr} /* saved here, restored by the final POP which also returns via pc */ + VLD1.8 {d22,d23},[r0],r1 /* row 0: 16 bytes, then r0 moves to the next row */ + VEXT.8 d10,d22,d23,#5 /* d10 = x[5..12] */ + VEXT.8 d12,d22,d23,#1 + VEXT.8 d14,d22,d23,#2 + VEXT.8 d15,d22,d23,#3 + VEXT.8 d13,d22,d23,#4 + VADDL.U8 q11,d22,d10 /* q11 = x[k]+x[k+5], widened to 16 bit */ + VADDL.U8 q4,d14,d15 /* q4 = x[k+2]+x[k+3] */ + VADDL.U8 q6,d12,d13 /* q6 = x[k+1]+x[k+4] */ + VLD1.8 {d24,d25},[r0],r1 /* row 1 load, interleaved with the row 0 arithmetic */ + VMLA.I16 d22,d8,d31 /* row 0 sum += (x[k+2]+x[k+3]) * d31 */ + VMUL.I16 d8,d12,d30 /* d8 = (x[k+1]+x[k+4]) * d30 */ + VEXT.8 d10,d24,d25,#5 + VEXT.8 d12,d24,d25,#1 + VEXT.8 d16,d24,d25,#2 + VEXT.8 d17,d24,d25,#3 + VEXT.8 d13,d24,d25,#4 + VADDL.U8 q12,d24,d10 + VSUB.I16 d22,d22,d8 /* row 0 sum -= d8: row 0 filter sum complete */ + VADDL.U8 q4,d16,d17 + VADDL.U8 q6,d12,d13 + VLD1.8 {d26,d27},[r0],r1 /* row 2 */ + VMLA.I16 d24,d8,d31 + VMUL.I16 d8,d12,d30 + VEXT.8 d10,d26,d27,#5 + VEXT.8 d12,d26,d27,#1 + VEXT.8 d18,d26,d27,#2 + VEXT.8 d19,d26,d27,#3 + VEXT.8 d13,d26,d27,#4 + VADDL.U8 q13,d26,d10 + VSUB.I16 d24,d24,d8 /* row 1 filter sum complete */ + VADDL.U8 q4,d18,d19 + VADDL.U8 q6,d12,d13 + VLD1.8 {d28,d29},[r0],r1 /* row 3 */ + VMLA.I16 d26,d8,d31 + VMUL.I16 d8,d12,d30 + VEXT.8 d10,d28,d29,#5 + VEXT.8 d12,d28,d29,#1 + VEXT.8 d20,d28,d29,#2 + VEXT.8 d21,d28,d29,#3 + VEXT.8 d13,d28,d29,#4 + VADDL.U8 q14,d28,d10 + VSUB.I16 d26,d26,d8 /* row 2 filter sum complete */ + VADDL.U8 q4,d20,d21 + VADDL.U8 q6,d12,d13 + VMLA.I16 d28,d8,d31 + VMLS.I16 d28,d12,d30 /* row 3 subtracts its d30 term directly instead of via d8 */ + VQRSHRUN.S16 d22,q11,#5 /* round, shift right by 5, saturate to unsigned 8 bit */ + VQRSHRUN.S16 d24,q12,#5 + VQRSHRUN.S16 d26,q13,#5 + VQRSHRUN.S16 d28,q14,#5 + POP {r4-r12,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S new file mode 100644 index 0000000000000000000000000000000000000000..955846f3f778da706a4c94a62d2d02e22b053028 --- /dev/null +++
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S @@ -0,0 +1,58 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + .func armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe +armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe: /* Vertical 6-tap filter: four output rows from nine source rows of 8 bytes. In: r0 = row 0, r1 = row stride. Out: d0, d2, d4, d6 (only the low four 16-bit lanes of each sum are filtered). Output n = row[n] plus row[n+5], minus (row[n+1] plus row[n+4]) * d30, plus (row[n+2] plus row[n+3]) * d31, then rounded, shifted right by 5 and saturated. d30 and d31 are read but never written here: the caller must preload them (omxVCM4P10_InterpolateLuma sets d31 = 0x14 and d30 = 0x5). Source rows 1..8 are still in d8..d15 on return. */ + PUSH {r4-r12,lr} /* restored by the final POP, which also returns via pc */ + VLD1.8 {d7},[r0],r1 /* d7 = row 0 */ + ADD r12,r0,r1,LSL #2 /* r0 already points at row 1, so r12 = address of row 5 */ + VLD1.8 {d8},[r0],r1 /* row 1 */ + VLD1.8 {d12},[r12],r1 /* row 5 */ + VLD1.8 {d9},[r0],r1 /* row 2 */ + VADDL.U8 q0,d7,d12 /* q0 = row0+row5, widened to 16 bit */ + VLD1.8 {d10},[r0],r1 /* row 3 */ + VLD1.8 {d13},[r12],r1 /* row 6 */ + VLD1.8 {d11},[r0],r1 /* row 4 */ + VLD1.8 {d14},[r12],r1 /* row 7 */ + VADDL.U8 q8,d8,d11 + VADDL.U8 q9,d9,d10 + VLD1.8 {d15},[r12],r1 /* row 8 */ + VMLS.I16 d0,d16,d30 /* output 0 -= (row1+row4) * d30 */ + VMUL.I16 d20,d18,d31 /* d20 = (row2+row3) * d31, folded in below */ + VADDL.U8 q8,d9,d12 + VADDL.U8 q9,d10,d11 + VADDL.U8 q1,d8,d13 /* output 1 starts as row1+row6 */ + VMLS.I16 d2,d16,d30 + VMUL.I16 d21,d18,d31 + VADDL.U8 q8,d10,d13 + VADDL.U8 q9,d11,d12 + VADDL.U8 q2,d9,d14 /* output 2 starts as row2+row7 */ + VMLS.I16 d4,d16,d30 + VMUL.I16 d22,d18,d31 + VADDL.U8 q8,d11,d14 + VADDL.U8 q3,d10,d15 /* output 3 starts as row3+row8; this overwrites d7, row 0 is no longer needed */ + VADDL.U8 q9,d12,d13 + VMLS.I16 d6,d16,d30 + VADD.I16 d0,d0,d20 /* add the d31 terms for outputs 0..2 */ + VADD.I16 d2,d2,d21 + VADD.I16 d4,d4,d22 + VMLA.I16 d6,d18,d31 /* output 3 accumulates its d31 term directly */ + VQRSHRUN.S16 d0,q0,#5 /* round, shift right by 5, saturate to unsigned 8 bit */ + VQRSHRUN.S16 d2,q1,#5 + VQRSHRUN.S16 d4,q2,#5 + VQRSHRUN.S16 d6,q3,#5 + POP {r4-r12,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S new file mode 100644 index 0000000000000000000000000000000000000000..66520da3adf5a43cf2ba2cc461f148dcdb863e94 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S @@ -0,0 +1,175 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + + .section .rodata + .align 4 + +armVCM4P10_WidthBranchTableMVIsNotZero: /* jump table read at byte offset 2*r4, so r4 = 2, 4, 8 select words 1, 2, 4 */ + .word WidthIs2MVIsNotZero, WidthIs2MVIsNotZero + .word WidthIs4MVIsNotZero, WidthIs4MVIsNotZero + .word WidthIs8MVIsNotZero + +armVCM4P10_WidthBranchTableMVIsZero: /* same layout, used when r6 and r7 sum to zero */ + .word WidthIs2MVIsZero, WidthIs2MVIsZero + .word WidthIs4MVIsZero, WidthIs4MVIsZero + .word WidthIs8MVIsZero + + .text + + .global armVCM4P10_Interpolate_Chroma + .func armVCM4P10_Interpolate_Chroma +armVCM4P10_Interpolate_Chroma: /* Weighted 2x2 (bilinear) interpolation, or a plain copy when both fractions are zero. In: r0 = source, r1 = source stride, r2 = destination, r3 = destination stride, plus four stack words loaded into r4..r7 below. r4 selects the 2/4/8 code path through the tables above and r5 is the row counter; r6 and r7 are the two fractions (presumably width, height, dx, dy - confirm against the C prototype). Always returns 0 in r0. */ + PUSH {r4-r12,lr} /* 10 words = 0x28 bytes */ + VPUSH {d8-d15} /* 0x40 more bytes, so the first stack argument is now at sp+0x68 */ + LDRD r6,r7,[sp,#0x70] /* third and fourth stack arguments */ + LDRD r4,r5,[sp,#0x68] /* first and second stack arguments */ + RSB r8,r6,#8 /* r8 = 8-r6 */ + RSB r9,r7,#8 /* r9 = 8-r7 */ + CMN r6,r7 /* Z set when r6 plus r7 is zero; nothing below alters the flags before LDREQ/LDRNE */ + MOV r10,#1 + LDREQ r11, =armVCM4P10_WidthBranchTableMVIsZero + SUB lr,r1,r10 /* lr = stride-1: a load stepping by r10 then one stepping by lr advances exactly one row */ + LDRNE r11, =armVCM4P10_WidthBranchTableMVIsNotZero + VLD1.8 {d0},[r0],r10 /* d0 = row 0 at x */ + SMULBB r12,r8,r9 /* weight (8-r6)*(8-r7), applied to the sample at x, current row */ + SMULBB r9,r6,r9 /* weight r6*(8-r7), applied to x+1, current row */ + VLD1.8 {d1},[r0],lr /* d1 = row 0 at x+1; r0 now points at row 1 */ + SMULBB r8,r8,r7 /* weight (8-r6)*r7, applied to x, next row */ + SMULBB r6,r6,r7 /* weight r6*r7, applied to x+1, next row */ + VDUP.8 d12,r12 + VDUP.8 d13,r9 + VDUP.8 d14,r8 + VDUP.8 d15,r6 + LDR pc,[r11,r4,LSL #1] /* dispatch through the selected table */ + +WidthIs8MVIsNotZero: /* four output rows of 8 bytes per iteration */ + VLD1.8 {d2},[r0],r10 + VMULL.U8 q2,d0,d12 + VLD1.8 {d3},[r0],lr + VMULL.U8 q3,d2,d12 + VLD1.8 {d16},[r0],r10 + VMLAL.U8 q2,d1,d13 + VLD1.8 {d17},[r0],lr + VMULL.U8 q11,d16,d12 + VMLAL.U8 q3,d3,d13 + VLD1.8 {d18},[r0],r10 + VMLAL.U8 q2,d2,d14 + VMLAL.U8 q11,d17,d13 + VMULL.U8 q12,d18,d12 + VLD1.8 {d19},[r0],lr + VMLAL.U8 q3,d16,d14 + VLD1.8 {d0},[r0],r10 /* reloads d0/d1 as the top row of the next iteration */ + VMLAL.U8 q12,d19,d13 + VMLAL.U8 q11,d18,d14 + VMLAL.U8 q2,d3,d15 + VLD1.8 {d1},[r0],lr + VMLAL.U8 q12,d0,d14 + VMLAL.U8 q3,d17,d15 + VMLAL.U8 q11,d19,d15 + SUBS r5,r5,#4 /* sets the flags tested by BGT; the NEON instructions in between leave them alone */ + VMLAL.U8 q12,d1,d15 + VQRSHRN.U16 d8,q2,#6 /* round, shift right by 6, saturate to 8 bit */ + VQRSHRN.U16 d9,q3,#6 + VQRSHRN.U16 d20,q11,#6 + VST1.64 {d8},[r2],r3 + VQRSHRN.U16 d21,q12,#6 + VST1.64 {d9},[r2],r3 + VST1.64 {d20},[r2],r3 + VST1.64 {d21},[r2],r3 + BGT WidthIs8MVIsNotZero + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r12,pc} + +WidthIs4MVIsNotZero: /* two output rows per iteration; 8 bytes are loaded and filtered but only 4 are stored */ + VLD1.8 {d2},[r0],r10 + VMULL.U8 q2,d0,d12 + VMULL.U8 q3,d2,d12 + VLD1.8 {d3},[r0],lr + VMLAL.U8 q2,d1,d13 +
VMLAL.U8 q3,d3,d13 + VLD1.8 {d0},[r0],r10 + VMLAL.U8 q2,d2,d14 + VMLAL.U8 q3,d0,d14 + VLD1.8 {d1},[r0],lr + SUBS r5,r5,#2 + VMLAL.U8 q3,d1,d15 + VMLAL.U8 q2,d3,d15 + VQRSHRN.U16 d9,q3,#6 + VQRSHRN.U16 d8,q2,#6 + VST1.32 {d8[0]},[r2],r3 + VST1.32 {d9[0]},[r2],r3 + BGT WidthIs4MVIsNotZero + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r12,pc} + +WidthIs2MVIsNotZero: /* same arithmetic as the 4-wide path, storing 2 bytes per row */ + VLD1.8 {d2},[r0],r10 + VMULL.U8 q2,d0,d12 + VMULL.U8 q3,d2,d12 + VLD1.8 {d3},[r0],lr + VMLAL.U8 q2,d1,d13 + VMLAL.U8 q3,d3,d13 + VLD1.8 {d0},[r0],r10 + VMLAL.U8 q2,d2,d14 + VMLAL.U8 q3,d0,d14 + VLD1.8 {d1},[r0],lr + SUBS r5,r5,#2 + VMLAL.U8 q3,d1,d15 + VMLAL.U8 q2,d3,d15 + VQRSHRN.U16 d9,q3,#6 + VQRSHRN.U16 d8,q2,#6 + VST1.16 {d8[0]},[r2],r3 + VST1.16 {d9[0]},[r2],r3 + BGT WidthIs2MVIsNotZero + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r12,pc} + +WidthIs8MVIsZero: /* plain copy, two rows of 8 bytes per iteration */ + SUB r0,r0,r1 /* undo the one-row advance made by the two loads in the prologue */ +WidthIs8LoopMVIsZero: + VLD1.8 {d0},[r0],r1 + SUBS r5,r5,#2 + VLD1.8 {d1},[r0],r1 + VST1.64 {d0},[r2],r3 + VST1.64 {d1},[r2],r3 + BGT WidthIs8LoopMVIsZero + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r12,pc} + +WidthIs4MVIsZero: /* plain copy of 4 bytes per row; d0 already holds row 0 from the prologue. NOTE(review): the last iteration still loads one row whose data is never stored, and every load is 8 bytes wide - confirm the source buffer allows this. */ + VLD1.8 {d1},[r0],r1 + SUBS r5,r5,#2 + VST1.32 {d0[0]},[r2],r3 + VLD1.8 {d0},[r0],r1 + VST1.32 {d1[0]},[r2],r3 + BGT WidthIs4MVIsZero + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r12,pc} + +WidthIs2MVIsZero: /* as the 4-wide copy, storing 2 bytes per row */ + VLD1.8 {d1},[r0],r1 + SUBS r5,r5,#2 + VST1.16 {d0[0]},[r2],r3 + VLD1.8 {d0},[r0],r1 + VST1.16 {d1[0]},[r2],r3 + BGT WidthIs2MVIsZero + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r12,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_QuantTables_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_QuantTables_s.S new file mode 100644 index 0000000000000000000000000000000000000000..f5d6d1f11a43b12e5dfe9b9aa53e5c8cd7144c07 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_QuantTables_s.S @@ -0,0 +1,68 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .section .rodata + .align 4 + + .global armVCM4P10_MFMatrixQPModTable + .global armVCM4P10_QPDivIntraTable + .global armVCM4P10_QPDivPlusOneTable + +;//------------------------------------------------------------------ +;// This table contains (1 << QbitsPlusOne) / 3 Values (Intra case) , +;// for values of iQP from 0 to 51 (inclusive). +;//------------------------------------------------------------------ + + +armVCM4P10_QPDivIntraTable: /* 32-bit entries, six per row, each row roughly double the previous one. NOTE(review): only 8 rows x 6 = 48 entries are defined (indices 0..47) although the header above says iQP 0..51, and the two tables below have 54 entries - confirm no caller indexes 48..51. */ + .word 21845, 21845, 21845, 21845, 21845, 21845 + .word 43690, 43690, 43690, 43690, 43690, 43690 + .word 87381, 87381, 87381, 87381, 87381, 87381 + .word 174762, 174762, 174762, 174762, 174762, 174762 + .word 349525, 349525, 349525, 349525, 349525, 349525 + .word 699050, 699050, 699050, 699050, 699050, 699050 + .word 1398101, 1398101, 1398101, 1398101, 1398101, 1398101 + .word 2796202, 2796202, 2796202, 2796202, 2796202, 2796202 + + +;//-------------------------------------------------------------- +;// This table contains armVCM4P10_MFMatrix [iQP % 6][0] entries, +;// for values of iQP from 0 to 51 (inclusive). +;//-------------------------------------------------------------- + +armVCM4P10_MFMatrixQPModTable: /* 16-bit entries, 9 rows x 6 = 54; every row repeats the same six values, one per iQP % 6 */ + .hword 13107, 11916, 10082, 9362, 8192, 7282 + .hword 13107, 11916, 10082, 9362, 8192, 7282 + .hword 13107, 11916, 10082, 9362, 8192, 7282 + .hword 13107, 11916, 10082, 9362, 8192, 7282 + .hword 13107, 11916, 10082, 9362, 8192, 7282 + .hword 13107, 11916, 10082, 9362, 8192, 7282 + .hword 13107, 11916, 10082, 9362, 8192, 7282 + .hword 13107, 11916, 10082, 9362, 8192, 7282 + .hword 13107, 11916, 10082, 9362, 8192, 7282 + +;//--------------------------------------------------------------- +;// This table contains ARM_M4P10_Q_OFFSET + 1 + (iQP / 6) values, +;// for values of iQP from 0 to 51 (inclusive).
+;//--------------------------------------------------------------- + +armVCM4P10_QPDivPlusOneTable: /* byte entries, 9 rows x 6 = 54; the value starts at 16 and rises by one every six indices */ + .byte 16, 16, 16, 16, 16, 16 + .byte 17, 17, 17, 17, 17, 17 + .byte 18, 18, 18, 18, 18, 18 + .byte 19, 19, 19, 19, 19, 19 + .byte 20, 20, 20, 20, 20, 20 + .byte 21, 21, 21, 21, 21, 21 + .byte 22, 22, 22, 22, 22, 22 + .byte 23, 23, 23, 23, 23, 23 + .byte 24, 24, 24, 24, 24, 24 + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S new file mode 100644 index 0000000000000000000000000000000000000000..c24d717cb6b195b23db9b4a13980964dc4014713 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S @@ -0,0 +1,52 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global armVCM4P10_TransformResidual4x4 + .func armVCM4P10_TransformResidual4x4 +armVCM4P10_TransformResidual4x4: /* Two-pass 4x4 transform on sixteen 16-bit values. In: r1 = source (16 halfwords), r0 = destination (16 halfwords). Each pass computes e = a0+a2, f = a0-a2, g = (a1>>1)-a3, h = a1+(a3>>1) and outputs e+h, f+g, f-g, e-h; the block is transposed between the passes and the result is rounded and shifted right by 6. This looks like the H.264 4x4 inverse integer transform - confirm against the spec. */ + VPUSH {d8} /* d8 is used as scratch below and restored by the VPOP before returning */ + VLD4.16 {d0,d1,d2,d3},[r1] /* de-interleaving load: d0 gets source elements 0,4,8,12, d1 gets 1,5,9,13, and so on */ + VMOV.I16 d4,#0 /* zero operand, so each VHADD below halves its other operand */ + VADD.I16 d5,d0,d2 + VSUB.I16 d6,d0,d2 + VHADD.S16 d7,d1,d4 /* d7 = d1 >> 1 */ + VHADD.S16 d8,d3,d4 /* d8 = d3 >> 1 */ + VSUB.I16 d7,d7,d3 + VADD.I16 d8,d1,d8 + VADD.I16 d0,d5,d8 + VADD.I16 d1,d6,d7 + VSUB.I16 d2,d6,d7 + VSUB.I16 d3,d5,d8 + VTRN.16 d0,d1 /* these three VTRN instructions transpose the 4x4 block held in d0..d3 */ + VTRN.16 d2,d3 + VTRN.32 q0,q1 + VADD.I16 d5,d0,d2 /* second pass: same butterfly on the transposed data */ + VSUB.I16 d6,d0,d2 + VHADD.S16 d7,d1,d4 + VHADD.S16 d8,d3,d4 + VSUB.I16 d7,d7,d3 + VADD.I16 d8,d1,d8 + VADD.I16 d0,d5,d8 + VADD.I16 d1,d6,d7 + VSUB.I16 d2,d6,d7 + VSUB.I16 d3,d5,d8 + VRSHR.S16 d0,d0,#6 /* rounding arithmetic shift right by 6 */ + VRSHR.S16 d1,d1,#6 + VRSHR.S16 d2,d2,#6 + VRSHR.S16 d3,d3,#6 + VST1.16 {d0,d1,d2,d3},[r0] /* store the 16 results contiguously */ + VPOP {d8} + BX lr + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S new file mode 100644 index 0000000000000000000000000000000000000000..c552f8dcfbcf9e8b4938b534211f27b84eabb9d3 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S @@ -0,0 +1,40 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global armVCM4P10_UnpackBlock4x4 + .func armVCM4P10_UnpackBlock4x4 +armVCM4P10_UnpackBlock4x4: /* Expands a packed byte stream into a zero-filled block of sixteen 16-bit values. In: r0 = address of the stream pointer (read here and written back, advanced, at the end), r1 = destination (32 bytes). Each entry is a flag byte followed by a value: flag bits 0..3 give the destination halfword index, bit 4 set means a 16-bit little-endian value follows (otherwise one sign-extended byte), bit 5 set marks the last entry. */ + PUSH {r4-r8,lr} + LDR r2,[r0,#0] /* r2 = current stream position */ + MOV r7,#0x1f /* mask applied to flag<<1 to form the destination byte offset */ + MOV r4,#0 + MOV r5,#0 + LDRB r3,[r2],#1 /* first flag byte */ + STRD r4,r5,[r1,#0] /* four 8-byte stores clear all 32 destination bytes */ + STRD r4,r5,[r1,#8] + STRD r4,r5,[r1,#0x10] + STRD r4,r5,[r1,#0x18] +unpackLoop: + TST r3,#0x10 /* NE: 16-bit value, EQ: 8-bit value */ + LDRNESB r5,[r2,#1] /* high byte, sign-extended */ + LDRNEB r4,[r2],#2 /* low byte; skip both value bytes */ + AND r6,r7,r3,LSL #1 /* r6 = 2 * (flag & 0xf): byte offset into the destination */ + LDREQSB r4,[r2],#1 /* 8-bit case: one sign-extended byte */ + ORRNE r4,r4,r5,LSL #8 /* 16-bit case: combine the two bytes */ + TST r3,#0x20 /* EQ: more entries follow */ + LDREQB r3,[r2],#1 /* fetch the next flag byte; r6 and r4 were already derived from the old one */ + STRH r4,[r1,r6] /* store the low 16 bits of the value */ + BEQ unpackLoop /* flags are still those of the TST above */ + STR r2,[r0,#0] /* write back the advanced stream pointer */ + POP {r4-r8,pc} + .endfunc + .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DeblockLuma_I.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DeblockLuma_I.S new file mode 100644 index 0000000000000000000000000000000000000000..ba6105934e3942a0c4c07628904c147294ca4304 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DeblockLuma_I.S @@ -0,0 +1,67 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global omxVCM4P10_DeblockLuma_I + .func omxVCM4P10_DeblockLuma_I +omxVCM4P10_DeblockLuma_I: /* Validates its six arguments, then runs the vertical-edge luma filter followed by the horizontal-edge one. In: r0..r3 plus two stack words. Returns -5 when validation fails, the vertical filter's non-zero result if it reports one, otherwise the horizontal filter's result. */ + PUSH {r4-r9,lr} /* 7 words = 0x1c bytes */ + MOVS r6,r0 /* keep r0 and set Z if it is zero; these flags survive until the BEQ below */ + SUB sp,sp,#0xc /* room for the two outgoing stack arguments; incoming stack arguments are now at sp+0x28 */ + MOV r9,r1 + MOV r7,r2 + MOV r8,r3 + LDR r4,[sp,#0x28] /* first stack argument */ + LDR r5,[sp,#0x2c] /* second stack argument */ + BEQ L0x58 /* reject r0 == 0 */ + TST r6,#7 + TSTEQ r9,#7 + BNE L0x58 /* reject unless r0 and r1 are both multiples of 8 */ + CMP r7,#0 + CMPNE r8,#0 + CMPNE r4,#0 + BEQ L0x58 /* reject if r2, r3 or the first stack argument is zero */ + TST r4,#3 + BNE L0x58 /* first stack argument must be a multiple of 4 */ + CMP r5,#0 + BEQ L0x58 /* second stack argument must be non-zero */ + TST r5,#3 + BEQ L0x64 /* and a multiple of 4; all checks passed */ +L0x58: + MVN r0,#4 /* r0 = ~4 = -5 (presumably OMX_Sts_BadArgErr - confirm against the OMX headers) */ +L0x5c: + ADD sp,sp,#0xc + POP {r4-r9,pc} +L0x64: + STR r4,[sp,#0] /* pass both stack arguments through unchanged */ + MOV r3,r8 + STR r5,[sp,#4] + MOV r2,r7 + MOV r1,r9 + MOV r0,r6 + BL omxVCM4P10_FilterDeblockingLuma_VerEdge_I + CMP r0,#0 + BNE L0x5c /* propagate a non-zero result without running the second filter */ + ADD r3,r5,#0x10 /* second call: both stack arguments advanced by 16 bytes */ + ADD r2,r4,#0x10 + STR r3,[sp,#4] + STR r2,[sp,#0] + ADD r3,r8,#2 /* and the r2/r3 arguments advanced by 2 bytes */ + ADD r2,r7,#2 + MOV r1,r9 + MOV r0,r6 + BL omxVCM4P10_FilterDeblockingLuma_HorEdge_I + ADD sp,sp,#0xc /* r0 from the call above is returned as-is */ + POP {r4-r9,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S new file mode 100644 index 0000000000000000000000000000000000000000..be21ee724c7979ccbcc7348d475ed57e0811d092 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S @@ -0,0 +1,119 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global omxVCM4P10_DequantTransformResidualFromPairAndAdd + .func omxVCM4P10_DequantTransformResidualFromPairAndAdd +omxVCM4P10_DequantTransformResidualFromPairAndAdd: + PUSH {r4-r12,lr} + VPUSH {d8-d9} + SUB sp,sp,#0x20 + ADD r4,sp,#0 + LDR r5,[sp,#0x64] + MOV r7,r1 + MOV r8,r2 + MOV r9,r3 + CMP r5,#0 + BEQ L0x114 + MOV r1,r4 + BL armVCM4P10_UnpackBlock4x4 ;// + LDR r1,[sp,#0x60] + LDR r11, =armVCM4P10_QPModuloTable + LDR r10, =armVCM4P10_QPDivTable + LDR r2, =armVCM4P10_VMatrixU16 + LDRSB r12,[r11,r1] + LDRSB lr,[r10,r1] + LDR r10, =0x3020504 + LDR r1, =0x5040100 + ADD r2,r2,r12 + VDUP.32 d7,r1 + VDUP.32 d9,r10 + VDUP.16 d5,lr + VLD1.8 {d6},[r2] + VTBL.8 d8,{d6},d7 + VTBL.8 d4,{d6},d9 + CMP r8,#0 + VLD1.16 {d0,d1,d2,d3},[r4] + VSHL.U16 d8,d8,d5 + VSHL.U16 d4,d4,d5 + BEQ L1 + LDRSH r10,[r8,#0] +L1: + VMUL.I16 d0,d0,d8 + VMUL.I16 d1,d1,d4 + VMUL.I16 d2,d2,d8 + VMUL.I16 d3,d3,d4 + VMOVNE.16 d0[0],r10 + VTRN.16 d0,d1 + VTRN.16 d2,d3 + VTRN.32 q0,q1 + VMOV.I16 d4,#0 + VADD.I16 d5,d0,d2 + VSUB.I16 d6,d0,d2 + VHADD.S16 d7,d1,d4 + VHADD.S16 d8,d3,d4 + VSUB.I16 d7,d7,d3 + VADD.I16 d8,d1,d8 + VADD.I16 d0,d5,d8 + VADD.I16 d1,d6,d7 + VSUB.I16 d2,d6,d7 + VSUB.I16 d3,d5,d8 + VTRN.16 d0,d1 + VTRN.16 d2,d3 + VTRN.32 q0,q1 + VADD.I16 d5,d0,d2 + VSUB.I16 d6,d0,d2 + VHADD.S16 d7,d1,d4 + VHADD.S16 d8,d3,d4 + VSUB.I16 d7,d7,d3 + VADD.I16 d8,d1,d8 + VADD.I16 d0,d5,d8 + VADD.I16 d1,d6,d7 + VSUB.I16 d2,d6,d7 + VSUB.I16 d3,d5,d8 + VRSHR.S16 d0,d0,#6 + VRSHR.S16 d1,d1,#6 + VRSHR.S16 d2,d2,#6 + VRSHR.S16 d3,d3,#6 + B L0x130 +L0x114: + LDRSH r10,[r8,#0] + ADD r10,r10,#0x20 + ASR r10,r10,#6 + VDUP.16 d0,r10 + VDUP.16 d1,r10 + VDUP.16 d2,r10 + VDUP.16 d3,r10 +L0x130: + LDR r1,[sp,#0x58] + LDR r10,[sp,#0x5c] + LDR r3,[r7],r1 + LDR r5,[r7],r1 + VMOV d4,r3,r5 + LDR r3,[r7],r1 + LDR r5,[r7,#0] + VMOV d5,r3,r5 + VADDW.U8 q3,q0,d4 + VADDW.U8 q4,q1,d5 + VQMOVUN.S16 d0,q3 + VQMOVUN.S16 d1,q4 + VST1.32 
{d0[0]},[r9],r10 + VST1.32 {d0[1]},[r9],r10 + VST1.32 {d1[0]},[r9],r10 + VST1.32 {d1[1]},[r9] + MOV r0,#0 + ADD sp,sp,#0x20 + VPOP {d8-d9} + POP {r4-r12,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S new file mode 100644 index 0000000000000000000000000000000000000000..79ba538d2be2a873d9a7fa9c74b4d88622c2250a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S @@ -0,0 +1,87 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global omxVCM4P10_FilterDeblockingChroma_HorEdge_I + .func omxVCM4P10_FilterDeblockingChroma_HorEdge_I +omxVCM4P10_FilterDeblockingChroma_HorEdge_I: + PUSH {r4-r10,lr} + VPUSH {d8-d15} + VLD1.8 {d0[]},[r2]! + SUB r0,r0,r1,LSL #1 + SUB r0,r0,r1 + VLD1.8 {d2[]},[r3]! 
+ LDR r4,[sp,#0x64] + LDR r5,[sp,#0x60] + LDR r9, =0x3030303 + LDR r8, =0x4040404 + VMOV.I8 d14,#0 + VMOV.I8 d15,#0x1 + VMOV.I16 d1,#0x4 + MOV r7,#0x40000000 +L0x38: + LDR r6,[r4],#8 + VLD1.8 {d6},[r0],r1 + VLD1.8 {d5},[r0],r1 + CMP r6,#0 + VLD1.8 {d4},[r0],r1 + VLD1.8 {d8},[r0],r1 + VABD.U8 d19,d6,d4 + VLD1.8 {d9},[r0],r1 + VABD.U8 d13,d4,d8 + VLD1.8 {d10},[r0],r1 + BEQ L0xe4 + VABD.U8 d12,d5,d4 + VABD.U8 d18,d9,d8 + VCGT.U8 d16,d0,d13 + VMOV.32 d26[0],r6 + VMAX.U8 d12,d18,d12 + VMOVL.U8 q13,d26 + VABD.U8 d17,d10,d8 + VCGT.S16 d27,d26,#0 + VCGT.U8 d12,d2,d12 + VCGT.U8 d19,d2,d19 + VAND d16,d16,d27 + TST r6,r9 + VCGT.U8 d17,d2,d17 + VAND d16,d16,d12 + VAND d12,d16,d17 + VAND d17,d16,d19 + BLNE armVCM4P10_DeblockingChromabSLT4_unsafe + TST r6,r8 + SUB r0,r0,r1,LSL #2 + VTST.16 d26,d26,d1 + BLNE armVCM4P10_DeblockingChromabSGE4_unsafe + VBIT d29,d13,d26 + VBIT d24,d31,d26 + VBIF d29,d4,d16 + VBIF d24,d8,d16 + VST1.8 {d29},[r0],r1 + ADDS r7,r7,r7 + VST1.8 {d24},[r0],r1 + BNE L0x38 + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r10,pc} +L0xe4: + VLD1.8 {d0[]},[r2] + SUB r0,r0,r1,LSL #1 + ADDS r7,r7,r7 + VLD1.8 {d2[]},[r3] + ADD r5,r5,#4 + BNE L0x38 + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r10,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S new file mode 100644 index 0000000000000000000000000000000000000000..dcdddbeaf9517eda307936da424150ccd9b85729 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S @@ -0,0 +1,123 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global omxVCM4P10_FilterDeblockingChroma_VerEdge_I + .func omxVCM4P10_FilterDeblockingChroma_VerEdge_I +omxVCM4P10_FilterDeblockingChroma_VerEdge_I: + PUSH {r4-r12,lr} + VPUSH {d8-d15} + VLD1.8 {d0[]},[r2]! + SUB r0,r0,#4 + VLD1.8 {d2[]},[r3]! + LDR r4,[sp,#0x6c] + LDR r5,[sp,#0x68] + LDR r8, =0x4040404 + LDR r9, =0x3030303 + VMOV.I8 d14,#0 + VMOV.I8 d15,#0x1 + VMOV.I16 d1,#0x4 + MOV r7,#0x40000000 +L0x34: + LDR r6,[r4],#8 + ADD r10,r0,r1 + ADD lr,r1,r1 + VLD1.8 {d7},[r0],lr + VLD1.8 {d8},[r10],lr + VLD1.8 {d5},[r0],lr + VLD1.8 {d10},[r10],lr + VLD1.8 {d6},[r0],lr + VLD1.8 {d9},[r10],lr + VLD1.8 {d4},[r0],lr + VLD1.8 {d11},[r10],lr + VZIP.8 d7,d8 + VZIP.8 d5,d10 + VZIP.8 d6,d9 + VZIP.8 d4,d11 + VZIP.16 d7,d5 + VZIP.16 d8,d10 + VZIP.16 d6,d4 + VZIP.16 d9,d11 + VTRN.32 d7,d6 + VTRN.32 d5,d4 + VTRN.32 d10,d11 + VTRN.32 d8,d9 + CMP r6,#0 + VABD.U8 d19,d6,d4 + VABD.U8 d13,d4,d8 + BEQ L0x170 + VABD.U8 d12,d5,d4 + VABD.U8 d18,d9,d8 + VMOV.32 d26[0],r6 + VCGT.U8 d16,d0,d13 + VMAX.U8 d12,d18,d12 + VMOVL.U8 q13,d26 + VABD.U8 d17,d10,d8 + VCGT.S16 d27,d26,#0 + VCGT.U8 d12,d2,d12 + VCGT.U8 d19,d2,d19 + VAND d16,d16,d27 + TST r6,r9 + VCGT.U8 d17,d2,d17 + VAND d16,d16,d12 + VAND d12,d16,d17 + VAND d17,d16,d19 + BLNE armVCM4P10_DeblockingChromabSLT4_unsafe + TST r6,r8 + SUB r0,r0,r1,LSL #3 + VTST.16 d26,d26,d1 + BLNE armVCM4P10_DeblockingChromabSGE4_unsafe + VBIT d29,d13,d26 + VBIT d24,d31,d26 + ADD r10,r0,#3 + VBIF d29,d4,d16 + ADD r12,r10,r1 + ADD lr,r1,r1 + VBIF d24,d8,d16 + ADDS r7,r7,r7 + VST1.8 {d29[0]},[r10],lr + VST1.8 {d29[1]},[r12],lr + VST1.8 {d29[2]},[r10],lr + VST1.8 {d29[3]},[r12],lr + VST1.8 {d29[4]},[r10],lr + VST1.8 {d29[5]},[r12],lr + VST1.8 {d29[6]},[r10],lr + VST1.8 {d29[7]},[r12],lr + ADD r12,r0,#4 + ADD r10,r12,r1 + VST1.8 {d24[0]},[r12],lr + VST1.8 {d24[1]},[r10],lr + VST1.8 {d24[2]},[r12],lr + VST1.8 {d24[3]},[r10],lr + VST1.8 {d24[4]},[r12],lr + VST1.8 
{d24[5]},[r10],lr + VST1.8 {d24[6]},[r12],lr + VST1.8 {d24[7]},[r10],lr + ADD r0,r0,#4 + BNE L0x34 + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r12,pc} +L0x170: + VLD1.8 {d0[]},[r2] + ADD r0,r0,#4 + SUB r0,r0,r1,LSL #3 + ADDS r7,r7,r7 + VLD1.8 {d2[]},[r3] + ADD r5,r5,#4 + BNE L0x34 + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r12,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S new file mode 100644 index 0000000000000000000000000000000000000000..97558994d4c9d54b26db3b306c334b61815dc76a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S @@ -0,0 +1,107 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global omxVCM4P10_FilterDeblockingLuma_HorEdge_I + .func omxVCM4P10_FilterDeblockingLuma_HorEdge_I +omxVCM4P10_FilterDeblockingLuma_HorEdge_I: + PUSH {r4-r12,lr} + VPUSH {d8-d15} + ADD r7,r2,#1 + ADD r8,r3,#1 + VLD1.8 {d0[]},[r2] + SUB r0,r0,r1,LSL #2 + VLD1.8 {d2[]},[r3] + LDR r4,[sp,#0x6c] + LDR r5,[sp,#0x68] + MOV r11,#0 + VMOV.I8 d14,#0 + VMOV.I8 d15,#0x1 + ADD r10,r1,r1 + MOV r9,#0x55000000 +L0x38: + LDRH r12,[r4],#2 + ADD r6,r0,r1 + CMP r12,#0 + BEQ L0xe4 + VLD1.8 {d7},[r0],r10 + VLD1.8 {d6},[r6],r10 + VLD1.8 {d5},[r0],r10 + VLD1.8 {d4},[r6],r10 + VLD1.8 {d8},[r0],r10 + VABD.U8 d12,d4,d5 + VLD1.8 {d9},[r6] + VABD.U8 d13,d8,d4 + VLD1.8 {d10},[r0],r1 + VABD.U8 d18,d9,d8 + VABD.U8 d19,d6,d4 + VCGT.U8 d16,d0,d13 + TST r12,#0xff + VMAX.U8 d12,d18,d12 + VABD.U8 d17,d10,d8 + VMOVEQ.32 d16[0],r11 + TST r12,#0xff00 + VCGT.U8 d19,d2,d19 + VCGT.U8 d12,d2,d12 + VMOVEQ.32 d16[1],r11 + VCGT.U8 d17,d2,d17 + VLD1.8 {d11},[r0] + VAND d16,d16,d12 + TST r12,#4 + VAND 
d12,d16,d17 + VAND d17,d16,d19 + BNE L0xf8 + SUB r0,r0,r1,LSL #2 + SUB r0,r0,r1 + BL armVCM4P10_DeblockingLumabSLT4_unsafe + VST1.8 {d30},[r0],r1 + VST1.8 {d29},[r0],r1 + SUB r6,r0,r1,LSL #2 + VST1.8 {d24},[r0],r1 + ADDS r9,r9,r9 + VST1.8 {d25},[r0] + ADD r0,r6,#8 + BCC L0x38 + B L0x130 +L0xe4: + ADD r0,r0,#8 + ADDS r9,r9,r9 + ADD r5,r5,#2 + BCC L0x38 + B L0x130 +L0xf8: + SUB r0,r0,r1,LSL #2 + SUB r0,r0,r1,LSL #1 + BL armVCM4P10_DeblockingLumabSGE4_unsafe + VST1.8 {d31},[r0],r1 + VST1.8 {d30},[r0],r1 + VST1.8 {d29},[r0],r1 + SUB r6,r0,r1,LSL #2 + VST1.8 {d24},[r0],r1 + ADDS r9,r9,r9 + VST1.8 {d25},[r0],r1 + ADD r5,r5,#2 + VST1.8 {d28},[r0] + ADD r0,r6,#8 + BCC L0x38 +L0x130: + SUB r0,r0,#0x10 + VLD1.8 {d0[]},[r7] + ADD r0,r0,r1,LSL #2 + VLD1.8 {d2[]},[r8] + BNE L0x38 + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r12,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S new file mode 100644 index 0000000000000000000000000000000000000000..66cc32ea2a00ebf6cbe7c519d069c8ac29362b7a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S @@ -0,0 +1,157 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global omxVCM4P10_FilterDeblockingLuma_VerEdge_I + .func omxVCM4P10_FilterDeblockingLuma_VerEdge_I +omxVCM4P10_FilterDeblockingLuma_VerEdge_I: + PUSH {r4-r12,lr} + VPUSH {d8-d15} + ADD r7,r2,#1 + ADD r8,r3,#1 + VLD1.8 {d0[]},[r2] + SUB r0,r0,#4 + VLD1.8 {d2[]},[r3] + LDR r4,[sp,#0x6c] + LDR r5,[sp,#0x68] + MOV r6,#0 + VMOV.I8 d14,#0 + VMOV.I8 d15,#0x1 + MOV r9,#0x11000000 + ADD r11,r1,r1 +L0x38: + LDRH r12,[r4],#4 + CMP r12,#0 + BEQ L0x160 + ADD r10,r0,r1 + VLD1.8 {d7},[r0],r11 + VLD1.8 {d8},[r10],r11 + VLD1.8 {d5},[r0],r11 + VZIP.8 d7,d8 + VLD1.8 {d10},[r10],r11 + VLD1.8 {d6},[r0],r11 + VZIP.8 d5,d10 + VLD1.8 {d9},[r10],r11 + VLD1.8 {d4},[r0],r11 + VLD1.8 {d11},[r10],r11 + VZIP.8 d6,d9 + VZIP.16 d8,d10 + VZIP.8 d4,d11 + SUB r0,r0,r1,LSL #3 + VZIP.16 d7,d5 + VZIP.16 d9,d11 + VZIP.16 d6,d4 + VTRN.32 d8,d9 + VTRN.32 d5,d4 + VTRN.32 d10,d11 + VTRN.32 d7,d6 + VABD.U8 d13,d4,d8 + VABD.U8 d12,d5,d4 + VABD.U8 d18,d9,d8 + VABD.U8 d19,d6,d4 + TST r12,#0xff + VCGT.U8 d16,d0,d13 + VMAX.U8 d12,d18,d12 + VABD.U8 d17,d10,d8 + VMOVEQ.32 d16[0],r6 + TST r12,#0xff00 + VCGT.U8 d19,d2,d19 + VCGT.U8 d12,d2,d12 + VMOVEQ.32 d16[1],r6 + VCGT.U8 d17,d2,d17 + VAND d16,d16,d12 + TST r12,#4 + VAND d12,d16,d17 + VAND d17,d16,d19 + BNE L0x17c + BL armVCM4P10_DeblockingLumabSLT4_unsafe + VZIP.8 d7,d6 + VZIP.8 d30,d29 + VZIP.8 d24,d25 + VZIP.8 d10,d11 + VZIP.16 d7,d30 + ADD r10,r0,r1 + VZIP.16 d24,d10 + VZIP.16 d25,d11 + VZIP.16 d6,d29 + VTRN.32 d7,d24 + VTRN.32 d30,d10 + VTRN.32 d6,d25 + VTRN.32 d29,d11 + VST1.8 {d7},[r0],r11 + VST1.8 {d24},[r10],r11 + VST1.8 {d30},[r0],r11 + VST1.8 {d10},[r10],r11 + VST1.8 {d6},[r0],r11 + VST1.8 {d25},[r10],r11 + ADDS r9,r9,r9 + VST1.8 {d29},[r0],r11 + ADD r5,r5,#2 + VST1.8 {d11},[r10],r1 + SUB r0,r0,r1,LSL #3 + VLD1.8 {d0[]},[r7] + ADD r0,r0,#4 + VLD1.8 {d2[]},[r8] + BCC L0x38 + B L0x1f0 +L0x160: + ADD r0,r0,#4 + ADDS r9,r9,r9 + VLD1.8 {d0[]},[r7] + ADD r5,r5,#4 + 
VLD1.8 {d2[]},[r8] + BCC L0x38 + B L0x1f0 +L0x17c: + BL armVCM4P10_DeblockingLumabSGE4_unsafe + VZIP.8 d7,d31 + VZIP.8 d30,d29 + VZIP.8 d24,d25 + VZIP.8 d28,d11 + VZIP.16 d7,d30 + ADD r10,r0,r1 + VZIP.16 d24,d28 + VZIP.16 d25,d11 + VZIP.16 d31,d29 + VTRN.32 d7,d24 + VTRN.32 d30,d28 + VTRN.32 d31,d25 + VTRN.32 d29,d11 + VST1.8 {d7},[r0],r11 + VST1.8 {d24},[r10],r11 + VST1.8 {d30},[r0],r11 + VST1.8 {d28},[r10],r11 + VST1.8 {d31},[r0],r11 + VST1.8 {d25},[r10],r11 + ADDS r9,r9,r9 + VST1.8 {d29},[r0],r11 + ADD r5,r5,#4 + VST1.8 {d11},[r10],r11 + SUB r0,r0,r1,LSL #3 + VLD1.8 {d0[]},[r7] + ADD r0,r0,#4 + VLD1.8 {d2[]},[r8] + BCC L0x38 +L0x1f0: + SUB r4,r4,#0xe + SUB r5,r5,#0xe + SUB r0,r0,#0x10 + VLD1.8 {d0[]},[r2] + ADD r0,r0,r1,LSL #3 + VLD1.8 {d2[]},[r3] + BNE L0x38 + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r12,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S new file mode 100644 index 0000000000000000000000000000000000000000..76c3d7d1ecc199fde1dda9ee286be02b64ca5e49 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S @@ -0,0 +1,323 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global omxVCM4P10_InterpolateLuma + .func omxVCM4P10_InterpolateLuma +omxVCM4P10_InterpolateLuma: /* Luma interpolation done in 4x4 tiles. r0 = source pointer, r1 = source stride, r2 = destination pointer, r3 = destination stride; four more words are read from the stack. Returns 0 in r0. NOTE(review): the stack words are presumably dx, dy and the roi width and height of the OpenMAX DL prototype; confirm against the spec. */ + PUSH {r4-r12,lr} + VPUSH {d8-d15} + SUB sp,sp,#0x10 /* 16-byte scratch area that holds r0-r3 of the current tile */ + LDR r6,[sp,#0x78] /* first stack argument; 0x78 = 16 scratch, 64 NEON and 40 core bytes */ + LDR r7,[sp,#0x7c] /* second stack argument */ + LDR r5,[sp,#0x80] /* third stack argument: horizontal byte count, consumed 4 per tile */ + LDR r4,[sp,#0x84] /* fourth stack argument: row count, consumed 4 per tile row */ + ADD r6,r6,r7,LSL #2 /* case index = first argument plus 4 times the second */ + ADD r11,sp,#0 + VMOV.I16 d31,#0x14 /* constants 20 and 5 kept in d31 and d30; presumably filter taps used by the helper routines, confirm */ + VMOV.I16 d30,#0x5 +L0x2c: + STM r11,{r0-r3} /* save the tile pointers; the case code below modifies r0 and r2 */ + ADD pc,pc,r6,LSL #2 /* computed branch: pc reads as this instruction plus 8, so index 0 lands on "B L0x78" */ + B L0x3f0 /* slot skipped by the computed branch */ + B L0x78 + B L0xa8 + B L0xdc + B L0x100 + B L0x134 + B L0x168 + B L0x1a8 + B L0x1f0 + B L0x234 + B L0x258 + B L0x2b0 + B L0x2d8 + B L0x330 + B L0x364 + B L0x3a8 + B L0x3f0 +L0x78: /* case 0: copy the 4x4 block unchanged */ + ADD r12,r0,r1,LSL #1 + VLD1.8 {d9},[r0],r1 + VLD1.8 {d11},[r12],r1 + VLD1.8 {d10},[r0] + VLD1.8 {d12},[r12] + ADD r12,r2,r3,LSL #1 + VST1.32 {d9[0]},[r2],r3 /* each store writes 4 bytes of one output row */ + VST1.32 {d11[0]},[r12],r3 + VST1.32 {d10[0]},[r2] + VST1.32 {d12[0]},[r12] + ADD r11,sp,#0 + B L0x434 +L0xa8: /* case 1 */ + SUB r0,r0,#2 + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + VRHADD.U8 d22,d22,d14 /* rounding average of the helper result with d14, d18, d16, d20 */ + VRHADD.U8 d26,d26,d18 + VRHADD.U8 d24,d24,d16 + VRHADD.U8 d28,d28,d20 + ADD r12,r2,r3,LSL #1 + VST1.32 {d22[0]},[r2],r3 + VST1.32 {d26[0]},[r12],r3 + VST1.32 {d24[0]},[r2] + VST1.32 {d28[0]},[r12] + ADD r11,sp,#0 + B L0x434 +L0xdc: /* case 2 */ + SUB r0,r0,#2 + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + ADD r12,r2,r3,LSL #1 + VST1.32 {d22[0]},[r2],r3 + VST1.32 {d26[0]},[r12],r3 + VST1.32 {d24[0]},[r2] + VST1.32 {d28[0]},[r12] + ADD r11,sp,#0 + B L0x434 +L0x100: /* case 3 */ + SUB r0,r0,#2 + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + VRHADD.U8 d22,d22,d15 + VRHADD.U8 d26,d26,d19 + VRHADD.U8 d24,d24,d17 + VRHADD.U8 d28,d28,d21 + ADD r12,r2,r3,LSL #1 + VST1.32 {d22[0]},[r2],r3 + VST1.32 {d26[0]},[r12],r3 + VST1.32 {d24[0]},[r2] + VST1.32 {d28[0]},[r12] + ADD r11,sp,#0 + B L0x434 +L0x134: /* case 4 */ + SUB r0,r0,r1,LSL #1 + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + VRHADD.U8 d0,d0,d9 + VRHADD.U8 d4,d4,d11 + VRHADD.U8 d2,d2,d10 + VRHADD.U8 d6,d6,d12 + ADD r12,r2,r3,LSL #1 + VST1.32 {d0[0]},[r2],r3 + 
VST1.32 {d4[0]},[r12],r3 + VST1.32 {d2[0]},[r2] + VST1.32 {d6[0]},[r12] + ADD r11,sp,#0 + B L0x434 +L0x168: /* case 5 */ + MOV r8,r0 /* keep the source pointer for the second helper call */ + SUB r0,r0,r1,LSL #1 + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + SUB r0,r8,#2 + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + VRHADD.U8 d22,d22,d0 /* rounding average of the two helper results */ + VRHADD.U8 d26,d26,d4 + VRHADD.U8 d24,d24,d2 + VRHADD.U8 d28,d28,d6 + ADD r12,r2,r3,LSL #1 + VST1.32 {d22[0]},[r2],r3 + VST1.32 {d26[0]},[r12],r3 + VST1.32 {d24[0]},[r2] + VST1.32 {d28[0]},[r12] + ADD r11,sp,#0 + B L0x434 +L0x1a8: /* case 6 */ + SUB r0,r0,r1,LSL #1 + SUB r0,r0,#2 + BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + VQRSHRUN.S16 d14,q7,#5 + VQRSHRUN.S16 d16,q8,#5 + VQRSHRUN.S16 d18,q9,#5 + VQRSHRUN.S16 d20,q10,#5 + VRHADD.U8 d0,d0,d14 + VRHADD.U8 d4,d4,d18 + VRHADD.U8 d2,d2,d16 + VRHADD.U8 d6,d6,d20 + ADD r12,r2,r3,LSL #1 + VST1.32 {d0[0]},[r2],r3 + VST1.32 {d4[0]},[r12],r3 + VST1.32 {d2[0]},[r2] + VST1.32 {d6[0]},[r12] + ADD r11,sp,#0 + B L0x434 +L0x1f0: /* case 7 */ + MOV r8,r0 + ADD r0,r0,#1 + SUB r0,r0,r1,LSL #1 + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + SUB r0,r8,#2 + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + VRHADD.U8 d22,d22,d0 + VRHADD.U8 d26,d26,d4 + VRHADD.U8 d24,d24,d2 + VRHADD.U8 d28,d28,d6 + ADD r12,r2,r3,LSL #1 + VST1.32 {d22[0]},[r2],r3 + VST1.32 {d26[0]},[r12],r3 + VST1.32 {d24[0]},[r2] + VST1.32 {d28[0]},[r12] + ADD r11,sp,#0 + B L0x434 +L0x234: /* case 8 */ + SUB r0,r0,r1,LSL #1 + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + ADD r12,r2,r3,LSL #1 + VST1.32 {d0[0]},[r2],r3 + VST1.32 {d4[0]},[r12],r3 + VST1.32 {d2[0]},[r2] + VST1.32 {d6[0]},[r12] + ADD r11,sp,#0 + B L0x434 +L0x258: /* case 9 */ + SUB r0,r0,r1,LSL #1 + SUB r0,r0,#2 + BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe + VEXT.8 d18,d18,d19,#4 + VEXT.8 d20,d20,d21,#4 + VEXT.8 d22,d22,d23,#4 + VEXT.8 d24,d24,d25,#4 + VQRSHRUN.S16 d14,q9,#5 + VQRSHRUN.S16 d16,q10,#5 + VQRSHRUN.S16 d18,q11,#5 + VQRSHRUN.S16 d20,q12,#5 + VRHADD.U8 d0,d0,d14 + VRHADD.U8 d4,d4,d18 + VRHADD.U8 d2,d2,d16 + VRHADD.U8 d6,d6,d20 + ADD 
r12,r2,r3,LSL #1 + VST1.32 {d0[0]},[r2],r3 + VST1.32 {d4[0]},[r12],r3 + VST1.32 {d2[0]},[r2] + VST1.32 {d6[0]},[r12] + ADD r11,sp,#0 + B L0x434 +L0x2b0: /* case 10 */ + SUB r0,r0,r1,LSL #1 + SUB r0,r0,#2 + BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + ADD r12,r2,r3,LSL #1 + VST1.32 {d0[0]},[r2],r3 + VST1.32 {d4[0]},[r12],r3 + VST1.32 {d2[0]},[r2] + VST1.32 {d6[0]},[r12] + ADD r11,sp,#0 + B L0x434 +L0x2d8: /* case 11 */ + SUB r0,r0,r1,LSL #1 + SUB r0,r0,#2 + BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe + VEXT.8 d18,d18,d19,#6 + VEXT.8 d20,d20,d21,#6 + VEXT.8 d22,d22,d23,#6 + VEXT.8 d24,d24,d25,#6 + VQRSHRUN.S16 d14,q9,#5 + VQRSHRUN.S16 d16,q10,#5 + VQRSHRUN.S16 d18,q11,#5 + VQRSHRUN.S16 d20,q12,#5 + VRHADD.U8 d0,d0,d14 + VRHADD.U8 d4,d4,d18 + VRHADD.U8 d2,d2,d16 + VRHADD.U8 d6,d6,d20 + ADD r12,r2,r3,LSL #1 + VST1.32 {d0[0]},[r2],r3 + VST1.32 {d4[0]},[r12],r3 + VST1.32 {d2[0]},[r2] + VST1.32 {d6[0]},[r12] + ADD r11,sp,#0 + B L0x434 +L0x330: /* case 12 */ + SUB r0,r0,r1,LSL #1 + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + VRHADD.U8 d0,d0,d10 + VRHADD.U8 d4,d4,d12 + VRHADD.U8 d2,d2,d11 + VRHADD.U8 d6,d6,d13 + ADD r12,r2,r3,LSL #1 + VST1.32 {d0[0]},[r2],r3 + VST1.32 {d4[0]},[r12],r3 + VST1.32 {d2[0]},[r2] + VST1.32 {d6[0]},[r12] + ADD r11,sp,#0 + B L0x434 +L0x364: /* case 13 */ + MOV r8,r0 + SUB r0,r0,r1,LSL #1 + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + ADD r0,r8,r1 + SUB r0,r0,#2 + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + VRHADD.U8 d22,d22,d0 + VRHADD.U8 d26,d26,d4 + VRHADD.U8 d24,d24,d2 + VRHADD.U8 d28,d28,d6 + ADD r12,r2,r3,LSL #1 + VST1.32 {d22[0]},[r2],r3 + VST1.32 {d26[0]},[r12],r3 + VST1.32 {d24[0]},[r2] + VST1.32 {d28[0]},[r12] + ADD r11,sp,#0 + B L0x434 +L0x3a8: /* case 14 */ + SUB r0,r0,r1,LSL #1 + SUB r0,r0,#2 + BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + VQRSHRUN.S16 d14,q8,#5 + VQRSHRUN.S16 d16,q9,#5 + VQRSHRUN.S16 d18,q10,#5 + VQRSHRUN.S16 d20,q11,#5 + VRHADD.U8 d0,d0,d14 + VRHADD.U8 d4,d4,d18 + VRHADD.U8 d2,d2,d16 + VRHADD.U8 d6,d6,d20 + ADD r12,r2,r3,LSL #1 + 
VST1.32 {d0[0]},[r2],r3 + VST1.32 {d4[0]},[r12],r3 + VST1.32 {d2[0]},[r2] + VST1.32 {d6[0]},[r12] + ADD r11,sp,#0 + B L0x434 +L0x3f0: /* case 15 */ + MOV r8,r0 + ADD r0,r0,#1 + SUB r0,r0,r1,LSL #1 + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + ADD r0,r8,r1 + SUB r0,r0,#2 + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + VRHADD.U8 d22,d22,d0 + VRHADD.U8 d26,d26,d4 + VRHADD.U8 d24,d24,d2 + VRHADD.U8 d28,d28,d6 + ADD r12,r2,r3,LSL #1 + VST1.32 {d22[0]},[r2],r3 + VST1.32 {d26[0]},[r12],r3 + VST1.32 {d24[0]},[r2] + VST1.32 {d28[0]},[r12] + ADD r11,sp,#0 +L0x434: /* common tail: step to the next 4x4 tile */ + LDM r11,{r0-r3} /* restore the pointers saved at L0x2c */ + SUBS r5,r5,#4 + ADD r0,r0,#4 + ADD r2,r2,#4 + BGT L0x2c /* more tiles in this tile row */ + SUBS r4,r4,#4 + LDR r5,[sp,#0x80] /* reload the horizontal count for the next tile row */ + ADD r11,sp,#0 + ADD r0,r0,r1,LSL #2 /* move both pointers 4 rows down and back to the left edge */ + ADD r2,r2,r3,LSL #2 + SUB r0,r0,r5 + SUB r2,r2,r5 + BGT L0x2c /* flags still come from SUBS r4 */ + MOV r0,#0 + ADD sp,sp,#0x10 + VPOP {d8-d15} + POP {r4-r12,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S new file mode 100644 index 0000000000000000000000000000000000000000..0d49e4bde023fc0cb9241d67a081cca88da8f217 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S @@ -0,0 +1,217 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .section .rodata + .align 4 + +armVCM4P10_pIndexTable8x8: /* branch targets indexed by the mode value read into r6 */ + .word OMX_VC_CHROMA_DC, OMX_VC_CHROMA_HOR + .word OMX_VC_CHROMA_VERT, OMX_VC_CHROMA_PLANE + +armVCM4P10_MultiplierTableChroma8x8: /* 16-bit multipliers loaded by the plane mode */ + .hword 3, 2, 1,4 + .hword -3,-2,-1,0 + .hword 1, 2, 3,4 + + + .text + .global omxVCM4P10_PredictIntraChroma_8x8 + .func omxVCM4P10_PredictIntraChroma_8x8 +omxVCM4P10_PredictIntraChroma_8x8: /* Writes an 8x8 predicted block. r0 = left-column pointer, r1 = pointer to the row above, r2 = pointer to the above-left byte, r3 = destination; left step, destination step, mode and availability flags come from the stack. Returns 0 in r0. */ + PUSH {r4-r10,lr} + VPUSH {d8-d15} + LDR r8, =armVCM4P10_pIndexTable8x8 + LDR r6,[sp,#0x68] /* third stack argument: mode index (0x60 bytes were pushed: 32 core, 64 NEON) */ + LDR r4,[sp,#0x60] /* first stack argument: byte step between left-column samples */ + LDR r5,[sp,#0x64] /* second stack argument: destination row step */ + LDR r7,[sp,#0x6c] /* fourth stack argument: availability bits, bit 1 gates the left column and bit 0 the row above */ + LDR pc,[r8,r6,LSL #2] /* jump through the mode table */ +OMX_VC_CHROMA_DC: + TST r7,#2 + BEQ L0xe8 /* left column not flagged */ + ADD r9,r0,r4 + ADD r10,r4,r4 + VLD1.8 {d1[0]},[r0],r10 /* gather the 8 left-column bytes into d1 */ + VLD1.8 {d1[1]},[r9],r10 + VLD1.8 {d1[2]},[r0],r10 + VLD1.8 {d1[3]},[r9],r10 + VLD1.8 {d1[4]},[r0],r10 + VLD1.8 {d1[5]},[r9],r10 + VLD1.8 {d1[6]},[r0],r10 + VLD1.8 {d1[7]},[r9] + TST r7,#1 + BEQ L0xcc /* only the left column is flagged */ + VLD1.8 {d0},[r1] + MOV r0,#0 /* r0 is free now; preload the return value */ + VPADDL.U8 d2,d0 + VPADDL.U16 d3,d2 + VPADDL.U8 d2,d1 + VPADDL.U16 d1,d2 + VADD.I32 d2,d3,d1 + VRSHR.U32 d2,d2,#3 + VRSHR.U32 d3,d3,#2 + VRSHR.U32 d1,d1,#2 + VMOV.I8 d5,#0xc + VMOV.I8 d6,#0x4 + VSHL.I64 d5,d5,#32 + VSHR.U64 d6,d6,#32 + VADD.I8 d6,d6,d5 + VTBL.8 d0,{d2-d3},d5 /* d0 = row pattern for the upper 4 rows, d4 = pattern for the lower 4 rows */ + VTBL.8 d4,{d1-d2},d6 +L0x9c: + ADD r9,r3,r5 + ADD r10,r5,r5 + VST1.8 {d0},[r3],r10 + VST1.8 {d0},[r9],r10 + VST1.8 {d0},[r3],r10 + VST1.8 {d0},[r9],r10 + VST1.8 {d4},[r3],r10 + VST1.8 {d4},[r9],r10 + VST1.8 {d4},[r3],r10 + VST1.8 {d4},[r9] + VPOP {d8-d15} + POP {r4-r10,pc} +L0xcc: + MOV r0,#0 + VPADDL.U8 d2,d1 + VPADDL.U16 d1,d2 + VRSHR.U32 d1,d1,#2 + VDUP.8 d0,d1[0] + VDUP.8 d4,d1[4] + B L0x9c +L0xe8: + TST r7,#1 + BEQ L0x114 /* neither neighbour flagged */ + VLD1.8 {d0},[r1] + MOV r0,#0 + VPADDL.U8 d2,d0 + VPADDL.U16 d3,d2 + VRSHR.U32 d3,d3,#2 + VMOV.I8 d5,#0x4 + VSHL.I64 d5,d5,#32 + VTBL.8 d0,{d3},d5 + B L0x11c +L0x114: + VMOV.I8 d0,#0x80 /* no neighbours: fill with 0x80 */ + MOV r0,#0 +L0x11c: /* store the same 8-byte row d0 to all 8 rows */ + ADD r9,r3,r5 + ADD r10,r5,r5 + VST1.8 {d0},[r3],r10 + VST1.8 {d0},[r9],r10 + VST1.8 {d0},[r3],r10 + 
VST1.8 {d0},[r9],r10 + VST1.8 {d0},[r3],r10 + VST1.8 {d0},[r9],r10 + VST1.8 {d0},[r3],r10 + VST1.8 {d0},[r9] + VPOP {d8-d15} + POP {r4-r10,pc} +OMX_VC_CHROMA_VERT: /* every output row is the row above */ + VLD1.8 {d0},[r1] + MOV r0,#0 + B L0x11c +OMX_VC_CHROMA_HOR: /* each output row is its left-column byte replicated 8 times */ + ADD r9,r0,r4 + ADD r10,r4,r4 + VLD1.8 {d0[]},[r0],r10 + VLD1.8 {d1[]},[r9],r10 + VLD1.8 {d2[]},[r0],r10 + VLD1.8 {d3[]},[r9],r10 + VLD1.8 {d4[]},[r0],r10 + VLD1.8 {d5[]},[r9],r10 + VLD1.8 {d6[]},[r0],r10 + VLD1.8 {d7[]},[r9] + B L0x28c +OMX_VC_CHROMA_PLANE: + ADD r9,r0,r4 + ADD r10,r4,r4 + VLD1.8 {d0},[r1] /* d0 = row above, d2[0] = above-left byte, d1 = left column */ + VLD1.8 {d2[0]},[r2] + VLD1.8 {d1[0]},[r0],r10 + VLD1.8 {d1[1]},[r9],r10 + VLD1.8 {d1[2]},[r0],r10 + VLD1.8 {d1[3]},[r9],r10 + VLD1.8 {d1[4]},[r0],r10 + VLD1.8 {d1[5]},[r9],r10 + VLD1.8 {d1[6]},[r0],r10 + VLD1.8 {d1[7]},[r9] + VREV64.8 d3,d0 + VSUBL.U8 q3,d3,d2 + VSHR.U64 d3,d3,#8 + VSUBL.U8 q2,d3,d0 + VREV64.8 d3,d1 + VSUBL.U8 q7,d3,d2 + VSHR.U64 d3,d3,#8 + VSUBL.U8 q6,d3,d1 + LDR r2, =armVCM4P10_MultiplierTableChroma8x8 + VSHL.I64 d4,d4,#16 + VEXT.8 d9,d4,d6,#2 + VLD1.16 {d10},[r2]! 
+ VSHL.I64 d12,d12,#16 + VEXT.8 d16,d12,d14,#2 + VMUL.I16 d11,d9,d10 + VMUL.I16 d3,d16,d10 + VPADD.I16 d3,d11,d3 + VPADDL.S16 d3,d3 + VSHL.I32 d2,d3,#4 + VADD.I32 d3,d3,d2 /* d3 = 17 * sum (x plus x shifted left by 4) */ + VLD1.16 {d10,d11},[r2] + VRSHR.S32 d3,d3,#5 + VADDL.U8 q0,d0,d1 + VDUP.16 q0,d1[3] + VSHL.I16 q0,q0,#4 + VDUP.16 q2,d3[0] + VDUP.16 q3,d3[2] + VMUL.I16 q2,q2,q5 + VMUL.I16 q3,q3,q5 + VADD.I16 q2,q2,q0 + VDUP.16 q0,d6[0] + VDUP.16 q1,d6[1] + VDUP.16 q4,d6[2] + VDUP.16 q5,d6[3] + VDUP.16 q6,d7[0] + VDUP.16 q7,d7[1] + VDUP.16 q8,d7[2] + VDUP.16 q9,d7[3] + VADD.I16 q0,q2,q0 + VADD.I16 q1,q2,q1 + VADD.I16 q4,q2,q4 + VADD.I16 q5,q2,q5 + VADD.I16 q6,q2,q6 + VADD.I16 q7,q2,q7 + VADD.I16 q8,q2,q8 + VADD.I16 q9,q2,q9 + VQRSHRUN.S16 d0,q0,#5 /* round, shift right by 5 and saturate each row to unsigned bytes */ + VQRSHRUN.S16 d1,q1,#5 + VQRSHRUN.S16 d2,q4,#5 + VQRSHRUN.S16 d3,q5,#5 + VQRSHRUN.S16 d4,q6,#5 + VQRSHRUN.S16 d5,q7,#5 + VQRSHRUN.S16 d6,q8,#5 + VQRSHRUN.S16 d7,q9,#5 +L0x28c: /* store rows d0-d7 */ + ADD r9,r3,r5 + ADD r10,r5,r5 + VST1.8 {d0},[r3],r10 + VST1.8 {d1},[r9],r10 + VST1.8 {d2},[r3],r10 + VST1.8 {d3},[r9],r10 + VST1.8 {d4},[r3],r10 + VST1.8 {d5},[r9],r10 + VST1.8 {d6},[r3],r10 + VST1.8 {d7},[r9] + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r10,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S new file mode 100644 index 0000000000000000000000000000000000000000..53268f6190047fb331a11ba25b3c285158b7631a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S @@ -0,0 +1,239 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + + .section .rodata + .align 4 +;//------------------------------------------------------- +;// This table is for implementing the switch case of C in asm by +;// the method of two levels of indexing. 
+;//------------------------------------------------------- + +armVCM4P10_pIndexTable16x16: /* branch targets indexed by the mode value read into r6 */ + .word OMX_VC_16X16_VERT, OMX_VC_16X16_HOR + .word OMX_VC_16X16_DC, OMX_VC_16X16_PLANE + + + +armVCM4P10_MultiplierTable16x16: /* 16-bit multipliers loaded by the plane mode */ + .hword 7, 6, 5, 4, 3, 2, 1, 8 + .hword 0, 1, 2, 3, 4, 5, 6, 7 + .hword 8, 9, 10, 11, 12, 13, 14, 15 + + .text + + .global omxVCM4P10_PredictIntra_16x16 + .func omxVCM4P10_PredictIntra_16x16 +omxVCM4P10_PredictIntra_16x16: /* Writes a 16x16 predicted block. r0 = left-column pointer, r1 = pointer to the row above, r2 = pointer to the above-left byte, r3 = destination; left step, destination step, mode and availability flags come from the stack. Returns 0 in r0. */ + PUSH {r4-r12,lr} + VPUSH {d8-d15} + LDR r9, =armVCM4P10_pIndexTable16x16 + LDR r6,[sp,#0x70] /* third stack argument: mode index (0x68 bytes were pushed: 40 core, 64 NEON) */ + LDR r4,[sp,#0x68] /* first stack argument: byte step between left-column samples */ + LDR r5,[sp,#0x6c] /* second stack argument: destination row step */ + LDR r7,[sp,#0x74] /* fourth stack argument: availability bits, bit 1 gates the left column and bit 0 the row above */ + MOV r12,#0x10 /* row counter used by the HOR and PLANE loops */ + LDR pc,[r9,r6,LSL #2] /* jump through the mode table */ +OMX_VC_16X16_VERT: /* copy the 16-byte row above into all 16 rows */ + VLD1.8 {d0,d1},[r1] + ADD r8,r3,r5 + ADD r10,r5,r5 + VST1.8 {d0,d1},[r3],r10 + VST1.8 {d0,d1},[r8],r10 + VST1.8 {d0,d1},[r3],r10 + VST1.8 {d0,d1},[r8],r10 + VST1.8 {d0,d1},[r3],r10 + VST1.8 {d0,d1},[r8],r10 + VST1.8 {d0,d1},[r3],r10 + VST1.8 {d0,d1},[r8],r10 + VST1.8 {d0,d1},[r3],r10 + VST1.8 {d0,d1},[r8],r10 + VST1.8 {d0,d1},[r3],r10 + VST1.8 {d0,d1},[r8],r10 + VST1.8 {d0,d1},[r3],r10 + VST1.8 {d0,d1},[r8],r10 + VST1.8 {d0,d1},[r3] + VST1.8 {d0,d1},[r8] + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r12,pc} +OMX_VC_16X16_HOR: /* each output row is its left-column byte replicated 16 times */ + ADD r8,r0,r4 + ADD r4,r4,r4 + ADD r11,r3,r5 + ADD r5,r5,r5 +L0x8c: /* 8 rows per iteration, 2 iterations */ + VLD1.8 {d2[],d3[]},[r0],r4 + VLD1.8 {d0[],d1[]},[r8],r4 + SUBS r12,r12,#8 + VST1.8 {d2,d3},[r3],r5 + VST1.8 {d0,d1},[r11],r5 + VLD1.8 {d2[],d3[]},[r0],r4 + VLD1.8 {d0[],d1[]},[r8],r4 + VST1.8 {d2,d3},[r3],r5 + VST1.8 {d0,d1},[r11],r5 + VLD1.8 {d2[],d3[]},[r0],r4 + VLD1.8 {d0[],d1[]},[r8],r4 + VST1.8 {d2,d3},[r3],r5 + VST1.8 {d0,d1},[r11],r5 + VLD1.8 {d2[],d3[]},[r0],r4 + VLD1.8 {d0[],d1[]},[r8],r4 + VST1.8 {d2,d3},[r3],r5 + VST1.8 {d0,d1},[r11],r5 + BNE L0x8c + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r12,pc} +OMX_VC_16X16_DC: + MOV r11,#0 /* r11 counts how many neighbour sums get computed */ + TST r7,#2 + BEQ L0x14c /* left column not flagged */ + ADD r8,r0,r4 + ADD r10,r4,r4 + VLD1.8 {d2[0]},[r0],r10 /* gather the 16 left-column bytes into d2 and d3 */ + VLD1.8 {d2[1]},[r8],r10 + VLD1.8 {d2[2]},[r0],r10 + VLD1.8 {d2[3]},[r8],r10 + VLD1.8 {d2[4]},[r0],r10 + VLD1.8 
{d2[5]},[r8],r10 + VLD1.8 {d2[6]},[r0],r10 + VLD1.8 {d2[7]},[r8],r10 + VLD1.8 {d3[0]},[r0],r10 + VLD1.8 {d3[1]},[r8],r10 + VLD1.8 {d3[2]},[r0],r10 + VLD1.8 {d3[3]},[r8],r10 + VLD1.8 {d3[4]},[r0],r10 + VLD1.8 {d3[5]},[r8],r10 + VLD1.8 {d3[6]},[r0],r10 + VLD1.8 {d3[7]},[r8] + VPADDL.U8 q0,q1 + ADD r11,r11,#1 + VPADD.I16 d0,d0,d1 + VPADDL.U16 d0,d0 + VPADDL.U32 d6,d0 /* d6 = sum of the left column */ + VRSHR.U64 d8,d6,#4 /* d8 = rounded sum / 16, used when only one neighbour is flagged */ +L0x14c: + TST r7,#1 + BEQ L0x170 /* row above not flagged */ + VLD1.8 {d0,d1},[r1] + ADD r11,r11,#1 + VPADDL.U8 q0,q0 + VPADD.I16 d0,d0,d1 + VPADDL.U16 d0,d0 + VPADDL.U32 d7,d0 /* d7 = sum of the row above */ + VRSHR.U64 d8,d7,#4 +L0x170: + CMP r11,#2 + BNE L0x180 + VADD.I64 d8,d7,d6 /* both sums present: rounded (left sum plus above sum) / 32 */ + VRSHR.U64 d8,d8,#5 +L0x180: + VDUP.8 q3,d8[0] + CMP r11,#0 + ADD r8,r3,r5 + ADD r10,r5,r5 + BNE L0x198 + VMOV.I8 q3,#0x80 /* no neighbours: fill with 0x80 */ +L0x198: /* store the 16-byte row q3 to all 16 rows */ + VST1.8 {d6,d7},[r3],r10 + VST1.8 {d6,d7},[r8],r10 + VST1.8 {d6,d7},[r3],r10 + VST1.8 {d6,d7},[r8],r10 + VST1.8 {d6,d7},[r3],r10 + VST1.8 {d6,d7},[r8],r10 + VST1.8 {d6,d7},[r3],r10 + VST1.8 {d6,d7},[r8],r10 + VST1.8 {d6,d7},[r3],r10 + VST1.8 {d6,d7},[r8],r10 + VST1.8 {d6,d7},[r3],r10 + VST1.8 {d6,d7},[r8],r10 + VST1.8 {d6,d7},[r3],r10 + VST1.8 {d6,d7},[r8],r10 + VST1.8 {d6,d7},[r3],r10 + VST1.8 {d6,d7},[r8],r10 + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r12,pc} +OMX_VC_16X16_PLANE: + LDR r9, =armVCM4P10_MultiplierTable16x16 + VLD1.8 {d0,d1},[r1] /* q0 = row above, d4[0] = above-left byte, q1 = left column */ + VLD1.8 {d4[0]},[r2] + ADD r8,r0,r4 + ADD r10,r4,r4 + VLD1.8 {d2[0]},[r0],r10 + VLD1.8 {d2[1]},[r8],r10 + VLD1.8 {d2[2]},[r0],r10 + VLD1.8 {d2[3]},[r8],r10 + VLD1.8 {d2[4]},[r0],r10 + VLD1.8 {d2[5]},[r8],r10 + VLD1.8 {d2[6]},[r0],r10 + VLD1.8 {d2[7]},[r8],r10 + VLD1.8 {d3[0]},[r0],r10 + VLD1.8 {d3[1]},[r8],r10 + VLD1.8 {d3[2]},[r0],r10 + VLD1.8 {d3[3]},[r8],r10 + VLD1.8 {d3[4]},[r0],r10 + VLD1.8 {d3[5]},[r8],r10 + VLD1.8 {d3[6]},[r0],r10 + VLD1.8 {d3[7]},[r8] + VREV64.8 d5,d1 + VSUBL.U8 q3,d5,d4 + VSHR.U64 d5,d5,#8 + VSUBL.U8 q4,d5,d0 + VSHL.I64 d9,d9,#16 + VEXT.8 d9,d9,d6,#2 + VREV64.8 d12,d3 + VSUBL.U8 q7,d12,d4 + VSHR.U64 d12,d12,#8 + VSUBL.U8 q8,d12,d2 + VLD1.16 {d20,d21},[r9]! 
+ VSHL.I64 d17,d17,#16 + VEXT.8 d17,d17,d14,#2 + VMULL.S16 q11,d8,d20 + VMULL.S16 q12,d16,d20 + VMLAL.S16 q11,d9,d21 + VMLAL.S16 q12,d17,d21 + VPADD.I32 d22,d23,d22 + VPADD.I32 d23,d25,d24 + VPADDL.S32 q11,q11 + VSHL.I64 q12,q11,#2 + VADD.I64 q11,q11,q12 /* q11 = 5 * sum (x plus x shifted left by 2) */ + VRSHR.S64 q11,q11,#6 + VSHL.I64 q12,q11,#3 + VSUB.I64 q12,q12,q11 /* q12 = 7 * q11 */ + VLD1.16 {d20,d21},[r9]! + VDUP.16 q6,d22[0] + VDUP.16 q7,d23[0] /* q7 = per-row increment added in the loop below */ + VADDL.U8 q11,d1,d3 + VSHL.I16 q11,q11,#4 + VDUP.16 q11,d23[3] + VADD.I64 d1,d24,d25 + VLD1.16 {d24,d25},[r9] + VDUP.16 q13,d1[0] + VSUB.I16 q13,q11,q13 + VMUL.I16 q5,q6,q10 + VMUL.I16 q6,q6,q12 + VADD.I16 q0,q5,q13 + VADD.I16 q1,q6,q13 +L0x2d4: /* one 16-byte row per iteration, r12 = 16 iterations */ + VQRSHRUN.S16 d6,q0,#5 /* round, shift right by 5 and saturate to unsigned bytes */ + VQRSHRUN.S16 d7,q1,#5 + SUBS r12,r12,#1 + VST1.8 {d6,d7},[r3],r5 + VADD.I16 q0,q0,q7 + VADD.I16 q1,q1,q7 + BNE L0x2d4 + MOV r0,#0 + VPOP {d8-d15} + POP {r4-r12,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S new file mode 100644 index 0000000000000000000000000000000000000000..aa6d7ef839e715d542647dacf68ea29ac3c0ae0b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S @@ -0,0 +1,261 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + + .section .rodata + .align 4 + +armVCM4P10_pSwitchTable4x4: /* branch targets indexed by the mode value read into r6 */ + .word OMX_VC_4x4_VERT, OMX_VC_4x4_HOR + .word OMX_VC_4x4_DC, OMX_VC_4x4_DIAG_DL + .word OMX_VC_4x4_DIAG_DR, OMX_VC_4x4_VR + .word OMX_VC_4x4_HD, OMX_VC_4x4_VL + .word OMX_VC_4x4_HU + + .text + + .global omxVCM4P10_PredictIntra_4x4 + .func omxVCM4P10_PredictIntra_4x4 +omxVCM4P10_PredictIntra_4x4: /* Writes a 4x4 predicted block. r0 = left-column pointer, r1 = pointer to the row above, r2 = pointer to the above-left byte, r3 = destination; left step, destination step, mode and availability flags come from the stack. All paths end at L0x348 and return 0 in r0. */ + PUSH {r4-r12,lr} + VPUSH {d8-d12} + LDR r8, =armVCM4P10_pSwitchTable4x4 + LDRD r6,r7,[sp,#0x58] /* third and fourth stack arguments: mode index and availability bits (0x50 bytes were pushed: 40 core, 40 NEON) */ + LDRD r4,r5,[sp,#0x50] /* first and second stack arguments: left step and destination step */ + LDR pc,[r8,r6,LSL #2] /* jump through the mode table */ +OMX_VC_4x4_HOR: /* each output row is its left-column byte replicated */ + ADD r9,r0,r4 + ADD r10,r4,r4 + VLD1.8 {d0[]},[r0],r10 + VLD1.8 {d1[]},[r9],r10 + VLD1.8 {d2[]},[r0] + VLD1.8 {d3[]},[r9] + ADD r11,r3,r5 + ADD r12,r5,r5 + VST1.32 {d0[0]},[r3],r12 + VST1.32 {d1[0]},[r11],r12 + VST1.32 {d2[0]},[r3] + VST1.32 {d3[0]},[r11] + B L0x348 +OMX_VC_4x4_VERT: /* copy the 4 bytes above into all 4 rows */ + VLD1.32 {d0[0]},[r1] + ADD r11,r3,r5 + ADD r12,r5,r5 +L0x58: /* shared store: the low 4 bytes of d0 go to all 4 rows */ + VST1.32 {d0[0]},[r3],r12 + VST1.32 {d0[0]},[r11],r12 + VST1.32 {d0[0]},[r3] + VST1.32 {d0[0]},[r11] + B L0x348 +OMX_VC_4x4_DC: + TST r7,#2 + BEQ L0xdc /* left column not flagged */ + ADD r9,r0,r4 + ADD r10,r4,r4 + VLD1.8 {d0[0]},[r0],r10 + VLD1.8 {d0[1]},[r9],r10 + VLD1.8 {d0[2]},[r0] + VLD1.8 {d0[3]},[r9] + TST r7,#1 + BEQ L0xbc /* only the left column is flagged */ + VLD1.32 {d0[1]},[r1] + MOV r0,#0 + VPADDL.U8 d1,d0 + VPADDL.U16 d1,d1 + VPADDL.U32 d1,d1 + VRSHR.U64 d1,d1,#3 /* rounded sum of the 8 neighbour bytes / 8 */ + ADD r11,r3,r5 + ADD r12,r5,r5 + VDUP.8 d0,d1[0] + B L0x58 +L0xbc: + MOV r0,#0 + VPADDL.U8 d1,d0 + VPADDL.U16 d1,d1 + VRSHR.U32 d1,d1,#2 /* rounded sum of the 4 left bytes / 4 */ + ADD r11,r3,r5 + ADD r12,r5,r5 + VDUP.8 d0,d1[0] + B L0x58 +L0xdc: + TST r7,#1 + BEQ L0x108 /* neither neighbour flagged */ + VLD1.32 {d0[0]},[r1] + MOV r0,#0 + VPADDL.U8 d1,d0 + VPADDL.U16 d1,d1 + VRSHR.U32 d1,d1,#2 /* rounded sum of the 4 bytes above / 4 */ + ADD r11,r3,r5 + ADD r12,r5,r5 + VDUP.8 d0,d1[0] + B L0x58 +L0x108: + VMOV.I8 d0,#0x80 /* no neighbours: fill with 0x80 */ + MOV r0,#0 + ADD r11,r3,r5 + ADD r12,r5,r5 + B L0x58 +OMX_VC_4x4_DIAG_DL: + TST r7,#0x40 /* bit 6 of the flags: when set, 8 bytes are read from the row above */ + BEQ L0x138 + VLD1.8 {d3},[r1] + VDUP.8 d2,d3[7] + VEXT.8 d4,d3,d2,#1 + VEXT.8 d5,d3,d2,#2 + B L0x14c +L0x138: /* only 4 bytes are read; the last one is replicated in place of the missing ones */ + VLD1.32 
{d0[1]},[r1] + VDUP.8 d2,d0[7] + VEXT.8 d3,d0,d2,#4 + VEXT.8 d4,d0,d2,#5 + VEXT.8 d5,d0,d2,#6 +L0x14c: + VHADD.U8 d6,d3,d5 + VRHADD.U8 d6,d6,d4 + VST1.32 {d6[0]},[r3],r5 /* each following row is the filtered vector shifted by one byte */ + VEXT.8 d6,d6,d6,#1 + VST1.32 {d6[0]},[r3],r5 + VEXT.8 d6,d6,d6,#1 + VST1.32 {d6[0]},[r3],r5 + VEXT.8 d6,d6,d6,#1 + VST1.32 {d6[0]},[r3] + B L0x348 +OMX_VC_4x4_DIAG_DR: + VLD1.32 {d0[0]},[r1] + VLD1.8 {d1[7]},[r2] + ADD r9,r0,r4 + ADD r10,r4,r4 + ADD r1,r3,r5 /* r1, r4, r6 are reused as the addresses of output rows 1, 2 and 3 */ + VLD1.8 {d1[6]},[r0],r10 + VLD1.8 {d1[5]},[r9],r10 + VLD1.8 {d1[4]},[r0] + VLD1.8 {d1[3]},[r9] + VEXT.8 d3,d1,d0,#3 + ADD r4,r1,r5 + VEXT.8 d4,d1,d0,#4 + ADD r6,r4,r5 + VEXT.8 d5,d1,d0,#5 + VHADD.U8 d6,d3,d5 + VRHADD.U8 d6,d6,d4 + VST1.32 {d6[0]},[r6] /* rows are written bottom-up */ + VEXT.8 d6,d6,d6,#1 + VST1.32 {d6[0]},[r4] + VEXT.8 d6,d6,d6,#1 + VST1.32 {d6[0]},[r1] + VEXT.8 d6,d6,d6,#1 + VST1.32 {d6[0]},[r3] + B L0x348 +OMX_VC_4x4_VR: + VLD1.32 {d0[0]},[r1] + VLD1.8 {d0[7]},[r2] + VLD1.8 {d1[7]},[r0],r4 + VLD1.8 {d2[7]},[r0],r4 + VLD1.8 {d1[6]},[r0] + VEXT.8 d12,d0,d0,#7 + VEXT.8 d3,d1,d12,#6 + VEXT.8 d4,d2,d12,#7 + VEXT.8 d5,d1,d0,#7 + VEXT.8 d6,d2,d0,#7 + VEXT.8 d11,d1,d12,#7 + VHADD.U8 d8,d6,d12 + VRHADD.U8 d8,d8,d11 + VHADD.U8 d7,d3,d5 + VRHADD.U8 d7,d7,d4 + VEXT.8 d10,d8,d8,#1 + ADD r11,r3,r5 + ADD r12,r5,r5 + VEXT.8 d9,d7,d7,#1 + VST1.32 {d10[0]},[r3],r12 + VST1.32 {d9[0]},[r11],r12 + VST1.32 {d8[0]},[r3],r12 + VST1.32 {d7[0]},[r11] + B L0x348 +OMX_VC_4x4_HD: + VLD1.8 {d0},[r1] + VLD1.8 {d1[7]},[r2] + ADD r9,r0,r4 + ADD r10,r4,r4 + VLD1.8 {d1[6]},[r0],r10 + VLD1.8 {d1[5]},[r9],r10 + VLD1.8 {d1[4]},[r0] + VLD1.8 {d1[3]},[r9] + VEXT.8 d3,d1,d0,#3 + VEXT.8 d4,d1,d0,#2 + VEXT.8 d5,d1,d0,#1 + VHADD.U8 d7,d3,d5 + VRHADD.U8 d7,d7,d4 + VRHADD.U8 d8,d4,d3 + VSHL.I64 d8,d8,#24 + VSHL.I64 d6,d7,#16 + VZIP.8 d8,d6 + VEXT.8 d7,d7,d7,#6 + VEXT.8 d8,d6,d7,#2 + ADD r11,r3,r5 + ADD r12,r5,r5 + VST1.32 {d8[1]},[r3],r12 + VST1.32 {d6[1]},[r11],r12 + VST1.32 {d8[0]},[r3] + VST1.32 {d6[0]},[r11] + B L0x348 +OMX_VC_4x4_VL: + TST r7,#0x40 /* same bit-6 test as in DIAG_DL */ + BEQ L0x2b4 + VLD1.8 {d3},[r1] + VEXT.8 
d4,d3,d3,#1 + VEXT.8 d5,d4,d4,#1 + B L0x2c8 +L0x2b4: + VLD1.32 {d0[1]},[r1] + VDUP.8 d2,d0[7] + VEXT.8 d3,d0,d2,#4 + VEXT.8 d4,d0,d2,#5 + VEXT.8 d5,d0,d2,#6 +L0x2c8: + VRHADD.U8 d7,d4,d3 + VHADD.U8 d10,d3,d5 + VRHADD.U8 d10,d10,d4 + VEXT.8 d8,d7,d7,#1 + ADD r11,r3,r5 + ADD r12,r5,r5 + VEXT.8 d9,d10,d8,#1 + VST1.32 {d7[0]},[r3],r12 + VST1.32 {d10[0]},[r11],r12 + VST1.32 {d8[0]},[r3] + VST1.32 {d9[0]},[r11] + B L0x348 +OMX_VC_4x4_HU: /* uses the left column only; its last byte is replicated */ + ADD r9,r0,r4 + ADD r10,r4,r4 + VLD1.8 {d1[4]},[r0],r10 + VLD1.8 {d1[5]},[r9],r10 + VLD1.8 {d1[6]},[r0] + VLD1.8 {d1[7]},[r9] + VDUP.8 d2,d1[7] + VEXT.8 d3,d1,d2,#4 + VEXT.8 d4,d1,d2,#5 + VEXT.8 d5,d1,d2,#6 + VHADD.U8 d7,d3,d5 + VRHADD.U8 d7,d7,d4 + VRHADD.U8 d8,d4,d3 + VZIP.8 d8,d7 + VST1.32 {d8[0]},[r3],r5 + VEXT.8 d8,d8,d8,#2 + VST1.32 {d8[0]},[r3],r5 + VEXT.8 d8,d8,d8,#2 + VST1.32 {d8[0]},[r3],r5 + VST1.32 {d7[0]},[r3] +L0x348: /* common exit */ + MOV r0,#0 + VPOP {d8-d12} + POP {r4-r12,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S new file mode 100644 index 0000000000000000000000000000000000000000..28a89cb3f03384cf3ee82e57e9f1a029ff5bff32 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S @@ -0,0 +1,54 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global omxVCM4P10_TransformDequantChromaDCFromPair + .func omxVCM4P10_TransformDequantChromaDCFromPair +omxVCM4P10_TransformDequantChromaDCFromPair: /* r0 = address of the source byte pointer (updated on return), r1 = destination holding four 16-bit values, r2 = index into the two QP tables. Unpacks the packed byte stream into the destination, applies add/subtract butterflies, scales by a table-derived factor, and returns 0 in r0. */ + push {r4-r10, lr} + ldr r9, [r0,#0] /* r9 = current position in the packed stream */ + vmov.i16 d0, #0 + mov r8, #0x1f + vst1.16 {d0}, [r1] /* clear the four 16-bit destination entries */ + ldrb r6, [r9], #1 /* first flag byte */ +unpackLoop: + tst r6, #0x10 /* bit 4 set: the value is stored in two bytes, otherwise one signed byte */ + ldrnesb r5, [r9, #1] + ldrneb r4, [r9], #2 + and r7, r8, r6, lsl #1 /* byte offset into the destination = 2 * (low four bits of the flag byte) */ + ldreqsb r4, [r9], #1 + orrne r4, r4, r5, lsl #8 + tst r6, #0x20 /* bit 5 set: this was the last entry */ + ldreqb r6, [r9], #1 /* otherwise fetch the next flag byte */ + strh r4, [r1, r7] + beq unpackLoop + ldmia r1, {r3, r4} /* reload the four values as two packed halfword pairs */ + str r9, [r0, #0] /* write the advanced stream pointer back */ + ldr r5, =armVCM4P10_QPDivTable + ldr r6, =armVCM4P10_VMatrixQPModTable + saddsubx r3, r3, r3 + saddsubx r4, r4, r4 + ldrsb r9, [r5, r2] + ldrsb r2, [r6, r2] + sadd16 r5, r3, r4 + ssub16 r6, r3, r4 + lsl r2, r2, r9 /* scale = VMatrixQPModTable[r2] shifted left by QPDivTable[r2] */ + vmov d0, r5, r6 + vrev32.16 d0, d0 + vdup.16 d1, r2 + vmull.s16 q1, d0, d1 + vshrn.i32 d2, q1, #1 /* halve the 32-bit products and narrow them to 16 bits */ + vst1.16 {d2}, [r1] + mov r0, #0 + pop {r4-r10, pc} + .endfunc + + .end diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S new file mode 100644 index 0000000000000000000000000000000000000000..a3a071539580efc16e8d0f16d9f6bc17cfe2ab1e --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S @@ -0,0 +1,76 @@ +/* + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + */ + + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + + .arm + .fpu neon + .text + + .global armVCM4P10_InvTransformDequantLumaDC4x4 + .func armVCM4P10_InvTransformDequantLumaDC4x4 +armVCM4P10_InvTransformDequantLumaDC4x4: /* r0 = sixteen 16-bit values transformed in place, r1 = index into the two QP tables. Does not set a return value. */ + PUSH {r4-r6,lr} + VPUSH {d8-d13} + VLD4.16 {d0,d1,d2,d3},[r0] /* de-interleaving load: every fourth element goes to the same register */ + LDR r2, =armVCM4P10_QPDivTable + LDR r3, =armVCM4P10_VMatrixQPModTable + VADD.I16 d4,d0,d1 /* first add/subtract butterfly pass */ + VADD.I16 d5,d2,d3 + VSUB.I16 d6,d0,d1 + LDRSB r4,[r2,r1] + VSUB.I16 d7,d2,d3 + LDRSB r5,[r3,r1] + VADD.I16 d0,d4,d5 + VSUB.I16 d1,d4,d5 + VSUB.I16 d2,d6,d7 + LSL r5,r5,r4 /* scale = VMatrixQPModTable[r1] shifted left by QPDivTable[r1] */ + VADD.I16 d3,d6,d7 + VTRN.16 d0,d1 /* rearrange lanes between the two passes */ + VTRN.16 d2,d3 + VTRN.32 q0,q1 + VADD.I16 d4,d0,d1 /* second butterfly pass */ + VADD.I16 d5,d2,d3 + VSUB.I16 d6,d0,d1 + VSUB.I16 d7,d2,d3 + VADD.I16 d0,d4,d5 + VSUB.I16 d1,d4,d5 + VSUB.I16 d2,d6,d7 + VADD.I16 d3,d6,d7 + VDUP.16 d5,r5 + VMOV.I32 q3,#0x2 /* accumulators start at 2, the rounding term for the shift by 2 below */ + VMOV.I32 q4,#0x2 + VMOV.I32 q5,#0x2 + VMOV.I32 q6,#0x2 + VMLAL.S16 q3,d0,d5 + VMLAL.S16 q4,d1,d5 + VMLAL.S16 q5,d2,d5 + VMLAL.S16 q6,d3,d5 + VSHRN.I32 d0,q3,#2 + VSHRN.I32 d1,q4,#2 + VSHRN.I32 d2,q5,#2 + VSHRN.I32 d3,q6,#2 + VST1.16 {d0,d1,d2,d3},[r0] + VPOP {d8-d13} + POP {r4-r6,pc} + .endfunc + +.global omxVCM4P10_TransformDequantLumaDCFromPair +.func omxVCM4P10_TransformDequantLumaDCFromPair +omxVCM4P10_TransformDequantLumaDCFromPair: /* Calls armVCM4P10_UnpackBlock4x4 with the incoming r0 and r1, then runs the transform above on the buffer passed in r1 with the table index passed in r2. Returns 0 in r0. */ + PUSH {r4-r6,lr} + MOV r4,r1 /* keep r1 and r2 in callee-saved registers across the call */ + MOV r5,r2 + BL armVCM4P10_UnpackBlock4x4 + MOV r0,r4 + MOV r1,r5 + BL armVCM4P10_InvTransformDequantLumaDC4x4 + MOV r0,#0 + POP {r4-r6,pc} + .endfunc + + .end + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h new file mode 100755 index 0000000000000000000000000000000000000000..74b55054e76a2dec4698bd05498b410fe1a78264 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h @@ -0,0 +1,37 @@ +/** + * + * File Name: armVCM4P2_Huff_Tables_VLC.h + * OpenMAX DL: v1.0.2 + * Revision: 
12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * + * File: armVCM4P2_Huff_Tables.h + * Description: Declares Tables used for Huffman coding and decoding + * in MP4P2 codec. + * + */ + +#ifndef _OMXHUFFTAB_H_ +#define _OMXHUFFTAB_H_ + + +extern const OMX_U16 armVCM4P2_IntraVlcL0L1[200]; /* Tables are defined elsewhere; OMX_U8 and OMX_U16 must be declared before this header is included. */ + + +extern const OMX_U16 armVCM4P2_InterVlcL0L1[200]; + +extern const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64]; +//extern const OMX_U16 armVCM4P2_aIntraDCChromaIndex[32]; +extern const OMX_U16 armVCM4P2_aVlcMVD[124]; + +extern const OMX_U8 armVCM4P2_InterL0L1LMAX[73]; +extern const OMX_U8 armVCM4P2_InterL0L1RMAX[35]; +extern const OMX_U8 armVCM4P2_IntraL0L1LMAX[53]; +extern const OMX_U8 armVCM4P2_IntraL0L1RMAX[40]; + +#endif /* _OMXHUFFTAB_H_ */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h new file mode 100755 index 0000000000000000000000000000000000000000..e95203af4027e82f3d9dfb931b250ffe001be86d --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h @@ -0,0 +1,25 @@ +/** + * + * File Name: armVCM4P2_ZigZag_Tables.h + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * + * File: armVCM4P2_Zigzag_Tables.h + * Description: Declares Tables used for Zigzag scan in MP4P2 codec. 
+ * + */ + +#ifndef _OMXZIGZAGTAB_H +#define _OMXZIGZAGTAB_H + +extern const OMX_U8 armVCM4P2_aClassicalZigzagScan [192]; +//extern const OMX_U8 armVCM4P2_aHorizontalZigzagScan [64]; +//extern const OMX_U8 armVCM4P2_aVerticalZigzagScan [64]; + +#endif /* _OMXZIGZAGTAB_H */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Clip8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Clip8_s.s new file mode 100755 index 0000000000000000000000000000000000000000..95fe6d237b8b312e0ef5dad6c40e9db7a5620544 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Clip8_s.s @@ -0,0 +1,82 @@ +; /** +; * +; * File Name: armVCM4P2_Clip8_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 12290 +; * Date: Wednesday, April 9, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains module for Clipping 16 bit value to [0,255] Range +; */ + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + + M_VARIANTS CortexA8 + + IF CortexA8 +;//Input Arguments + +pSrc RN 0 +pDst RN 1 +step RN 2 + +;// Neon Registers + +qx0 QN Q0.S16 +dx00 DN D0.S16 +dx01 DN D1.S16 +qx1 QN Q1.S16 +dx10 DN D2.S16 +dx11 DN D3.S16 + +qx2 QN Q2.S16 +dx20 DN D4.S16 +dx21 DN D5.S16 +qx3 QN Q3.S16 +dx30 DN D6.S16 +dx31 DN D7.S16 + + +dclip0 DN D0.U8 +dclip1 DN D2.U8 +dclip2 DN D4.U8 +dclip3 DN D6.U8 + + M_START armVCM4P2_Clip8 ;// Reads 64 contiguous S16 entries from pSrc and writes 8 rows of 8 clipped bytes to pDst, rows "step" bytes apart + + VLD1 {dx00,dx01,dx10,dx11},[pSrc]! ;// Load 16 entries from pSrc + VLD1 {dx20,dx21,dx30,dx31},[pSrc]! ;// Load next 16 entries from pSrc + VQSHRUN dclip0,qx0,#0 ;// dclip0[i]=clip qx0[i] to [0,255] + VQSHRUN dclip1,qx1,#0 ;// dclip1[i]=clip qx1[i] to [0,255] + VST1 {dclip0},[pDst],step ;// store 8 bytes and pDst=pDst+step + VST1 {dclip1},[pDst],step ;// store 8 bytes and pDst=pDst+step + VQSHRUN dclip2,qx2,#0 ;// dclip2[i]=clip qx2[i] to [0,255] + VQSHRUN dclip3,qx3,#0 ;// dclip3[i]=clip qx3[i] to [0,255] + VST1 {dclip2},[pDst],step ;// store 8 bytes and pDst=pDst+step + VST1 {dclip3},[pDst],step ;// store 8 bytes and pDst=pDst+step + + VLD1 {dx00,dx01,dx10,dx11},[pSrc]! 
;// Load 16 entries from pSrc + VLD1 {dx20,dx21,dx30,dx31},[pSrc]! ;// Load next 16 entries from pSrc + VQSHRUN dclip0,qx0,#0 ;// dclip0[i]=clip qx0[i] to [0,255] + VQSHRUN dclip1,qx1,#0 ;// dclip1[i]=clip qx1[i] to [0,255] + VST1 {dclip0},[pDst],step ;// store 8 bytes and pDst=pDst+step + VST1 {dclip1},[pDst],step ;// store 8 bytes and pDst=pDst+step + VQSHRUN dclip2,qx2,#0 ;// dclip2[i]=clip qx2[i] to [0,255] + VQSHRUN dclip3,qx3,#0 ;// dclip3[i]=clip qx3[i] to [0,255] + VST1 {dclip2},[pDst],step ;// store 8 bytes and pDst=pDst+step + VST1 {dclip3},[pDst],step ;// store 8 bytes and pDst=pDst+step + + + + M_END + ENDIF + + + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s new file mode 100755 index 0000000000000000000000000000000000000000..e4a7f331e6cbb1996b0680cf3ff1460693d93ea1 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s @@ -0,0 +1,398 @@ +;/** +; * +; * File Name: armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 12290 +; * Date: Wednesday, April 9, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains modules for zigzag scanning and VLC decoding +; * for inter, intra block. 
+; * +; * +; * +; * Function: omxVCM4P2_DecodeVLCZigzag_AC_unsafe +; * +; * Description: +; * Performs VLC decoding and inverse zigzag scan +; * +; * +; * +; * +; */ + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + + + M_VARIANTS ARM1136JS + + + + + + IF ARM1136JS + + + + + +;//Input Arguments + +ppBitStream RN 0 +pBitOffset RN 1 +pDst RN 2 +shortVideoHeader RN 3 + + +;//Local Variables + +Return RN 0 + +pVlcTableL0L1 RN 4 +pLMAXTableL0L1 RN 4 +pRMAXTableL0L1 RN 4 +pZigzagTable RN 4 + +ftype RN 0 +temp3 RN 4 +temp RN 5 +Count RN 6 +Escape RN 5 + +;// armVCM4P2_FillVLDBuffer +zigzag RN 0 +storeLevel RN 1 +temp2 RN 4 +temp1 RN 5 +sign RN 5 +Last RN 7 +storeRun RN 14 + + +packRetIndex RN 5 + + +markerbit RN 5 + +;// Scratch Registers + +RBitStream RN 8 +RBitBuffer RN 9 +RBitCount RN 10 + +T1 RN 11 +T2 RN 12 +LR RN 14 + + + + M_ALLOC4 pppBitStream,4 + M_ALLOC4 ppOffset,4 + M_ALLOC4 pLinkRegister,4 + + ;// armVCM4P2_DecodeVLCZigzag_AC_unsafe + ;// Zero-fills the 64 x 16-bit block at pDst, then decodes (Last, Run, Level) events and stores each + ;// Level at pDst + zigzag[Count] (the zigzag entries are used directly as byte offsets) until an event + ;// with Last=1 is stored, an error is detected, or Count reaches 64. + ;// In : r0 ppBitStream, r1 pBitOffset, r2 pDst, r3 shortVideoHeader; + ;// stack args: VLC, LMAX, RMAX and zigzag table pointers. + ;// Count (r6) is read but never written before its first use - presumably preloaded by the caller; confirm. + ;// Out: r0 = OMX_Sts_NoErr when decoding stopped on an event with Last=1, otherwise OMX_Sts_Err. + ;// The bitstream position is handed to M_BD_FINI on both exits (presumably written back; confirm). + + M_START armVCM4P2_DecodeVLCZigzag_AC_unsafe + + ;// get the table addresses from stack + M_ARG ppVlcTableL0L1,4 + M_ARG ppLMAXTableL0L1,4 + M_ARG ppRMAXTableL0L1,4 + M_ARG ppZigzagTable,4 + + ;// Store ALL zeros at pDst + + MOV temp1,#0 ;// temp1 = 0: one of the four zero registers stored by the STMs below (this does not touch Count, r6) + MOV Last,#0 + M_STR LR,pLinkRegister ;// Store Link Register on Stack + MOV temp2,#0 + MOV LR,#0 + + ;// Initialize the Macro and Store all zeros to pDst + + STM pDst!,{temp2,temp1,Last,LR} + M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount + STM pDst!,{temp2,temp1,Last,LR} + M_BD_INIT1 T1, T2, T2 + STM pDst!,{temp2,temp1,Last,LR} + M_BD_INIT2 T1, T2, T2 + STM pDst!,{temp2,temp1,Last,LR} + M_STR ppBitStream,pppBitStream ;// Store ppBitstream on stack + STM pDst!,{temp2,temp1,Last,LR} + M_STR pBitOffset,ppOffset ;// Store pBitOffset on stack + STM pDst!,{temp2,temp1,Last,LR} + + STM pDst!,{temp2,temp1,Last,LR} + STM pDst!,{temp2,temp1,Last,LR} + + + SUB pDst,pDst,#128 ;// Restore pDst (8 STMs x 16 bytes = 128 bytes were zeroed) + + ;// The armVCM4P2_GetVLCBits begins + +getVLCbits + + M_BD_LOOK8
Escape,7 ;// Load Escape Value + LSR Escape,Escape,#25 + CMP Escape,#3 ;// check for escape mode + MOVNE ftype,#0 + BNE notEscapemode ;// Branch if not in Escape mode 3 + + M_BD_VSKIP8 #7,T1 + CMP shortVideoHeader,#0 ;// Check shortVideoHeader flag to know the type of Escape mode + BEQ endFillVLD + + ;// Escape Mode 4 (short video header): fixed-length Last(1), Run(6), Level(8) + + M_BD_READ8 Last,1,T1 + M_BD_READ8 storeRun,6,T1 + M_BD_READ8 storeLevel,8,T1 + + + ;// Check whether the Reserved values for Level are used and Exit with an Error Message if it is so + + TEQ storeLevel,#0 + TEQNE storeLevel,#128 + BEQ ExitError + + ADD temp2,storeRun,Count + CMP temp2,#64 + BGE ExitError ;// error if Count+storeRun >= 64 + + + ;// Load address of zigzagTable + + M_LDR pZigzagTable,ppZigzagTable ;// Loading the Address of Zigzag table + + + ;// armVCM4P2_FillVLDBuffer + + SXTB storeLevel,storeLevel ;// Sign Extend storeLevel to 32 bits + + + ;// To Reflect Run length + + ADD Count,Count,storeRun + LDRB zigzag,[pZigzagTable,Count] + CMP Last,#1 ;// Last=1 ends the block, Last=0 means more events follow + ADD Count,Count,#1 + STRH storeLevel,[pDst,zigzag] ;// store Level + + BNE end ;// Not the last coefficient: keep decoding, as storeLevelL1 and storeLevelLast do. + ;// (Previously an unconditional B ExitOk, so ExitOk returned OMX_Sts_Err for every escape event with Last=0.) + + B ExitOk + + + +endFillVLD + + + ;// Load Ftype( Escape Mode) value based on the two successive bits in the bitstream + + M_BD_READ8 temp1,1,T1 + CMP temp1,#0 + MOVEQ ftype,#1 + BEQ notEscapemode + M_BD_READ8 temp1,1,T1 + CMP temp1,#1 + MOVEQ ftype,#3 + MOVNE ftype,#2 + + +notEscapemode + + ;// Load optimized packed VLC table with last=0 and Last=1 + + M_LDR pVlcTableL0L1,ppVlcTableL0L1 ;// Load Combined VLC Table + + + CMP ftype,#3 ;// If ftype >=3 perform Fixed Length Decoding (Escape Mode 3) + BGE EscapeMode3 ;// Else continue normal VLC Decoding + + ;// Variable length decoding, "armUnPackVLC32" + + + M_BD_VLD packRetIndex,T1,T2,pVlcTableL0L1,4,2 + + + LDR temp3,=0xFFF + + CMP packRetIndex,temp3 ;// Check for invalid symbol + BEQ ExitError ;// if invalid symbol occurs exit with an error message + + AND Last,packRetIndex,#2 ;// Get Last from packed Index (Last is 0 or 2 from here until storeLevelL1 shifts it) + + + + + LSR storeRun,packRetIndex,#7 ;// Get Run Value from Packed index +
AND storeLevel,packRetIndex,#0x7c ;// storeLevel=packRetIndex[2-6],storeLevel[0-1]=0 + + + M_LDR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Load LMAX table + + + LSR storeLevel,storeLevel,#2 ;// Level value + + CMP ftype,#1 + BNE ftype2 + + ;// ftype==1; Escape mode =1 : Level += LMAX[Last][Run] + + + ADD temp1, pLMAXTableL0L1, Last, LSL#4 ;// Last is 0 or 2 here, so this adds 32 to the table address when Last=1 + LDRB temp1,[temp1,storeRun] + + + ADD storeLevel,temp1,storeLevel + +ftype2 + + ;// ftype =2; Escape mode =2 : Run += RMAX[Last][Level-1] + 1 + + M_LDR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Load RMAX Table + + CMP ftype,#2 + BNE FillVLDL1 + + ADD temp1, pRMAXTableL0L1, Last, LSL#4 ;// Last is 0 or 2 here, so this adds 32 to the table address when Last=1 + SUB temp2,storeLevel,#1 ;// temp2 shares r4 with pRMAXTableL0L1; the table address is already folded into temp1 + LDRB temp1,[temp1,temp2] + + + ADD storeRun,storeRun,#1 + ADD storeRun,temp1 + +FillVLDL1 + + + ;// armVCM4P2_FillVLDBuffer + + M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable + + M_BD_READ8 sign,1,T1 + + CMP sign,#1 + RSBEQ storeLevel,storeLevel,#0 + + ADD temp1,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63 + CMP temp1,#64 + BGE ExitError + + + + + + + ;// To Reflect Run length + + ADD Count,Count,storeRun + +storeLevelL1 + + LDRB zigzag,[pZigzagTable,Count] + CMP Last,#2 ;// Check if the Level val is Last non zero val + ADD Count,Count,#1 + LSR Last,Last,#1 ;// normalise Last to 0/1 for the test at ExitOk + STRH storeLevel,[pDst,zigzag] + + BNE end + + B ExitOk + + + + ;// Fixed Length Decoding Escape Mode 3 + +EscapeMode3 + + M_BD_READ8 Last,1,T1 + M_BD_READ8 storeRun,6,T1 + + ADD temp2,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63 + CMP temp2,#64 + BGE ExitError + + M_BD_READ8 markerbit,1,T1 + TEQ markerbit,#0 ;// Exit with an error message if marker bit is zero + BEQ ExitError + + M_BD_READ16 storeLevel,12,T1 + + TST storeLevel,#0x800 ;// test if the level is negative + SUBNE storeLevel,storeLevel,#4096 + CMP storeLevel,#0 + CMPNE storeLevel,#-2048 + BEQ ExitError ;// Exit with an error message if Level==0 or -2048 + + M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable + +
M_BD_READ8 markerbit,1,T1 ;// second marker bit is consumed but, unlike the first, not checked + + + ;// armVCM4P2_FillVLDBuffer ( Sign not used as storeLevel is preprocessed) + + + + ;// To Reflect Run Length + + ADD Count,Count,storeRun + + + +storeLevelLast + + LDRB zigzag,[pZigzagTable,Count] + CMP Last,#1 + ADD Count,Count,#1 + STRH storeLevel,[pDst,zigzag] + + BNE end + + B ExitOk + +end + + CMP Count,#64 ;//Run the Loop until Count reaches 64 + + BLT getVLCbits + + +ExitOk + ;// Exit When VLC Decoding is done Successfully + ;// Also reached by falling out of the loop with Count >= 64; in that case Last != 1 and OMX_Sts_Err is returned + + ;// Loading ppBitStream and pBitOffset from stack + + CMP Last,#1 ;// flags are consumed by MOVEQ/MOVNE below (assumes M_LDR and M_BD_FINI preserve them - confirm) + M_LDR ppBitStream,pppBitStream + M_LDR pBitOffset,ppOffset + + ;//Ending the macro + + M_BD_FINI ppBitStream,pBitOffset + + MOVEQ Return,#OMX_Sts_NoErr + MOVNE Return,#OMX_Sts_Err + M_LDR LR,pLinkRegister ;// Load the Link Register Back + B exit2 + +ExitError + ;// Exit When an Error occurs + + M_LDR ppBitStream,pppBitStream + M_LDR pBitOffset,ppOffset + ;//Ending the macro + + M_BD_FINI ppBitStream,pBitOffset + M_LDR LR,pLinkRegister + MOV Return,#OMX_Sts_Err + +exit2 + + + M_END + ENDIF + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c new file mode 100755 index 0000000000000000000000000000000000000000..38af9758691e1a4e1b23ce60722389d655c0eca3 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c @@ -0,0 +1,211 @@ + /** + * + * File Name: armVCM4P2_Huff_Tables_VLC.c + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + * + * + * File: armVCM4P2_Huff_Tables_VLC.c + * Description: Contains all the Huffman tables used in MPEG4 codec + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armCOMM_Bitstream.h" + + + + +// Contains optimized and Packed VLC tables with Last=0 and Last=1 + +// optimized Packed VLC table Entry Format +// --------------------------------------- +// +// 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +// +------------------------------------------------+ +// | Len | Run | Level |L | 1 | +// +------------------------------------------------+ +// | Offset | 0 | +// +------------------------------------------------+ +// If the table entry is a leaf entry then bit 0 set: +// Len = Number of bits overread (0 to 7) 3 bits +// Run = RunLength of the Symbol (0 to 63) 6 bits +// Level = Level of the Symbol (0 to 31) 5 bits +// L = Last Value of the Symbol (0 or 1) 1 bit +// +// If the table entry is an internal node then bit 0 is clear: +// Offset = Number of (16-bit) half words from the table +// start to the next table node +// +// The table is accessed by successive lookup up on the +// next Step bits of the input bitstream until a leaf node +// is obtained. The Step sizes are supplied to the VLD macro. + +// The VLC tables used for Intra and non inta coefficients in non Escape mode +// contains symbols with both Last=0 and Last=1. 
+// If a symbol is not found in the table it will be coded as 0xFFF + + +/* Packed VLC decode tree for INTER blocks (200 half-word entries, format described above). */ +const OMX_U16 armVCM4P2_InterVlcL0L1[200] = { + 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x1b09, + 0x4009, 0x4009, 0x4009, 0x4009, 0x2109, 0x2109, 0x0209, 0x0011, + 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058, + 0x3fff, 0x3fff, 0x0038, 0x0040, 0x2115, 0x2115, 0x201d, 0x201d, + 0x2059, 0x2059, 0x2051, 0x2051, 0x1c0d, 0x1b0d, 0x1a0d, 0x190d, + 0x0911, 0x0811, 0x0711, 0x0611, 0x0511, 0x0319, 0x0219, 0x0121, + 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088, + 0x2061, 0x2061, 0x2129, 0x2129, 0x3709, 0x3709, 0x3809, 0x3809, + 0x3d0d, 0x3d0d, 0x3e0d, 0x3e0d, 0x3f0d, 0x3f0d, 0x200d, 0x200d, + 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x0131, 0x0221, 0x0419, 0x0519, + 0x0619, 0x0a11, 0x1909, 0x1a09, 0x210d, 0x220d, 0x230d, 0x240d, + 0x250d, 0x260d, 0x270d, 0x280d, 0x00c0, 0x00c8, 0x00d0, 0x00d8, + 0x0049, 0x0041, 0x380d, 0x380d, 0x370d, 0x370d, 0x360d, 0x360d, + 0x350d, 0x350d, 0x340d, 0x340d, 0x330d, 0x330d, 0x320d, 0x320d, + 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x310d, 0x310d, 0x2015, 0x2015, + 0x3609, 0x3609, 0x3509, 0x3509, 0x3409, 0x3409, 0x3309, 0x3309, + 0x3209, 0x3209, 0x3109, 0x3109, 0x0110, 0x0130, 0x0138, 0x0140, + 0x0118, 0x0120, 0x0128, 0x100d, 0x3009, 0x3009, 0x2f09, 0x2f09, + 0x2411, 0x2411, 0x2311, 0x2311, 0x2039, 0x2039, 0x2031, 0x2031, + 0x0f0d, 0x0e0d, 0x0d0d, 0x0c0d, 0x0b0d, 0x0a0d, 0x090d, 0x0e09, + 0x0d09, 0x0211, 0x0119, 0x0029, 0x0150, 0x0158, 0x0160, 0x0168, + 0x280d, 0x280d, 0x270d, 0x270d, 0x260d, 0x260d, 0x250d, 0x250d, + 0x2c09, 0x2c09, 0xb759, 0xb759, 0x2a09, 0x2a09, 0x2021, 0x2021, + 0x040d, 0x030d, 0x0b35, 0x010d, 0x0909, 0x0809, 0x0709, 0x0609, + 0x0111, 0x0019, 0x2509, 0x2509, 0x2409, 0x2409, 0x2309, 0x2309 +}; + + +/* Packed VLC decode tree for INTRA blocks (200 half-word entries, same entry format). */ +const OMX_U16 armVCM4P2_IntraVlcL0L1[200] = { + 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x0f09, + 0x4009, 0x4009, 0x4009, 0x4009, 0x2011, 0x2011, 0x0109, 0x0019, + 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030,
0x0048, 0x0050, 0x0058, + 0x3fff, 0x3fff, 0x0038, 0x0040, 0x203d, 0x203d, 0x2035, 0x2035, + 0x20b1, 0x20b1, 0x20a9, 0x20a9, 0x0215, 0x011d, 0x002d, 0x0d09, + 0x0519, 0x0811, 0x0419, 0x0321, 0x0221, 0x0139, 0x00a1, 0x0099, + 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088, + 0x20b9, 0x20b9, 0x20c1, 0x20c1, 0x2141, 0x2141, 0x2911, 0x2911, + 0x2315, 0x2315, 0x2415, 0x2415, 0x2f0d, 0x2f0d, 0x300d, 0x300d, + 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x00c9, 0x00d1, 0x00d9, 0x0149, + 0x0619, 0x0151, 0x0229, 0x0719, 0x0e09, 0x0045, 0x0515, 0x0615, + 0x110d, 0x120d, 0x130d, 0x140d, 0x00c0, 0x00c8, 0x00d0, 0x00d8, + 0x0091, 0x0089, 0x2e0d, 0x2e0d, 0x2d0d, 0x2d0d, 0x2c0d, 0x2c0d, + 0x2b0d, 0x2b0d, 0x2a0d, 0x2a0d, 0x2115, 0x2115, 0x2025, 0x2025, + 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x2c09, 0x2c09, 0x2b09, 0x2b09, + 0x2711, 0x2711, 0x2611, 0x2611, 0x2511, 0x2511, 0x2319, 0x2319, + 0x2219, 0x2219, 0x2131, 0x2131, 0x0110, 0x0130, 0x0138, 0x0140, + 0x0118, 0x0120, 0x0128, 0x080d, 0x2129, 0x2129, 0x2081, 0x2081, + 0x2411, 0x2411, 0x2079, 0x2079, 0x2071, 0x2071, 0x2069, 0x2069, + 0x1bb5, 0x060d, 0x001d, 0xd3f9, 0x0909, 0x0809, 0x090d, 0x0311, + 0x0121, 0x0061, 0x0059, 0x0051, 0x0150, 0x0158, 0x0160, 0x0168, + 0x240d, 0x240d, 0x230d, 0x230d, 0x2609, 0x2609, 0x250d, 0x250d, + 0x2709, 0x2709, 0x2211, 0x2211, 0x2119, 0x2119, 0x2049, 0x2049, + 0x0015, 0x0509, 0x020d, 0x010d, 0x0409, 0x0309, 0x0041, 0x0039, + 0x0111, 0x0031, 0x2209, 0x2209, 0x2029, 0x2029, 0x2021, 0x2021 +}; + +/* Two 32-entry packed VLC trees stored back to back (entries 0-31 and 32-63). + Presumably the intra DC size trees for luminance and chrominance in that order - confirm against the caller. */ +const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64] = { + 0x0020, 0x000b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2001, 0x2001, + 0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003, + 0x0028, 0x000f, 0x200d, 0x200d, 0x0030, 0x0013, 0x2011, 0x2011, + 0x0038, 0x0017, 0x2015, 0x2015, 0x3fff, 0x3fff, 0x2019, 0x2019, + + 0x0020, 0x0009, 0x2007, 0x2007, 0x4005, 0x4005, 0x4005, 0x4005, + 0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001, + 0x0028, 0x000d, 0x200b, 0x200b, 0x0030, 0x0011, 0x200f,
0x200f, + 0x0038, 0x0015, 0x2013, 0x2013, 0x1fff, 0x0019, 0x2017, 0x2017 +}; + + +/* Packed VLC tree for motion vector differences (124 half-word entries); + loaded by omxVCM4P2_DecodePadMV_PVOP and walked with M_BD_VLD step sizes 3 and 2. */ +const OMX_U16 armVCM4P2_aVlcMVD[124] = { + 0x0010, 0x00f0, 0x0043, 0x003f, 0x4041, 0x4041, 0x4041, 0x4041, + 0x0018, 0x00d8, 0x0047, 0x003b, 0x0020, 0x0080, 0x00a8, 0x00d0, + 0x0028, 0x0048, 0x0070, 0x0078, 0x1fff, 0x0030, 0x0038, 0x0040, + 0x0081, 0x0001, 0x007f, 0x0003, 0x207d, 0x207d, 0x2005, 0x2005, + 0x207b, 0x207b, 0x2007, 0x2007, 0x0050, 0x0058, 0x0060, 0x0068, + 0x2079, 0x2079, 0x2009, 0x2009, 0x2077, 0x2077, 0x200b, 0x200b, + 0x2075, 0x2075, 0x200d, 0x200d, 0x2073, 0x2073, 0x200f, 0x200f, + 0x0071, 0x0011, 0x006f, 0x0013, 0x006d, 0x0015, 0x006b, 0x0017, + 0x0088, 0x0090, 0x0098, 0x00a0, 0x0069, 0x0019, 0x0067, 0x001b, + 0x0065, 0x001d, 0x0063, 0x001f, 0x0061, 0x0021, 0x005f, 0x0023, + 0x005d, 0x0025, 0x005b, 0x0027, 0x00b0, 0x00b8, 0x00c0, 0x00c8, + 0x0059, 0x0029, 0x0057, 0x002b, 0x2055, 0x2055, 0x202d, 0x202d, + 0x2053, 0x2053, 0x202f, 0x202f, 0x2051, 0x2051, 0x2031, 0x2031, + 0x204f, 0x204f, 0x2033, 0x2033, 0x00e0, 0x00e8, 0x0049, 0x0039, + 0x204d, 0x204d, 0x2035, 0x2035, 0x204b, 0x204b, 0x2037, 0x2037, + 0x2045, 0x2045, 0x203d, 0x203d +}; + +/* LMAX table for Inter blocks (Last == 0 followed by Last == 1) + Indexed by Run: armVCM4P2_DecodeVLCZigzag_AC_unsafe adds table[32*Last + Run] to Level in escape mode 1 + padded armVCM4P2_InterL0L1LMAX[27-31] with zeros so that the Last=1 entries start at index 32 + +*/ +const OMX_U8 armVCM4P2_InterL0L1LMAX[73] = +{ + 12, 6, 4, 3, 3, 3, 3, 2, + 2, 2, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 0, 0, 0, + 3, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1 +}; + +/* RMAX table for Inter blocks (Last == 0 followed by Last == 1) + Indexed by Level - 1: the decoder adds table[32*Last + Level - 1] + 1 to Run in escape mode 2 + padded armVCM4P2_InterL0L1RMAX[12-31] with zeros so that the Last=1 entries start at index 32 */ + + +const OMX_U8 armVCM4P2_InterL0L1RMAX[35] = +{ + 26, 10, 6, 2, 1, 1, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, 40, 1, 0 +}; + +/* LMAX table for Intra blocks (Last ==
0 followed by Last == 1) + Indexed by Run: the decoder adds table[32*Last + Run] to Level in escape mode 1 + padded armVCM4P2_IntraL0L1LMAX[15-31] with zeros so that the Last=1 entries start at index 32 + +*/ +const OMX_U8 armVCM4P2_IntraL0L1LMAX[53] = +{ + 27, 10, 5, 4, 3, 3, 3, + 3, 2, 2, 1, 1, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 8, 3, 2, 2, 2, 2, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 +}; + + +/* RMAX table for Intra blocks (Last == 0 followed by Last == 1) + Indexed by Level - 1: the decoder adds table[32*Last + Level - 1] + 1 to Run in escape mode 2 + padded armVCM4P2_IntraL0L1RMAX[27-31] with zeros so that the Last=1 entries start at index 32 */ + + +const OMX_U8 armVCM4P2_IntraL0L1RMAX[40] = +{ + 14, 9, 7, 3, 2, 1, 1, + 1, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + + 20, 6, 1, 0, 0, 0, 0, 0 + +}; diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Lookup_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Lookup_Tables.c new file mode 100755 index 0000000000000000000000000000000000000000..6948f8062aae6e0a200e51bdbe54a752f570ee30 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Lookup_Tables.c @@ -0,0 +1,75 @@ + /** + * + * File Name: armVCM4P2_Lookup_Tables.c + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + * + * + * File: armVCM4P2_Lookup_Tables.c + * Description: Contains all the Lookup tables used in MPEG4 codec + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + + /* * Table Entries contain Dc Scaler values; entries 0-31 and 32-63 form two 32-entry + * sub-tables (presumably luminance then chrominance, each indexed by QP - confirm against the caller) + * armVCM4P2_DCScaler[i]= 8 for i=1 to 4 and i=33 to 36 + * = 2*i for i=5 to 8 + * = i+8 for i=9 to 24 + * = 2*i-16 for i=25 to 31 + * = (i-32+13)/2 for i=37 to 56 + * = i-6-32 for i=57 to 63 + * = 255 for i=0 and i=32 + */ + +const OMX_U8 armVCM4P2_DCScaler[64]={ + 0xff, 0x8, 0x8, 0x8, 0x8, 0xa, 0xc, 0xe, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, + 0xff, 0x8, 0x8, 0x8, 0x8, 0x9, 0x9, 0xa, + 0xa, 0xb, 0xb, 0xc, 0xc, 0xd, 0xd, 0xe, + 0xe, 0xf, 0xf, 0x10, 0x10, 0x11, 0x11, 0x12, + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, + +}; + + + /* Table Entries Contain the scaled reciprocals of 1 to 63 (Q15: 32767/i) + * armVCM4P2_Reciprocal_QP_S16[i]=round(32767/i) + * armVCM4P2_Reciprocal_QP_S16[0]= 0 + */ + +const OMX_S16 armVCM4P2_Reciprocal_QP_S16[64]={ + 0x0000,0x7fff,0x4000,0x2aaa,0x2000,0x1999,0x1555,0x1249, + 0x1000,0x0e39,0x0ccd,0x0ba3,0x0aab,0x09d9,0x0925,0x0888, + 0x0800,0x0787,0x071c,0x06bd,0x0666,0x0618,0x05d1,0x0591, + 0x0555,0x051f,0x04ec,0x04be,0x0492,0x046a,0x0444,0x0421, + 0x0400,0x03e1,0x03c4,0x03a8,0x038e,0x0376,0x035e,0x0348, + 0x0333,0x031f,0x030c,0x02fa,0x02e9,0x02d8,0x02c8,0x02b9, + 0x02ab,0x029d,0x028f,0x0282,0x0276,0x026a,0x025f,0x0254, + 0x0249,0x023f,0x0235,0x022b,0x0222,0x0219,0x0211,0x0208 + +}; + + /* Table Entries Contain the scaled reciprocals of 1 to 63 (Q17: 131071/i) + * armVCM4P2_Reciprocal_QP_S32[i]=round(131071/i) + * armVCM4P2_Reciprocal_QP_S32[0]= 0 + */ + +const OMX_S32 armVCM4P2_Reciprocal_QP_S32[64]={ + 0x00000000,0x0001ffff,0x00010000,0x0000aaaa, 0x00008000, 0x00006666, 0x00005555, 0x00004924, + 0x00004000,0x000038e3,0x00003333,0x00002e8c, 0x00002aab, 0x00002762, 0x00002492, 0x00002222, + 0x00002000,0x00001e1e,0x00001c72,0x00001af2, 0x0000199a, 0x00001861,
0x00001746, 0x00001643, + 0x00001555,0x0000147b,0x000013b1,0x000012f6, 0x00001249, 0x000011a8, 0x00001111, 0x00001084, + 0x00001000,0x00000f84,0x00000f0f,0x00000ea1, 0x00000e39, 0x00000dd6, 0x00000d79, 0x00000d21, + 0x00000ccd,0x00000c7d,0x00000c31,0x00000be8, 0x00000ba3, 0x00000b61, 0x00000b21, 0x00000ae5, + 0x00000aab,0x00000a73,0x00000a3d,0x00000a0a, 0x000009d9, 0x000009a9, 0x0000097b, 0x0000094f, + 0x00000925,0x000008fb,0x000008d4,0x000008ae, 0x00000889, 0x00000865, 0x00000842, 0x00000820 + +}; diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_SetPredDir_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_SetPredDir_s.s new file mode 100755 index 0000000000000000000000000000000000000000..44f246052ce80c0b2f2ce1082b8e6d272520f8bf --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_SetPredDir_s.s @@ -0,0 +1,104 @@ +;// +;// +;// File Name: armVCM4P2_SetPredDir_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + +; ** +; * Function: armVCM4P2_SetPredDir +; * +; * Description: +; * Performs detecting the prediction direction +; * +; * Remarks: +; * +; * Parameters: +; * [in] blockIndex block index indicating the component type and +; * position as defined in subclause 6.1.3.8, of ISO/IEC +; * 14496-2. Furthermore, indexes 6 to 9 indicate the +; * alpha blocks spatially corresponding to luminance +; * blocks 0 to 3 in the same macroblock.
+; * [in] pCoefBufRow pointer to the coefficient row buffer +; * [in] pQpBuf pointer to the quantization parameter buffer +; * [out]predQP quantization parameter of the predictor block +; * [out]predDir indicates the prediction direction which takes one +; * of the following values: +; * OMX_VC_HORIZONTAL predict horizontally +; * OMX_VC_VERTICAL predict vertically +; * +; * Return Value: +; * Standard OMXResult result. See enumeration for possible result codes. +; * +; */ + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE omxVC_s.h + + + M_VARIANTS ARM1136JS + + + IF ARM1136JS + +;// Input Arguments +;// r0-r3 arrive in registers; predQP and pQpBuf are the 5th and 6th arguments and are loaded from the stack below. +;// pCoefBufCol (r2, the coefficient column buffer) is an input although the header above does not list it. +BlockIndex RN 0 +pCoefBufRow RN 1 +pCoefBufCol RN 2 +predDir RN 3 +predQP RN 4 +pQpBuf RN 5 + +;// Local Variables + +Return RN 0 +blockDCLeft RN 6 +blockDCTop RN 7 +blockDCTopLeft RN 8 +temp1 RN 9 +temp2 RN 14 + +;// Compares |DC(left) - DC(top-left)| with |DC(top-left) - DC(top)|: +;// if the first is smaller -> *predDir = OMX_VC_VERTICAL, *predQP = pQpBuf[1] +;// otherwise -> *predDir = OMX_VC_HORIZONTAL, *predQP = pQpBuf[0] +;// Always returns OMX_Sts_NoErr. + + M_START armVCM4P2_SetPredDir,r9 + + M_ARG ppredQP,4 + M_ARG ppQpBuf,4 + + LDRH blockDCTopLeft,[pCoefBufRow,#-16] ;// DC of the top-left neighbour: 16 bytes before pCoefBufRow + LDRH blockDCLeft,[pCoefBufCol] ;// DC of the left neighbour + ;// NOTE(review): LDRH zero-extends, so a negative 16-bit DC would compare as a large positive value - confirm DC predictors are never negative + + TEQ BlockIndex,#3 + LDREQH blockDCTop,[pCoefBufCol,#-16] ;// block 3 takes its top DC from 16 bytes before pCoefBufCol + LDRNEH blockDCTop,[pCoefBufRow] ;// every other block takes it from the start of pCoefBufRow + + SUBS temp1,blockDCLeft,blockDCTopLeft + RSBLT temp1,temp1,#0 ;// temp1 = |left - topLeft| + SUBS temp2,blockDCTopLeft,blockDCTop + RSBLT temp2,temp2,#0 ;// temp2 = |topLeft - top| + + M_LDR pQpBuf,ppQpBuf + M_LDR predQP,ppredQP + CMP temp1,temp2 ;// the LT/GE conditions below all use this comparison; the MOVs in between do not set flags + MOV temp2,#OMX_VC_VERTICAL + LDRLTB temp1,[pQpBuf,#1] + STRLT temp2,[predDir] + STRLT temp1,[predQP] + MOV temp2,#OMX_VC_HORIZONTAL + LDRGEB temp1,[pQpBuf] + STRGE temp2,[predDir] + MOV Return,#OMX_Sts_NoErr + STRGE temp1,[predQP] + + + + M_END + + ENDIF + + END + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c new file mode 100755 index 0000000000000000000000000000000000000000..21fa7153f65c26e7ef82e9dd8d19d4446e1efeae --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c @@ -0,0 +1,61 @@ +/** + * + * File Name:
armVCM4P2_Zigzag_Tables.c + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armVCM4P2_ZigZag_Tables.c + * Description: Contains the zigzag tables + * + */ + +#include "omxtypes.h" + +/* Contains Double the values in the reference Zigzag Table, i.e. each entry is the byte offset + * of a 16-bit coefficient (the VLC decoder stores with STRH [pDst, entry]). + * Contains Classical, Vertical and Horizontal Zigzagscan tables in one array (64 entries each, in that order). + * Each 64-entry scan must be a permutation of the even offsets 0..126. + */ + +const OMX_U8 armVCM4P2_aClassicalZigzagScan [192] = +{ + 0, 2, 16, 32, 18, 4, 6, 20, + 34, 48, 64, 50, 36, 22, 8, 10, + 24, 38, 52, 66, 80, 96, 82, 68, + 54, 40, 26, 12, 14, 28, 42, 56, + 70, 84, 98, 112, 114, 100, 86, 72, + 58, 44, 30, 46, 60, 74, 88, 102, + 116, 118, 104, 90, 76, 62, 78, 92, + 106, 120, 122, 108, 94, 110, 124, 126, + + 0, 16, 32, 48, 2, 18, 4, 20, + 34, 50, 64, 80, 96, 112, 114, 98, + 82, 66, 52, 36, 6, 22, 8, 24, + 38, 54, 68, 84, 100, 116, 70, 86, + 102, 118, 40, 56, 10, 26, 12, 28, + 42, 58, 72, 88, 104, 120, 74, 90, + 106, 122, 44, 60, 14, 30, 46, 62, + 76, 92, 108, 124, 78, 94, 110, 126, + + 0, 2, 4, 6, 16, 18, 32, 34, + 20, 22, 8, 10, 12, 14, 30, 28, + 26, 24, 38, 36, 48, 50, 64, 66, + 52, 54, 40, 42, 44, 46, 56, 58, + 60, 62, 68, 70, 80, 82, 96, 98, + 84, 86, 72, 74, 76, 78, 88, 90, + 92, 94, 100, 102, 112, 114, 116, 118, + 104, 106, 108, 110, 120, 122, 124, 126 + + +}; + + + + + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c new file mode 100755 index 0000000000000000000000000000000000000000..796ad6eab2b7ba691e302db71f1a2678f76b7f42 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c @@ -0,0 +1,102 @@ +/** + * + * File Name: omxVCM4P2_DecodeBlockCoef_Inter.c + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c)
Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for inter reconstruction + * + */ + + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Inter + * + * Description: + * Decodes the INTER block coefficients. Inverse quantization, inverse zigzag + * positioning and IDCT, with appropriate clipping on each step, are performed + * on the coefficients. The results (residuals) are placed in a contiguous array + * of 64 elements. For INTER block, the output buffer holds the residuals for + * further reconstruction. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream buffer. There is no boundary + * check for the bit stream buffer. + * [in] pBitOffset pointer to the bit position in the byte pointed + * to by *ppBitStream. *pBitOffset is valid within + * [0-7] + * [in] QP quantization parameter + * [in] shortVideoHeader a flag indicating presence of short_video_header; + * shortVideoHeader==1 indicates using quantization method defined in short + * video header mode, and shortVideoHeader==0 indicates normal quantization method. + * [out] ppBitStream *ppBitStream is updated after the block is decoded, so that it points to the + * current byte in the bit stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the current bit position in the + * byte pointed by *ppBitStream + * [out] pDst pointer to the decoded residual buffer (a contiguous array of 64 elements of + * OMX_S16 data type). Must be 16-byte aligned.
+ * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following pointers is Null: ppBitStream, *ppBitStream, pBitOffset , pDst + * - At least one of the below case: + * - *pBitOffset exceeds [0,7], QP <= 0; + * - pDst not 16-byte aligned + * OMX_Sts_Err - status error + * + */ +OMXResult omxVCM4P2_DecodeBlockCoef_Inter( + const OMX_U8 ** ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_INT QP, + OMX_INT shortVideoHeader +) +{ + /* 64 elements are needed but to align it to 16 bytes need + 15 more elements of padding */ + OMX_S16 tempBuf[79]; + OMX_S16 *pTempBuf1; + OMXResult errorCode; + /* Aligning the local buffers */ + pTempBuf1 = armAlignTo16Bytes(tempBuf); + + + /* VLD and zigzag */ + errorCode = omxVCM4P2_DecodeVLCZigzag_Inter(ppBitStream, pBitOffset, + pTempBuf1,shortVideoHeader); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Dequantization */ + errorCode = omxVCM4P2_QuantInvInter_I( + pTempBuf1, + QP); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Inverse transform */ + errorCode = omxVCM4P2_IDCT8x8blk(pTempBuf1, pDst); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + return OMX_Sts_NoErr; +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c new file mode 100755 index 0000000000000000000000000000000000000000..b28657c308a15f64ac399ef50f436f85eff09d08 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c @@ -0,0 +1,214 @@ +/** + * + * File Name: omxVCM4P2_DecodeBlockCoef_Intra.c + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Description: + * Contains modules for intra reconstruction + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/* Function for saturating 16 bit values to the [0,255] range and */ +/* writing out as 8 bit values. Does 64 entries */ +void armVCM4P2_Clip8(OMX_S16 *pSrc, OMX_U8 *pDst, OMX_INT dstStep ); + + + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Intra + * + * Description: + * Decodes the INTRA block coefficients. Inverse quantization, inverse zigzag + * positioning, and IDCT, with appropriate clipping on each step, are performed + * on the coefficients. The results are then placed in the output frame/plane on + * a pixel basis. For INTRA block, the output values are clipped to [0, 255] and + * written to corresponding block buffer within the destination plane. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream buffer. There is no boundary + * check for the bit stream buffer. + * [in] pBitOffset pointer to the bit position in the byte pointed + * to by *ppBitStream. *pBitOffset is valid within + * [0-7]. + * [in] step width of the destination plane + * [in/out] pCoefBufRow [in] pointer to the coefficient row buffer + * [out] updated coefficient row buffer + * [in/out] pCoefBufCol [in] pointer to the coefficient column buffer + * [out] updated coefficient column buffer + * [in] curQP quantization parameter of the macroblock which + * the current block belongs to + * [in] pQpBuf Pointer to a 2-element QP array. pQpBuf[0] holds the QP of the 8x8 block left to + * the current block(QPa). pQpBuf[1] holds the QP of the 8x8 block just above the + * current block(QPc). + * Note, in case the corresponding block is out of VOP bound, the QP value will have + * no effect on the intra-prediction process.
Refer to subclause "7.4.3.3 Adaptive + * ac coefficient prediction" of ISO/IEC 14496-2(MPEG4 Part2) for accurate description. + * [in] blockIndex block index indicating the component type and + * position as defined in subclause 6.1.3.8, + * Figure 6-5 of ISO/IEC 14496-2. + * [in] intraDCVLC a code determined by intra_dc_vlc_thr and QP. + * This allows a mechanism to switch between two VLC + * for coding of Intra DC coefficients as per Table + * 6-21 of ISO/IEC 14496-2. + * [in] ACPredFlag a flag equal to ac_pred_flag (of luminance) indicating + * if the ac coefficients of the first row or first + * column are differentially coded for intra coded + * macroblock. + * [in] shortVideoHeader a flag indicating presence of short_video_header; + * shortVideoHeader==1 selects linear intra DC mode, + * and shortVideoHeader==0 selects nonlinear intra DC mode. + * [out] ppBitStream *ppBitStream is updated after the block is + * decoded, so that it points to the current byte + * in the bit stream buffer + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream + * [out] pDst pointer to the block in the destination plane. + * pDst should be 16-byte aligned. + * [out] pCoefBufRow pointer to the updated coefficient row buffer. + * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, + * pCoefBufRow, pCoefBufCol, pQPBuf, pDst. + * or + * - At least one of the below case: *pBitOffset exceeds [0,7], curQP exceeds (1, 31), + * blockIndex exceeds [0,9], step is not the multiple of 8, intraDCVLC is zero while + * blockIndex greater than 5. 
+ * or + * - pDst is not 16-byte aligned + * OMX_Sts_Err - status error + * + */ + +OMXResult omxVCM4P2_DecodeBlockCoef_Intra( + const OMX_U8 ** ppBitStream, + OMX_INT *pBitOffset, + OMX_U8 *pDst, + OMX_INT step, + OMX_S16 *pCoefBufRow, + OMX_S16 *pCoefBufCol, + OMX_U8 curQP, + const OMX_U8 *pQPBuf, + OMX_INT blockIndex, + OMX_INT intraDCVLC, + OMX_INT ACPredFlag, + OMX_INT shortVideoHeader + ) +{ + OMX_S16 tempBuf1[79], tempBuf2[79]; + OMX_S16 *pTempBuf1, *pTempBuf2; + OMX_INT predDir, predACDir; + OMX_INT predQP; + OMXVCM4P2VideoComponent videoComp; + OMXResult errorCode; + + + /* Aligning the local buffers */ + pTempBuf1 = armAlignTo16Bytes(tempBuf1); + pTempBuf2 = armAlignTo16Bytes(tempBuf2); + + /* Setting the AC prediction direction and prediction direction */ + armVCM4P2_SetPredDir( + blockIndex, + pCoefBufRow, + pCoefBufCol, + &predDir, + &predQP, + pQPBuf); + + predACDir = predDir; + + + if (ACPredFlag == 0) + { + predACDir = OMX_VC_NONE; + } + + /* Setting the videoComp */ + if (blockIndex <= 3) + { + videoComp = OMX_VC_LUMINANCE; + } + else + { + videoComp = OMX_VC_CHROMINANCE; + } + + + /* VLD and zigzag */ + if (intraDCVLC == 1) + { + errorCode = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC( + ppBitStream, + pBitOffset, + pTempBuf1, + predACDir, + shortVideoHeader, + videoComp); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + } + else + { + errorCode = omxVCM4P2_DecodeVLCZigzag_IntraACVLC( + ppBitStream, + pBitOffset, + pTempBuf1, + predACDir, + shortVideoHeader); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + } + + /* AC DC prediction */ + errorCode = omxVCM4P2_PredictReconCoefIntra( + pTempBuf1, + pCoefBufRow, + pCoefBufCol, + curQP, + predQP, + predDir, + ACPredFlag, + videoComp); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Dequantization */ + errorCode = omxVCM4P2_QuantInvIntra_I( + pTempBuf1, + curQP, + videoComp, + shortVideoHeader); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* 
Inverse transform */ + errorCode = omxVCM4P2_IDCT8x8blk (pTempBuf1, pTempBuf2); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Placing the linear array into the destination plane and clipping + it to 0 to 255 */ + + armVCM4P2_Clip8(pTempBuf2,pDst,step); + + + return OMX_Sts_NoErr; +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s new file mode 100755 index 0000000000000000000000000000000000000000..cc16f5a230b6be2fdcbdb39c597a001fac7f148c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s @@ -0,0 +1,364 @@ +; ********** +; * +; * File Name: omxVCM4P2_DecodePadMV_PVOP_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 12290 +; * Date: Wednesday, April 9, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; ** +; * Function: omxVCM4P2_DecodePadMV_PVOP +; * +; * Description: +; * Decodes and pads four motion vectors of the non-intra macroblock in P-VOP. +; * The motion vector padding process is specified in subclause 7.6.1.6 of +; * ISO/IEC 14496-2. +; * +; * Remarks: +; * +; * +; * Parameters: +; * [in] ppBitStream pointer to the pointer to the current byte in +; * the bit stream buffer +; * [in] pBitOffset pointer to the bit position in the byte pointed +; * to by *ppBitStream. *pBitOffset is valid within +; * [0-7]. +; * [in] pSrcMVLeftMB pointers to the motion vector buffers of the +; * macroblocks specially at the left side of the current macroblock +; * respectively. +; * [in] pSrcMVUpperMB pointers to the motion vector buffers of the +; * macroblocks specially at the upper side of the current macroblock +; * respectively. 
+; * [in] pSrcMVUpperRightMB pointers to the motion vector buffers of the +; * macroblocks specially at the upper-right side of the current macroblock +; * respectively. +; * [in] fcodeForward a code equal to vop_fcode_forward in MPEG-4 +; * bit stream syntax +; * [in] MBType the type of the current macroblock. If MBType +; * is not equal to OMX_VC_INTER4V, the destination +; * motion vector buffer is still filled with the +; * same decoded vector. +; * [out] ppBitStream *ppBitStream is updated after the block is decoded, +; * so that it points to the current byte in the bit +; * stream buffer +; * [out] pBitOffset *pBitOffset is updated so that it points to the +; * current bit position in the byte pointed by +; * *ppBitStream +; * [out] pDstMVCurMB pointer to the motion vector buffer of the current +; * macroblock which contains four decoded motion vectors +; * +; * Return Value: +; * OMX_Sts_NoErr -no error +; * +; * +; * OMX_Sts_Err - status error +; * +; * + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + INCLUDE omxVC_s.h + + M_VARIANTS ARM1136JS + + + + + IF ARM1136JS + +;//Input Arguments + +ppBitStream RN 0 +pBitOffset RN 1 +pSrcMVLeftMB RN 2 +pSrcMVUpperMB RN 3 +pSrcMVUpperRightMB RN 4 +pDstMVCurMB RN 5 +fcodeForward RN 6 +MBType RN 7 + +;//Local Variables + +zero RN 4 +one RN 4 +scaleFactor RN 1 + + +Return RN 0 + +VlcMVD RN 0 +index RN 4 +Count RN 7 + +mvHorData RN 4 +mvHorResidual RN 0 + +mvVerData RN 4 +mvVerResidual RN 0 + +temp RN 1 + +temp1 RN 3 +High RN 4 +Low RN 2 +Range RN 1 + +BlkCount RN 14 + +diffMVdx RN 0 +diffMVdy RN 1 + +;// Scratch Registers + +RBitStream RN 8 +RBitCount RN 9 +RBitBuffer RN 10 + +T1 RN 11 +T2 RN 12 +LR RN 14 + + IMPORT armVCM4P2_aVlcMVD + IMPORT omxVCM4P2_FindMVpred + + ;// Allocate stack memory + + M_ALLOC4 ppDstMVCurMB,4 + M_ALLOC4 pDstMVPredME,4 + M_ALLOC4 pBlkCount,4 + + M_ALLOC4 pppBitStream,4 + M_ALLOC4 ppBitOffset,4 + M_ALLOC4 ppSrcMVLeftMB,4 + M_ALLOC4 ppSrcMVUpperMB,4 + + M_ALLOC4 
pdiffMVdx,4 + M_ALLOC4 pdiffMVdy,4 + M_ALLOC4 pHigh,4 + + + + + M_START omxVCM4P2_DecodePadMV_PVOP,r11 + + M_ARG pSrcMVUpperRightMBonStack,4 ;// pointer to pSrcMVUpperRightMB on stack + M_ARG pDstMVCurMBonStack,4 ;// pointer to pDstMVCurMB on stack + M_ARG fcodeForwardonStack,4 ;// pointer to fcodeForward on stack + M_ARG MBTypeonStack,4 ;// pointer to MBType on stack + + + + + + ;// Initializing the BitStream Macro + + M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount + M_LDR MBType,MBTypeonStack ;// Load MBType from stack + M_LDR pDstMVCurMB,pDstMVCurMBonStack ;// Load pDstMVCurMB from stack + MOV zero,#0 + + ;// Intra macroblocks: zero all four motion vectors (one 32-bit word each, byte offsets 0,4,8,12) and leave through ExitOK. + ;// The M_BD_INIT1/M_BD_INIT2 steps are interleaved with the conditional stores; they are assumed not to change the flags - confirm against armCOMM_BitDec_s.h + TEQ MBType,#OMX_VC_INTRA ;// Check if MBType=OMX_VC_INTRA + TEQNE MBType,#OMX_VC_INTRA_Q ;// check if MBType=OMX_VC_INTRA_Q + STREQ zero,[pDstMVCurMB] ;// motion vector 0 + M_BD_INIT1 T1, T2, T2 + STREQ zero,[pDstMVCurMB,#4] ;// motion vector 1 + M_BD_INIT2 T1, T2, T2 + STREQ zero,[pDstMVCurMB,#8] ;// motion vector 2 (offset was #4, which left vectors 2 and 3 unwritten) + MOVEQ Return,#OMX_Sts_NoErr + MOV BlkCount,#0 + STREQ zero,[pDstMVCurMB,#12] ;// motion vector 3 (offset was #4) + + BEQ ExitOK + + ;// Count (same register as MBType) = number of vectors to decode: 4 for INTER4V types, 1 for INTER types + TEQ MBType,#OMX_VC_INTER4V ;// Check if MBType=OMX_VC_INTER4V + TEQNE MBType,#OMX_VC_INTER4V_Q ;// Check if MBType=OMX_VC_INTER4V_Q + MOVEQ Count,#4 + + TEQ MBType,#OMX_VC_INTER ;// Check if MBType=OMX_VC_INTER + TEQNE MBType,#OMX_VC_INTER_Q ;// Check if MBType=OMX_VC_INTER_Q + MOVEQ Count,#1 + + M_LDR fcodeForward,fcodeForwardonStack ;// Load fcodeForward from stack + + ;// Storing the values temporarily on stack + + M_STR ppBitStream,pppBitStream + M_STR pBitOffset,ppBitOffset + + + SUB temp,fcodeForward,#1 ;// temp=fcodeForward-1 + MOV one,#1 + M_STR pSrcMVLeftMB,ppSrcMVLeftMB + LSL scaleFactor,one,temp ;// scaleFactor=1<<(fcodeForward-1) + M_STR pSrcMVUpperMB,ppSrcMVUpperMB + LSL scaleFactor,scaleFactor,#5 + M_STR scaleFactor,pHigh ;// [pHigh]=32*scaleFactor + + ;// VLD Decoding + + +Loop + + LDR VlcMVD, =armVCM4P2_aVlcMVD ;// Load the optimized MVD VLC table + + ;// Horizontal Data and Residual calculation + + LDR temp,=0xFFF + M_BD_VLD index,T1,T2,VlcMVD,3,2 ;// variable length 
decoding using the macro + + TEQ index,temp + BEQ ExitError ;// Exit with an Error Message if the decoded symbol is an invalid symbol + + SUB mvHorData,index,#32 ;// mvHorData=index-32 + MOV mvHorResidual,#1 ;// mvHorResidual=1 + CMP fcodeForward,#1 + TEQNE mvHorData,#0 + MOVEQ diffMVdx,mvHorData ;// if scaleFactor=1(fcodeForward=1) or mvHorData=0 diffMVdx=mvHorData + BEQ VerticalData + + SUB temp,fcodeForward,#1 + M_BD_VREAD8 mvHorResidual,temp,T1,T2 ;// get mvHorResidual from bitstream if fcodeForward>1 and mvHorData!=0 + + CMP mvHorData,#0 + RSBLT mvHorData,mvHorData,#0 ;// mvHorData=abs(mvHorData) + SUB mvHorResidual,mvHorResidual,fcodeForward + SMLABB diffMVdx,mvHorData,fcodeForward,mvHorResidual ;// diffMVdx=abs(mvHorData)*fcodeForward+mvHorResidual-fcodeForward. NOTE(review): the multiplier is fcodeForward, while scaleFactor above is 1<<(fcodeForward-1); the two differ once fcodeForward >= 3 - verify against ISO/IEC 14496-2 + ADD diffMVdx,diffMVdx,#1 + RSBLT diffMVdx,diffMVdx,#0 ;// restore the sign, using the flags of the CMP mvHorData,#0 above + + ;// Vertical Data and Residual calculation + +VerticalData + + M_STR diffMVdx,pdiffMVdx ;// Store the diffMVdx on stack + LDR VlcMVD, =armVCM4P2_aVlcMVD ;// Loading the address of optimized VLC tables + + LDR temp,=0xFFF + M_BD_VLD index,T1,T2,VlcMVD,3,2 ;// VLC decoding using the macro + + TEQ index,temp + BEQ ExitError ;// Exit with an Error Message if an Invalid Symbol occurs + + SUB mvVerData,index,#32 ;// mvVerData=index-32 + MOV mvVerResidual,#1 + CMP fcodeForward,#1 + TEQNE mvVerData,#0 + MOVEQ diffMVdy,mvVerData ;// diffMVdy = mvVerData if scaleFactor=1(fcodeForward=1) or mvVerData=0 + BEQ FindMVPred + + SUB temp,fcodeForward,#1 + M_BD_VREAD8 mvVerResidual,temp,T1,T2 ;// Get mvVerResidual from bit stream if fcodeForward>1 and mvVerData!=0 + + + CMP mvVerData,#0 + RSBLT mvVerData,mvVerData,#0 + SUB mvVerResidual,mvVerResidual,fcodeForward + SMLABB diffMVdy,mvVerData,fcodeForward,mvVerResidual ;// diffMVdy=abs(mvVerData)*fcodeForward+mvVerResidual-fcodeForward (same NOTE(review) as for diffMVdx) + ADD diffMVdy,diffMVdy,#1 + RSBLT diffMVdy,diffMVdy,#0 + + ;//Calling the Function omxVCM4P2_FindMVpred + +FindMVPred + + M_STR diffMVdy,pdiffMVdy + ADD 
temp,pDstMVCurMB,BlkCount,LSL #2 ;// temp=&pDstMVCurMB[BlkCount] (base + 4*BlkCount) + M_STR temp,ppDstMVCurMB ;// store temp on stack for passing as an argument to FindMVPred + + MOV temp,#0 + M_STR temp,pDstMVPredME ;// Pass pDstMVPredME=NULL as an argument + M_STR BlkCount,pBlkCount ;// Pass BlkCount as Argument through stack + + MOV temp,pSrcMVLeftMB ;// temp (RN 1)=pSrcMVLeftMB + M_LDR pSrcMVUpperRightMB,pSrcMVUpperRightMBonStack + MOV pSrcMVLeftMB,pSrcMVUpperMB ;// pSrcMVLeftMB ( RN 2) = pSrcMVUpperMB + MOV ppBitStream,pDstMVCurMB ;// ppBitStream ( RN 0) = pDstMVCurMB + MOV pSrcMVUpperMB,pSrcMVUpperRightMB ;// pSrcMVUpperMB( RN 3) = pSrcMVUpperRightMB + BL omxVCM4P2_FindMVpred ;// Branch to subroutine omxVCM4P2_FindMVpred + + ;// Store Horizontal Motion Vector + + M_LDR BlkCount,pBlkCount ;// Load BlkCount from stack + M_LDR High,pHigh ;// High=32*scaleFactor + LSL temp1,BlkCount,#2 ;// temp1=BlkCount*4 + M_LDR diffMVdx,pdiffMVdx ;// Load diffMVdx + + LDRSH temp,[pDstMVCurMB,temp1] ;// temp=pDstMVCurMB[BlkCount] + + + RSB Low,High,#0 ;// Low = -32*scaleFactor + ADD diffMVdx,temp,diffMVdx ;// diffMVdx=pDstMVCurMB[BlkCount]+diffMVdx + ADD Range,High,High ;// Range=64*ScaleFactor + SUB High,High,#1 ;// High= 32*scaleFactor-1 + + CMP diffMVdx,Low ;// If diffMVdx High diffMVdx-=Range + ;// NOTE(review): text between a '<' and a '>' appears to have been lost from the line above during extraction (the range-wrap instructions are missing here); restore from the upstream file + STRH diffMVdx,[pDstMVCurMB,temp1] + + ;// Store Vertical + + ADD temp1,temp1,#2 ;// temp1=4*BlkCount+2 + M_LDR diffMVdx,pdiffMVdy ;// Load diffMVdy + LDRSH temp,[pDstMVCurMB,temp1] ;// temp=pDstMVCurMB[BlkCount].diffMVdy + ADD BlkCount,BlkCount,#1 ;// BlkCount=BlkCount+1 + ADD diffMVdx,temp,diffMVdx + CMP diffMVdx,Low + ADDLT diffMVdx,diffMVdx,Range ;// If diffMVdy High diffMVdy-=Range + ;// NOTE(review): same apparent extraction loss as in the horizontal case above + STRH diffMVdx,[pDstMVCurMB,temp1] + + CMP BlkCount,Count + M_LDR pSrcMVLeftMB,ppSrcMVLeftMB + M_LDR pSrcMVUpperMB,ppSrcMVUpperMB + + BLT Loop ;// If BlkCount31, + predQP > 31, preDir exceeds [1,2]. +; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not +; * 4-byte aligned. 
+; * +; ********* + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + + + + IMPORT armVCM4P2_Reciprocal_QP_S32 + IMPORT armVCM4P2_Reciprocal_QP_S16 + IMPORT armVCM4P2_DCScaler + + IF CortexA8 +;// Input Arguments (pSrcDst..curQP are mapped to r0-r3; the remaining four are fetched from the stack with M_LDR further down) + +pSrcDst RN 0 +pPredBufRow RN 1 +pPredBufCol RN 2 +curQP RN 3 +QP RN 3 +predQP RN 4 +predDir RN 5 +ACPredFlag RN 6 +videoComp RN 7 + +;// Local Variables (several names share one register, e.g. index, temp1 and ACPredFlag are all r6) + +shortVideoHeader RN 4 +dcScaler RN 4 +index RN 6 +predCoeffTable RN 7 +temp1 RN 6 +temp2 RN 9 +temp RN 14 +Const RN 8 +temppPredColBuf RN 8 +tempPred RN 9 + +absCoeffDC RN 8 +negdcScaler RN 10 +Rem RN 11 +temp3 RN 12 + +dcRowbufCoeff RN 10 +dcColBuffCoeff RN 11 +Return RN 0 + +;//NEON Registers + +qPredRowBuf QN Q0.S16 +dPredRowBuf0 DN D0.S16 +dPredRowBuf1 DN D1.S16 + + + + +qCoeffTab QN Q1.S32 + +qPredQP QN Q2.S16 +dPredQP0 DN D4.S16 +dPredQP1 DN D5.S16 + + +qtemp1 QN Q3.S32 +qtemp QN Q3.S16 + +dtemp0 DN D6.S16 +dtemp1 DN D7.S16 + +dtemp2 DN D8.S16 +dtemp3 DN D9.S16 + +dtemp4 DN D2.S16 +dtemp5 DN D3.S16 +dtemp6 DN D4.S16 +dtemp7 DN D5.S16 + +qtempPred1 QN Q5.S32 +qtempPred QN Q5.S16 + +dtempPred0 DN D10.S16 +dtempPred1 DN D11.S16 + + + + M_START omxVCM4P2_PredictReconCoefIntra,r11,d11 + + ;// Assigning pointers to Input arguments on Stack + + M_ARG predQPonStack,4 + M_ARG predDironStack,4 + M_ARG ACPredFlagonStack,4 + M_ARG videoComponStack,4 + + ;// DC Prediction: tempPred = signed, rounded (prediction DC / dcScaler), where the prediction DC comes from the row buffer when predDir==2 and from the column buffer otherwise + + M_LDR videoComp,videoComponStack ;// Load videoComp From Stack + + M_LDR predDir,predDironStack ;// Load Prediction direction + ;// DC Scaler calculation: dcScaler = byte at armVCM4P2_DCScaler + 32*videoComp + QP + LDR index, =armVCM4P2_DCScaler + ADD index,index,videoComp,LSL #5 + LDRB dcScaler,[index,QP] + + + LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S16 ;// Loading the table with entries 32767/(1 to 63) + CMP predDir,#2 ;// Check if the Prediction direction is vertical + + ;// Calculate tempPred + + LDREQSH absCoeffDC,[pPredBufRow] ;// If vertical load the coeff from Row Prediction Buffer + LDRNESH absCoeffDC,[pPredBufCol] ;// If horizontal load the coeff from column 
Prediction Buffer + + RSB negdcScaler,dcScaler,#0 ;// negdcScaler=-dcScaler + MOV temp1,absCoeffDC ;// Keep the signed prediction coeff in temp1 for the sign comparisons + CMP temp1,#0 + RSBLT absCoeffDC,temp1,#0 ;// calculate absolute val of prediction coeff + + ADD temp,dcScaler,dcScaler ;// temp=2*dcScaler, byte offset of the halfword table entry + LDRH temp,[predCoeffTable,temp] ;// Load value from coeff table for performing division using multiplication + SMULBB tempPred,temp,absCoeffDC ;// tempPred=abs(pPredBufRow(Col)[0])*(32767/dcScaler) + ADD temp3,dcScaler,#1 + LSR tempPred,tempPred,#15 ;// tempPred=abs(pPredBufRow(Col)[0])/dcScaler + LSR temp3,temp3,#1 ;// temp3=(dcScaler+1)>>1, i.e. round(dcScaler/2) + MLA Rem,negdcScaler,tempPred,absCoeffDC ;// Remainder Rem=abs(pPredBufRow(Col)[0])-tempPred*dcScaler + + LDRH dcRowbufCoeff,[pPredBufCol] ;// read pPredBufCol[0] before it is overwritten further down + + CMP Rem,temp3 ;// compare Rem with (dcScaler/2) + ADDGE tempPred,#1 ;// tempPred=tempPred+1 if Rem>=(dcScaler/2) + CMP temp1,#0 + RSBLT tempPred,tempPred,#0 ;// tempPred=-tempPred if the signed prediction coeff (temp1) is negative + + STRH dcRowbufCoeff,[pPredBufRow,#-16] ;// store that old pPredBufCol[0] 16 bytes before pPredBufRow + + + LDRH temp,[pSrcDst] ;// temp=pSrcDst[0] + ADD temp,temp,tempPred ;// temp=pSrcDst[0]+tempPred + SSAT16 temp,#12,temp ;// clip temp to [-2048,2047] + SMULBB dcColBuffCoeff,temp,dcScaler ;// dcColBuffCoeff=clipped(pSrcDst[0]+tempPred)*dcScaler + M_LDR ACPredFlag,ACPredFlagonStack + STRH dcColBuffCoeff,[pPredBufCol] + + + ;// AC Prediction + + M_LDR predQP,predQPonStack + + CMP ACPredFlag,#1 ;// Check if the AC prediction flag is set or not + BNE Exit ;// If not set Exit + CMP predDir,#2 ;// Check the Prediction direction + LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S32 ;// Loading the table with entries 0x1ffff/(1 to 63) + MOV Const,#4 + MUL curQP,curQP,Const ;// curQP=4*curQP (byte offset of the word table entry) + VDUP dPredQP0,predQP + LDR temp2,[predCoeffTable,curQP] ;// temp2=table entry for curQP (0x1ffff/curQP per the table description above) + VDUP qCoeffTab,temp2 + BNE Horizontal ;// If the Prediction direction is horizontal branch to Horizontal + + + + ;// Vertical + ;//Calculating tempPred + + VLD1 {dPredRowBuf0,dPredRowBuf1},[pPredBufRow] ;// Loading pPredBufRow[i]:i=0 to 7 + + VMULL 
qtemp1,dPredRowBuf0,dPredQP0 ;//qtemp1[i]=pPredBufRow[i]*dPredQP[i]: i=0 to 3 + VMUL qtempPred1,qtemp1,qCoeffTab ;//qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP : i=0 to 3 + + VMULL qtemp1,dPredRowBuf1,dPredQP0 ;//qtemp1[i]=pPredBufRow[i]*dPredQP[i] : i=4 to 7 + + VRSHR qtempPred1,qtempPred1,#17 ;//qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP) : i=0 to 3 + VSHRN dPredQP1,qtempPred1,#0 ;// narrow qtempPred1[i] to 16 bits (parked in dPredQP1 until moved to dtempPred0 below) + + + VMUL qtempPred1,qtemp1,qCoeffTab ;//qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP : i=4 to 7 + VRSHR qtempPred1,qtempPred1,#17 ;//qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP) : i=4 to 7 + VLD1 {dtemp0,dtemp1},[pSrcDst] ;//Loading pSrcDst[i] : i=0 to 7 + VSHRN dtempPred1,qtempPred1,#0 ;// narrow qtempPred1[i] to 16 bits + VMOV dtempPred0,dPredQP1 + + ;//updating source and row prediction buffer contents + VADD qtemp,qtemp,qtempPred ;//pSrcDst[i]=pSrcDst[i]+qtempPred[i]: i=0 to 7 + VQSHL qtemp,qtemp,#4 ;//Clip to [-2048,2047] (saturating shift left by 4, shifted back right below) + LDRH dcRowbufCoeff,[pPredBufRow] ;//Loading Dc Value of Row Prediction buffer + VSHR qtemp,qtemp,#4 + + VST1 {dtemp0,dtemp1},[pSrcDst] ;//storing back the updated values + VST1 {dtemp0,dtemp1},[pPredBufRow] ;//storing back the updated row prediction values + STRH dcRowbufCoeff,[pPredBufRow] ;// putting back the DC Row Prediction coeff that was loaded before the vector store overwrote it + + B Exit + +Horizontal + + ;// Calculating tempPred + + + + VLD1 {dPredRowBuf0,dPredRowBuf1},[pPredBufCol] ;// Loading pPredBufCol[i]:i=0 to 7 + VMULL qtemp1,dPredRowBuf0,dPredQP0 ;//qtemp1[i]=pPredBufCol[i]*dPredQP[i]: i=0 to 3 + VMUL qtempPred1,qtemp1,qCoeffTab ;//qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP : i=0 to 3 + + VMULL qtemp1,dPredRowBuf1,dPredQP0 ;//qtemp1[i]=pPredBufCol[i]*dPredQP[i] : i=4 to 7 + + VRSHR qtempPred1,qtempPred1,#17 ;//qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP) : i=0 to 3 + VSHRN dPredQP1,qtempPred1,#0 ;// narrow qtempPred1[i] to 16 bits + + + VMUL qtempPred1,qtemp1,qCoeffTab 
;//qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP : i=4 to 7 + + MOV temppPredColBuf,pPredBufCol + VRSHR qtempPred1,qtempPred1,#17 ;//qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP) : i=4 to 7 + VLD4 {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst] ;// Loading coefficients Interleaving by 4 + VSHRN dtempPred1,qtempPred1,#0 ;// narrow qtempPred1[i] to 16 bits + VMOV dtempPred0,dPredQP1 + + ;// Updating source and column prediction buffer contents + ADD temp2,pSrcDst,#32 + VLD4 {dtemp4,dtemp5,dtemp6,dtemp7},[temp2] ;// Loading next 16 coefficients Interleaving by 4 + VUZP dtemp0,dtemp4 ;// Interleaving by 8 (NOTE(review): VUZP is the unzip operation and the VZIP below the zip; the original wording is kept) + VADD dtemp0,dtemp0,dtempPred0 ;// Adding tempPred to coeffs + VQSHL dtemp0,dtemp0,#4 ;// Clip to [-2048,2047] + VSHR dtemp0,dtemp0,#4 + VST1 {dtemp0},[pPredBufCol]! ;// Updating Prediction column buffer + VZIP dtemp0,dtemp4 ;// deinterleaving + VST4 {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst] ;// Updating source coeffs + VST4 {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]! + + MOV temp1,temp2 + VLD4 {dtemp0,dtemp1,dtemp2,dtemp3},[temp2]! ;// Loading coefficients Interleaving by 4 + + VLD4 {dtemp4,dtemp5,dtemp6,dtemp7},[temp2] + VUZP dtemp0,dtemp4 ;// Interleaving by 8 + VADD dtemp0,dtemp0,dtempPred1 + VQSHL dtemp0,dtemp0,#4 ;// Clip to [-2048,2047] + VSHR dtemp0,dtemp0,#4 + VST1 {dtemp0},[pPredBufCol]! 
+ VZIP dtemp0,dtemp4 + VST4 {dtemp0,dtemp1,dtemp2,dtemp3},[temp1] + STRH dcColBuffCoeff,[temppPredColBuf] + VST4 {dtemp4,dtemp5,dtemp6,dtemp7},[temp2] + +Exit + + STRH temp,[pSrcDst] + + + MOV Return,#OMX_Sts_NoErr + + M_END + ENDIF + + + END + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s new file mode 100755 index 0000000000000000000000000000000000000000..bd0ad1fefd4b958110d9bdb175c7f21492bcb633 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s @@ -0,0 +1,162 @@ +;/** +; * +; * File Name: omxVCM4P2_QuantInvInter_I_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 12290 +; * Date: Wednesday, April 9, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains modules for inter reconstruction +; * +; * +; * +; * +; * +; * Function: omxVCM4P2_QuantInvInter_I +; * +; * Description: +; * Performs inverse quantization on intra/inter coded block. +; * This function supports bits_per_pixel = 8. Mismatch control +; * is performed for the first MPEG-4 mode inverse quantization method. +; * The output coefficients are clipped to the range: [-2048, 2047]. +; * Mismatch control is performed for the first inverse quantization method. +; * +; * Remarks: +; * +; * Parameters: +; * [in] pSrcDst pointer to the input (quantized) intra/inter block. Must be 16-byte aligned. +; * [in] QP quantization parameter (quantiser_scale) +; * [in] videoComp (Intra version only.) Video component type of the +; * current block. Takes one of the following flags: +; * OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE, +; * OMX_VC_ALPHA. +; * [in] shortVideoHeader a flag indicating presence of short_video_header; +; * shortVideoHeader==1 selects linear intra DC mode, +; * and shortVideoHeader==0 selects nonlinear intra DC mode. 
+; * [out] pSrcDst pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned. +; * +; * Return Value: +; * OMX_Sts_NoErr - no error +; * OMX_Sts_BadArgErr - bad arguments +; * - If pSrcDst is NULL or is not 16-byte aligned. +; * or +; * - If QP <= 0. +; * or +; * - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA. +; * +; */ + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + + IF CortexA8 + + +;//Input Arguments +pSrcDst RN 0 +QP RN 1 + + +;//Local Variables +Count RN 3 +doubleQP RN 4 +Return RN 0 +;// Neon registers (qResult0/qSign0 both name Q3 and qResult1/qSign1 both name Q4, which the VMLAL accumulation in the loop relies on) + + +dQP10 DN D0.S32[0] +qQP1 QN Q0.S32 + +dQP1 DN D0.S16 +dMinusQP1 DN D1.S16 + +dCoeff0 DN D2.S16 +dCoeff1 DN D3.S16 + +qResult0 QN Q3.S32 +dResult0 DN D7.S16 +qSign0 QN Q3.S32 +dSign0 DN D6.S16 + +qResult1 QN Q4.S32 +dResult1 DN D8.S16 +qSign1 QN Q4.S32 +dSign1 DN D8.S16 + +d2QP0 DN D10.S32[0] +q2QP0 QN Q5.S32 +d2QP DN D10.S16 + +dZero0 DN D11.S16 +dZero1 DN D12.S16 +dConst0 DN D13.S16 + + + M_START omxVCM4P2_QuantInvInter_I,r4,d13 + + + + ADD doubleQP,QP,QP ;// doubleQP= 2*QP + VMOV d2QP0,doubleQP + VDUP q2QP0,d2QP0 ;// Copy doubleQP into every 32-bit lane of q2QP0 + TST QP,#1 + VLD1 {dCoeff0,dCoeff1},[pSrcDst] ;// Load first 8 values to Coeff0,Coeff1 + SUBEQ QP,QP,#1 + VMOV dQP10,QP ;// If QP is even then QP1=QP-1 else QP1=QP + MOV Count,#64 ;// 64 coefficients, 8 handled per loop pass + VDUP qQP1,dQP10 ;// Copy QP1 into every 32-bit lane of qQP1 + VSHRN d2QP,q2QP0,#0 ;// 2*QP narrowed to 16-bit lanes + VEOR dConst0,dConst0,dConst0 ;// dConst0=0 + VSHRN dQP1,qQP1,#0 ;// QP1 truncated to 16 bits + VSUB dMinusQP1,dConst0,dQP1 ;// dMinusQP1=-QP1 + +Loop + + ;//Performing Inverse Quantization: result = Coeff*2*QP + QP1 for positive Coeff, Coeff*2*QP - QP1 for negative Coeff, 0 for Coeff == 0, clipped to [-2048,2047] + + VCLT dSign0,dCoeff0, #0 ;// dSign0 lanes = all ones where Coefficient 0 is below 0 + VCLT dSign1,dCoeff1, #0 ;// dSign1 lanes = all ones where Coefficient 1 is below 0 + VCEQ dZero0,dCoeff0,#0 ;// Compare Coefficient 0 against zero + VBSL dSign0,dMinusQP1,dQP1 ;// dSign0 = -QP1 if Coeff0< 0 else QP1 + VCEQ dZero1,dCoeff1,#0 ;// Compare Coefficient 1 against zero + VBSL dSign1,dMinusQP1,dQP1 ;// dSign1 = -QP1 if Coeff1< 0 else QP1 + VMOVL qSign0,dSign0 
;// Sign extend qSign0 to 32 bits + VMOVL qSign1,dSign1 + VMLAL qResult0,dCoeff0,d2QP ;// qResult0 is the same register as qSign0: qResult0[i]= Coeff0[i]*2QP - QP1 if Coeff <0 + ;// qResult0[i]= Coeff0[i]*2QP + QP1 if Coeff >=0 + VMLAL qResult1,dCoeff1,d2QP ;// qResult1 is the same register as qSign1: qResult1[i]= Coeff1[i]*2QP - QP1 if Coeff <0 + ;// qResult1[i]= Coeff1[i]*2QP + QP1 if Coeff >=0 + ;// Clip Result to [-2048,2047]: saturating left shift by 20, then right shifts of 4 and 16 + + VQSHL qResult0,qResult0,#20 ;// clip to [-2048,2047] + VQSHL qResult1,qResult1,#20 + + VSHR qResult0,qResult0,#4 + VSHR qResult1,qResult1,#4 + VSHRN dResult0,qResult0,#16 ;// Narrow the clipped Value to Halfword + VSHRN dResult1,qResult1,#16 + VBIT dResult0,dConst0,dZero0 ;// force lanes whose input coefficient was zero to 0 + VBIT dResult1,dConst0,dZero1 + + VST1 {dResult0,dResult1},[pSrcDst]! ;// Store the result + SUBS Count,Count,#8 + VLD1 {dCoeff0,dCoeff1},[pSrcDst] ;// pre-load the next 8 coefficients; on the final pass this reads the 16 bytes just past the 64-coefficient block (the loaded values are not used) + + + BGT Loop + + MOV Return,#OMX_Sts_NoErr + + + M_END + ENDIF + + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s new file mode 100755 index 0000000000000000000000000000000000000000..e00591fc18efdb4561f07b879d1de4175bd584c2 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s @@ -0,0 +1,210 @@ +;/** +; * +; * File Name: omxVCM4P2_QuantInvIntra_I_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 12290 +; * Date: Wednesday, April 9, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains modules for inter reconstruction +; * +; * +; * +; * +; * +; * +; * Function: omxVCM4P2_QuantInvIntra_I +; * +; * Description: +; * Performs inverse quantization on intra/inter coded block. +; * This function supports bits_per_pixel = 8. Mismatch control +; * is performed for the first MPEG-4 mode inverse quantization method. +; * The output coefficients are clipped to the range: [-2048, 2047]. 
+; * Mismatch control is performed for the first inverse quantization method. +; * +; * Remarks: +; * +; * Parameters: +; * [in] pSrcDst pointer to the input (quantized) intra/inter block. Must be 16-byte aligned. +; * [in] QP quantization parameter (quantiser_scale) +; * [in] videoComp (Intra version only.) Video component type of the +; * current block. Takes one of the following flags: +; * OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE, +; * OMX_VC_ALPHA. +; * [in] shortVideoHeader a flag indicating presence of short_video_header; +; * shortVideoHeader==1 selects linear intra DC mode, +; * and shortVideoHeader==0 selects nonlinear intra DC mode. +; * [out] pSrcDst pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned. +; * +; * Return Value: +; * OMX_Sts_NoErr - no error +; * OMX_Sts_BadArgErr - bad arguments +; * - If pSrcDst is NULL or is not 16-byte aligned. +; * or +; * - If QP <= 0. +; * or +; * - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA. 
+; * + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + + + IMPORT armVCM4P2_DCScaler + + IF CortexA8 + + +;//Input Arguments +pSrcDst RN 0 +QP RN 1 +videoComp RN 2 +shortVideoHeader RN 3 + + +;//Local Variables (dcScaler and doubleQP share r4, index and Count share r5) + +dcScaler RN 4 +temp RN 14 +index RN 5 + + +Count RN 5 +doubleQP RN 4 +Return RN 0 + + +;// Neon registers (qResult0/qSign0 both name Q3 and qResult1/qSign1 both name Q4, which the VMLAL accumulation in the loop relies on) + + +dQP10 DN D0.S32[0] +qQP1 QN Q0.S32 + +dQP1 DN D0.S16 +dMinusQP1 DN D1.S16 + +dCoeff0 DN D2.S16 +dCoeff1 DN D3.S16 + +qResult0 QN Q3.S32 +dResult0 DN D7.S16 +qSign0 QN Q3.S32 +dSign0 DN D6.S16 + +qResult1 QN Q4.S32 +dResult1 DN D8.S16 +qSign1 QN Q4.S32 +dSign1 DN D8.S16 + +d2QP0 DN D10.S32[0] +q2QP0 QN Q5.S32 +d2QP DN D10.S16 + +dZero0 DN D11.S16 +dZero1 DN D4.S16 +dConst0 DN D5.S16 + + + + + + + M_START omxVCM4P2_QuantInvIntra_I,r5,d11 + + + ;// Perform Inverse Quantization for DC coefficient: dcScaler is 8 when shortVideoHeader is non zero, otherwise the byte at armVCM4P2_DCScaler + 32*videoComp + QP + + TEQ shortVideoHeader,#0 ;// Test if short Video Header flag =0 + MOVNE dcScaler,#8 ;// if shortVideoHeader is non zero dcScaler=8 + BNE calDCVal + + LDR index, =armVCM4P2_DCScaler + ADD index,index,videoComp,LSL #5 + LDRB dcScaler,[index,QP] + + ;//M_CalDCScalar shortVideoHeader,videoComp, QP + +calDCVal + + LDRH temp,[pSrcDst] ;// temp = quantized DC coefficient pSrcDst[0] + SMULBB temp,temp,dcScaler ;// dcCoeff = dcScaler * Quantized DC coefficient(from memory) + SSAT temp,#12,temp ;// Saturating to 12 bits + + + + ;// Perform Inverse Quantization for AC Coefficients (the vector loop walks all 64 entries including entry 0; the DC result held in temp is written over entry 0 after the loop) + + + + ADD doubleQP,QP,QP ;// doubleQP= 2*QP (reuses the dcScaler register, which is no longer needed) + VMOV d2QP0,doubleQP + VDUP q2QP0,d2QP0 ;// Copy doubleQP into every 32-bit lane of q2QP0 + TST QP,#1 + VLD1 {dCoeff0,dCoeff1},[pSrcDst] ;// Load first 8 values to Coeff0,Coeff1 + SUBEQ QP,QP,#1 + VMOV dQP10,QP ;// If QP is even then QP1=QP-1 else QP1=QP + MOV Count,#64 ;// 64 coefficients, 8 handled per loop pass + VDUP qQP1,dQP10 ;// Copy QP1 into every 32-bit lane of qQP1 + VSHRN d2QP,q2QP0,#0 ;// 2*QP narrowed to 16-bit lanes + VEOR dConst0,dConst0,dConst0 ;// dConst0=0 + VSHRN dQP1,qQP1,#0 ;// QP1 truncated to 16 bits + VSUB dMinusQP1,dConst0,dQP1 ;// dMinusQP1=-QP1 + +Loop + + ;//Performing Inverse Quantization: result = Coeff*2*QP + QP1 for positive Coeff, Coeff*2*QP - QP1 for negative Coeff, 0 for Coeff == 0, clipped to [-2048,2047] + + VCLT dSign0,dCoeff0, #0 ;// dSign0 lanes = all ones where Coefficient 0 is below 0 + 
VCLT dSign1,dCoeff1, #0 ;// dSign1 lanes = all ones where Coefficient 1 is below 0 + VCEQ dZero0,dCoeff0,#0 ;// Compare Coefficient 0 against zero + VBSL dSign0,dMinusQP1,dQP1 ;// dSign0 = -QP1 if Coeff0< 0 else QP1 + VCEQ dZero1,dCoeff1,#0 ;// Compare Coefficient 1 against zero + VBSL dSign1,dMinusQP1,dQP1 ;// dSign1 = -QP1 if Coeff1< 0 else QP1 + VMOVL qSign0,dSign0 ;// Sign extend qSign0 to 32 bits + VMOVL qSign1,dSign1 + VMLAL qResult0,dCoeff0,d2QP ;// qResult0 is the same register as qSign0: qResult0[i]= Coeff0[i]*2QP - QP1 if Coeff <0 + ;// qResult0[i]= Coeff0[i]*2QP + QP1 if Coeff >=0 + VMLAL qResult1,dCoeff1,d2QP ;// qResult1 is the same register as qSign1: qResult1[i]= Coeff1[i]*2QP - QP1 if Coeff <0 + ;// qResult1[i]= Coeff1[i]*2QP + QP1 if Coeff >=0 + ;// Clip Result to [-2048,2047]: saturating left shift by 20, then right shifts of 4 and 16 + + VQSHL qResult0,qResult0,#20 ;// clip to [-2048,2047] + VQSHL qResult1,qResult1,#20 + + VSHR qResult0,qResult0,#4 + VSHR qResult1,qResult1,#4 + VSHRN dResult0,qResult0,#16 ;// Narrow the clipped Value to Halfword + VSHRN dResult1,qResult1,#16 + VBIT dResult0,dConst0,dZero0 ;// force lanes whose input coefficient was zero to 0 + VBIT dResult1,dConst0,dZero1 + + VST1 {dResult0,dResult1},[pSrcDst]! 
;// Store the result + SUBS Count,Count,#8 + VLD1 {dCoeff0,dCoeff1},[pSrcDst] ;// pre-load the next 8 coefficients; on the final pass this reads the 16 bytes just past the 64-coefficient block (the loaded values are not used) + + + BGT Loop + + SUB pSrcDst,pSrcDst,#128 ;// rewind pSrcDst to the start of the block (8 passes of 16 bytes) + + ;// Store the Inverse quantized Dc coefficient over entry 0 written by the loop + + STRH temp,[pSrcDst],#2 + + MOV Return,#OMX_Sts_NoErr + + + + M_END + ENDIF + + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/src/armVC_Version.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/src/armVC_Version.c new file mode 100755 index 0000000000000000000000000000000000000000..5d9368140505b5ca7168766b15bdd1b7f518b722 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/src/armVC_Version.c @@ -0,0 +1,6 @@ +#include "omxtypes.h" +#include "armCOMM_Version.h" + +#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS +const char * const omxVC_VersionDescription = "ARM OpenMAX DL v" ARM_VERSION_STRING " Rel=" OMX_ARM_RELEASE_TAG " Arch=" OMX_ARM_BUILD_ARCHITECTURE " Tools=" OMX_ARM_BUILD_TOOLCHAIN ; +#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_DELIVERY.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_DELIVERY.TXT new file mode 100644 index 0000000000000000000000000000000000000000..7801f3ddb6da365816edb874bccb5d936cc939de --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_DELIVERY.TXT @@ -0,0 +1,63 @@ +The contents of this transaction was created by Hedley Francis +of ARM on 19-Feb-2008. + +It contains the ARM data versions listed below. + +This data, unless otherwise stated, is ARM Proprietary and access to it +is subject to the agreements indicated below. + +If you experience problems with this data, please contact ARM support +quoting transaction reference <97412>. 
+ + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +- OX000-SW-98010-r0p0-00bet1 + Video codecs - sample code + Sample code release for Hantro (Ver 1.0.2) + internal access + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +This transaction contains deliverables which are designated as being of +beta release status (BET). + +Beta release status has a particular meaning to ARM of which the recipient +must be aware. Beta is a pre-release status indicating that the deliverable +so described is believed to robustly demonstrate specified behaviour, to be +consistent across its included aspects and be ready for general deployment. +But Beta also indicates that pre-release reliability trials are ongoing and +that it is possible residual defects or errors in operation, consistency +and documentation may still be encountered. The recipient should consider +this position when using this Beta material supplied. ARM will normally +attempt to provide fixes or a work-around for defects identified by the +recipient, but the provision or timeliness of this support cannot be +guaranteed. ARM shall not be responsible for direct or consequential +damages as a result of encountering one or more of these residual defects. +By accepting a Beta release, the recipient agrees to these constraints and +to providing reasonable information to ARM to enable the replication of the +defects identified by the recipient. The specific Beta version supplied +will not be supported after release of a later or higher status version. +It should be noted that Support for the Beta release of the deliverable +will only be provided by ARM to a recipient who has a current support and +maintenance contract for the deliverable. + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +In addition to the data versions listed above, this transaction contains +two additional files at the top level. 
+ +The first is this file, ARM_DELIVERY_97412.TXT, which is the delivery +note. + +The second is ARM_MANIFEST_97412.TXT which contains a manifest of all the +files included in this transaction, together with their checksums. + +The checksums provided are calculated using the RSA Data Security, Inc. +MD5 Message-Digest Algorithm. + +The checksums can be used to verify the integrity of this data using the +"md5sum" tool (which is part of the GNU "textutils" package) by running: + + % md5sum --check ARM_MANIFEST_97412.TXT + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_MANIFEST.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_MANIFEST.TXT new file mode 100644 index 0000000000000000000000000000000000000000..8e01b1e31dc02d27bf82d459d84e6ef609713035 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_MANIFEST.TXT @@ -0,0 +1,140 @@ + OX000-SW-98010-r0p0-00bet1/ + OX000-SW-98010-r0p0-00bet1/api/ +8971932d56eed6b1ad1ba507f0bff5f0 OX000-SW-98010-r0p0-00bet1/api/armCOMM_Bitstream.h +e88ec84e122534092b90c67841549d6f OX000-SW-98010-r0p0-00bet1/api/armCOMM_Version.h +43cf46c2cf2fe1f93c615b57bcbe4809 OX000-SW-98010-r0p0-00bet1/api/armCOMM.h +f87fedd9ca432fefa757008176864ef8 OX000-SW-98010-r0p0-00bet1/api/armOMX.h +8e49899a428822c36ef9dd94e0e05f18 OX000-SW-98010-r0p0-00bet1/api/omxtypes.h +a06983abb39c476b081e87ea271361a5 OX000-SW-98010-r0p0-00bet1/build_vc.pl +c01f8b93ab73d8c00ddf2499f01da5ff OX000-SW-98010-r0p0-00bet1/filelist_vc.txt + OX000-SW-98010-r0p0-00bet1/src/ +26e2ff3f633764eb720deb340978dc2d OX000-SW-98010-r0p0-00bet1/src/armCOMM_Bitstream.c +79aa23d9817efd11d0c4c2be36ec1e5c OX000-SW-98010-r0p0-00bet1/src/armCOMM.c + OX000-SW-98010-r0p0-00bet1/vc/ + OX000-SW-98010-r0p0-00bet1/vc/m4p10/ + OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/ +e45297704d72302d4a947d0798c666fb OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_CAVLCTables.c +205dfafe1fe7bb160bf36d2600e1100a 
OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c +bf92641e8548577b77e04e03ec04c358 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_MEGetBufSize.c +f5ee6f7be822d87471cef3b1801dbfc2 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c +28110b3a13cecf4f216d10bcc761c401 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c +9a1a25245c975d641e1c6378834aea4d OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c +3a643eaaaeb12e8d274dc59a7357a586 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c +4c4de5973a6b74250ce91ac0b317a617 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c +4ecdbe9193aaba1f9bb0e24c938b34f9 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c +66e912f8c88f6019cba3ede27150a407 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateLuma.c +266da42f4e3015e67b2cbb58169d437f OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c +d905247eeaa52d4e2cf5f6bc3f61b348 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c +5b29448db0495cd1717a4b925f13377c OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c +f6451df27f6dcc99036b4b1253c23bb6 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c +892787d850eef09dc2148d45b416b062 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c +33da1c01a31f47c0f3aea9a7a5eaa9be OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c +e9fb11b066775283dcfeae8d12a6c97a OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c +add97bec08e5e1a538aa8607168e61ba OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c +b695ecfc917b39470d1f40773b923972 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c +51bc596fd2ff61ad5450d7138461f4a1 
OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_MEInit.c +dc6baa0a388dc5ea8ff65c24b179e670 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_GetVLCInfo.c +a5499902996576f6712849db34d5ad65 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c +0c3b76745d53e74a8e64e80def31faba OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c +4f2742ba5a3c2208f53bc0f6a443be14 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SADQuar_4x.c +b4ae2dc948e8ca64831fe3bbfbd89523 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c +e15118cbe372db7cadba225c9456f189 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_SADQuar.c +623cf336cfce7d0174f4e54072456f33 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c +89e452c80e30357cadfb04c05b6fe00c OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c +3a5551cc54e85bbe34fc966c7dc00f1c OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c +114030fa0d8f00af6d3289f47a5e85bf OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c +9e373ab296fb85bb45565a6c384f6ed8 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DequantTables.c +2d200f7cc230f302da48c589da42c02f OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c +ea3f1d1d1507b55610b1349c7b5946e8 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c +bd2bf1743aef2a9396545ed025362be2 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_Average_4x.c +ca68e809567bf89044631b67d228c7ce OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c +77caf2b5cbee96d360a919f27e1f14f4 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_QuantTables.c +26081e384ec627fedad474a0e7dad877 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SADQuar_8x.c +1c83ae9207a54944936f4a63c665bd99 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c +4c36e04db20200f4ec72e5aba57446fd 
OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SATD_4x4.c +f75b7c5a80d8bf33e315380e4ef0ab8a OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c +488925bb7aeeae0ccf93ec44af9fce35 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SADQuar_16x.c +c91a5345b5f877b3831ed1abcc60d579 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c +35515a115a32fcac8479072a9a5b0db9 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Interpolate_Luma.c +fdcf4622bc5f0ae75bdb0a51dcd03397 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c +74c9278177400a1f7cc6d799a8c8ab34 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SAD_4x.c +56aa2d506d0cfdb4ebd366c07adb2d85 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeBlockPixel.c +36b2165fd4d2a7f3f3e1f8daff4f94e5 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c +4b6b1b933fc7bc8f14a184c02c028085 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_TransformResidual4x4.c +cf0ff093a9b372dd3271e3e5c28984d4 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c +9ccad9f894fbd32194f5b53da217072a OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c +4943a7a2df7e9d700675f8c1debf4d90 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c +29e4a7f38f8c2e8246ed756db03c012e OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c +27bc64e7c18da0aab9c987a388f61608 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c +859185614bb9d0013861e454d7b918f2 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c + OX000-SW-98010-r0p0-00bet1/vc/m4p10/api/ +63e3b64b96cc42a235c04f3a0f991316 OX000-SW-98010-r0p0-00bet1/vc/m4p10/api/armVCM4P10_CAVLCTables.h + OX000-SW-98010-r0p0-00bet1/vc/m4p2/ + OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/ +0aae4f683d8903cba9956d3301ed9ffe 
OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_ACDCPredict.c +8d6c1b44915329165df643081cc11a97 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MCReconBlock.c +0435eca930eacda0f2a59e843d405eff OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c +9a82dd0b1f05f798567436a009d02969 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c +e1e24646c4bd03f5df78295452dd4eb2 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c +746e6b334e4a26d4a9bfae6d735826f6 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DCT8x8blk.c +8b1d87b74d80ff13a16215b61d5e52ba OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c +309358d357baafc38d2b37bf1e9768a9 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c +cc77c7242b53c153f8d09527583f2771 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_FindMVpred.c +7cd8e7796017e3dd00b494d34f629f3f OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MEGetBufSize.c +a4905cb5f8d4b244454ee4f60d18358b OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_PutVLCBits.c +5596b31e433222c1e4860deebfa98ef2 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_DCT_Table.c +365d072be6eab201f6e040058a3bacfc OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c +78ed2212585b0cca75913a473b2ec430 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c +50b2d8da8f20f6b1d39b8d3df38af55d OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_EncodeMV.c +4a851a2ad6d357cdc233d9c0bf475e02 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_BlockMatch_Half.c +0d6d63878f2827e00e5f85b1e8e26017 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c +48b865a983fe5bf3075eddf652950722 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c +5f48fa7941835c46ac767e63fc29403b OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c +bbaf454b64b32b2c42a76a7ec393d977 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_CompareMV.c 
+eebff772f87a414436c5c5286f2cd213 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MEInit.c +65ae242eb8cb6d1027677c8ef8f77ca0 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_FillVLCBuffer.c +125642b1ea0c1256d79af1e0ddecae93 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInter_I.c +ce24ba3d83da4cb791485d3128268bf6 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c +09bc09a2e6fd962e719944582e38a8fd OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c +6b0ee7a116471a4dadbe5bc8dbf425b0 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_FillVLDBuffer.c +21322dca027c28353e3e7eb8f3620062 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c +ef353d83244288d8c37e0f70249177cc OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_GetVLCBits.c +541de824f8aebe4a5cac6f15da943efa OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c +0b40b154b591c7f8842cffe4042d17c5 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c +2ffcec88d3fcb372543a8f4508ea1ac6 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantIntra_I.c +e06d85ca000afcbb50580f98f0203ac8 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c +ae82b6fcfcf731a61d70e1aa42e6277a OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c +1d04395e231b597562257e98cda6cfb0 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c +72c0a36327b6b9b436d3bce7c896c520 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c +1b65aa7f311124ea6fb47e384ec06a50 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c +714957104a6ef71341fbe6a9ec65c136 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_SetPredDir.c +86493f0ee853f653354a7389f1727f73 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c +5de8afcfb3052968794782a7c3a0b41a OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c +50bcc228cc660dbda037725309de3f8b 
OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c +4f5cfa1ecc668913dde94e3caf97a2e1 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c +c2ec804ddf64ee841146e39c3a783451 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c +4087f6a827912ee5b45ed4217f1a6d77 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c + OX000-SW-98010-r0p0-00bet1/vc/m4p2/api/ +5c711702dddcec85298003860d760cec OX000-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_DCT_Table.h +1b92c94b785c03ec76d4fae2f2bbdb8a OX000-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h +ad9c6986d2a3200dd5e1f6103a54a99b OX000-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h + OX000-SW-98010-r0p0-00bet1/vc/src/ +e627b3346b0dc9aff14446005ce0fa43 OX000-SW-98010-r0p0-00bet1/vc/src/armVC_Version.c + OX000-SW-98010-r0p0-00bet1/vc/api/ +7ca94b1c33ac0211e17d38baadd7d1dd OX000-SW-98010-r0p0-00bet1/vc/api/armVC.h +12cf7596edbbf6048b626d15e8d0ed48 OX000-SW-98010-r0p0-00bet1/vc/api/omxVC.h + OX000-SW-98010-r0p0-00bet1/vc/comm/ + OX000-SW-98010-r0p0-00bet1/vc/comm/src/ +3a6df0085736cbcbe2e3f45d08af4221 OX000-SW-98010-r0p0-00bet1/vc/comm/src/armVCCOMM_Average.c +0bf3cb52863c829b28c0352835170211 OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy8x8.c +538b62f510b5a8bdced4a39fa12d9a23 OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c +66993edd9d441bf3b5b6c912f6400b6e OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ExpandFrame_I.c +8e526a9007eb0d43ebf362c498b37415 OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_LimitMVToRect.c +87f8f26e6e9178df0ab7419334d5a3db OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_SAD_16x.c +1a8577646132ad9b63a1477fdaec2464 OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Average_16x.c +48529c4f70c7e954e832eece1aee57bd OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_SAD_8x.c +252977764d4f38282b6a56c59ccf4f09 OX000-SW-98010-r0p0-00bet1/vc/comm/src/armVCCOMM_SAD.c 
+cc78cfaed9502c2e0282c91fb95eeac4 OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Average_8x.c +e468751c15a581ebd22da031e22117d1 OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy16x16.c +3f448d191eaeb82ecb7e27ef8ba27875 OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c +b1291c307808631fa833684abb9c34ce ARM_DELIVERY_97412.TXT diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM.h new file mode 100644 index 0000000000000000000000000000000000000000..2ed86a466390016f8c431bd255030e4fec86aa71 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM.h @@ -0,0 +1,785 @@ +/** + * + * File Name: armCOMM.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armCOMM.h + * Brief: Declares Common APIs/Data Types used across OpenMAX API's + * + */ + + +#ifndef _armCommon_H_ +#define _armCommon_H_ + +#include "omxtypes.h" + +typedef struct +{ + OMX_F32 Re; /** Real part */ + OMX_F32 Im; /** Imaginary part */ + +} OMX_FC32; /** single precision floating point complex number */ + +typedef struct +{ + OMX_F64 Re; /** Real part */ + OMX_F64 Im; /** Imaginary part */ + +} OMX_FC64; /** double precision floating point complex number */ + + +/* Used by both IP and IC domains for 8x8 JPEG blocks. 
*/ +typedef OMX_S16 ARM_BLOCK8x8[64]; + + +#include "armOMX.h" + +#define armPI (OMX_F64)(3.1415926535897932384626433832795) + +/***********************************************************************/ + +/* Compiler extensions */ +#ifdef ARM_DEBUG +/* debug version */ +#include +#include +#include +#define armError(str) {printf((str)); printf("\n"); exit(-1);} +#define armWarn(str) {printf((str)); printf("\n");} +#define armIgnore(a) ((void)a) +#define armAssert(a) assert(a) +#else +/* release version */ +#define armError(str) ((void) (str)) +#define armWarn(str) ((void) (str)) +#define armIgnore(a) ((void) (a)) +#define armAssert(a) ((void) (a)) +#endif /* ARM_DEBUG */ + +/* Arithmetic operations */ + +#define armMin(a,b) ( (a) > (b) ? (b):(a) ) +#define armMax(a,b) ( (a) > (b) ? (a):(b) ) +#define armAbs(a) ( (a) < 0 ? -(a):(a) ) + +/* Alignment operation */ + +#define armAlignToBytes(Ptr,N) (Ptr + ( ((N-(int)Ptr)&(N-1)) / sizeof(*Ptr) )) +#define armAlignTo2Bytes(Ptr) armAlignToBytes(Ptr,2) +#define armAlignTo4Bytes(Ptr) armAlignToBytes(Ptr,4) +#define armAlignTo8Bytes(Ptr) armAlignToBytes(Ptr,8) +#define armAlignTo16Bytes(Ptr) armAlignToBytes(Ptr,16) + +/* Error and Alignment check */ + +#define armRetArgErrIf(condition, code) if(condition) { return (code); } +#define armRetDataErrIf(condition, code) if(condition) { return (code); } + +#ifndef ALIGNMENT_DOESNT_MATTER +#define armIsByteAligned(Ptr,N) ((((int)(Ptr)) % N)==0) +#define armNotByteAligned(Ptr,N) ((((int)(Ptr)) % N)!=0) +#else +#define armIsByteAligned(Ptr,N) (1) +#define armNotByteAligned(Ptr,N) (0) +#endif + +#define armIs2ByteAligned(Ptr) armIsByteAligned(Ptr,2) +#define armIs4ByteAligned(Ptr) armIsByteAligned(Ptr,4) +#define armIs8ByteAligned(Ptr) armIsByteAligned(Ptr,8) +#define armIs16ByteAligned(Ptr) armIsByteAligned(Ptr,16) + +#define armNot2ByteAligned(Ptr) armNotByteAligned(Ptr,2) +#define armNot4ByteAligned(Ptr) armNotByteAligned(Ptr,4) +#define armNot8ByteAligned(Ptr) 
armNotByteAligned(Ptr,8) +#define armNot16ByteAligned(Ptr) armNotByteAligned(Ptr,16) +#define armNot32ByteAligned(Ptr) armNotByteAligned(Ptr,32) + +/** + * Function: armRoundFloatToS16/armRoundFloatToS32/armRoundFloatToS64 + * + * Description: + * Converts a double precision value into a short int/int after rounding + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16/OMX_S32/OMX_S64 format + * + */ + +OMX_S16 armRoundFloatToS16 (OMX_F64 Value); +OMX_S32 armRoundFloatToS32 (OMX_F64 Value); +OMX_S64 armRoundFloatToS64 (OMX_F64 Value); + +/** + * Function: armSatRoundFloatToS16/armSatRoundFloatToS32 + * + * Description: + * Converts a double precision value into a short int/int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16/OMX_S32 format + * + */ + +OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value); +OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value); + +/** + * Function: armSatRoundFloatToU16/armSatRoundFloatToU32 + * + * Description: + * Converts a double precision value into an unsigned short int/int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_U16/OMX_U32 format + * + */ + +OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value); +OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value); + +/** + * Function: armSignCheck + * + * Description: + * Checks the sign of a variable: + * returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + * + * Remarks: + * + * Parameters: + * [in] var Variable to be checked + * + * Return Value: + * OMX_INT -- returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + */ + +OMX_INT armSignCheck (OMX_S16 var); + +/** + * Function: armClip + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: +
* + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to be clipped + * + * Return Value: + * OMX_S32 -- returns clipped value + */ + +OMX_S32 armClip ( + OMX_INT min, + OMX_INT max, + OMX_S32 src + ); + +/** + * Function: armClip_F32 + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: + * + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to be clipped + * + * Return Value: + * OMX_F32 -- returns clipped value + */ + +OMX_F32 armClip_F32 ( + OMX_F32 min, + OMX_F32 max, + OMX_F32 src + ); + +/** + * Function: armShiftSat_F32 + * + * Description: Divides a float value by 2^shift and + * saturates it for unsigned value range for satBits. + * Second parameter is like "shifting" the corresponding + * integer value. Takes care of rounding while clipping the final + * value. + * + * Parameters: + * [in] v Number to be operated upon + * [in] shift Divides the input "v" by "2^shift" + * [in] satBits Final range is [0, 2^satBits) + * + * Return Value: + * OMX_U32 -- returns "shifted" saturated value + */ + +OMX_U32 armShiftSat_F32( + OMX_F32 v, + OMX_INT shift, + OMX_INT satBits + ); + +/** + * Functions: armSwapElem + * + * Description: + * This function swaps two elements at the specified pointer locations.
+ * The size of each element could be anything as specified by elemSize + * + * Return Value: + * OMXResult -- Error status from the function + */ +OMXResult armSwapElem(OMX_U8 *pBuf1, OMX_U8 *pBuf2, OMX_INT elemSize); + + +/** + * Function: armMedianOf3 + * + * Description: Finds the median of three numbers + * + * Remarks: + * + * Parameters: + * [in] fEntry First entry + * [in] sEntry second entry + * [in] tEntry Third entry + * + * Return Value: + * OMX_S32 -- returns the median value + */ + +OMX_S32 armMedianOf3 ( + OMX_S32 fEntry, + OMX_S32 sEntry, + OMX_S32 tEntry + ); + +/** + * Function: armLogSize + * + * Description: Finds the size of a positive value and returns the same + * + * Remarks: + * + * Parameters: + * [in] value Positive value + * + * Return Value: + * OMX_U8 -- returns the size of the positive value + */ + +OMX_U8 armLogSize ( + OMX_U16 value + ); + +/***********************************************************************/ + /* Saturating Arithmetic operations */ + +/** + * Function :armSatAdd_S32() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S32 armSatAdd_S32( + OMX_S32 Value1, + OMX_S32 Value2 + ); + +/** + * Function :armSatAdd_S64() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S64 armSatAdd_S64( + OMX_S64 Value1, + OMX_S64 Value2 + ); + +/** Function :armSatSub_S32() + * + * Description : + * Returns the result of saturated subtraction of the two inputs Value1, Value2 + * + * Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatSub_S32( + OMX_S32 Value1, +
OMX_S32 Value2 + ); + +/** + * Function :armSatMac_S32() + * + * Description : + * Returns the result of Multiplication of Value1 and Value2 and subsequent saturated + * accumulation with Mac + * + * Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * [in] Mac Accumulator + * + * Return: + * [out] Result of operation + **/ + +OMX_S32 armSatMac_S32( + OMX_S32 Mac, + OMX_S16 Value1, + OMX_S16 Value2 + ); + +/** + * Function :armSatMac_S16S32_S32 + * + * Description : + * Returns the result of saturated MAC operation of the three inputs delayElem, filTap, mac + * + * mac = mac + Saturate_in_32Bits(delayElem * filTap) + * + * Parameters: + * [in] delayElem First 32 bit Operand + * [in] filTap Second 16 bit Operand + * [in] mac Accumulator input to the MAC operation + * + * Return: + * [out] mac Result of operation + * + **/ + +OMX_S32 armSatMac_S16S32_S32( + OMX_S32 mac, + OMX_S32 delayElem, + OMX_S16 filTap ); + +/** + * Function :armSatRoundRightShift_S32_S16 + * + * Description : + * Returns the result of rounded right shift operation of input by the scalefactor + * + * output = Saturate_in_16Bits( RightShift( Round(input), scaleFactor ) ) + * + * Parameters: + * [in] input The input to be operated on + * [in] scaleFactor The shift number + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S16 armSatRoundRightShift_S32_S16( + OMX_S32 input, + OMX_INT scaleFactor); + +/** + * Function :armSatRoundLeftShift_S32() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative.
+ * + * Parameters: + * [in] Value Operand + * [in] shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatRoundLeftShift_S32( + OMX_S32 Value, + OMX_INT shift + ); + +/** + * Function :armSatRoundLeftShift_S64() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. + * + * Parameters: + * [in] Value Operand + * [in] shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S64 armSatRoundLeftShift_S64( + OMX_S64 Value, + OMX_INT shift + ); + +/** + * Function :armSatMulS16S32_S32() + * + * Description : + * Returns the result of an S16 data type multiplied with an S32 data type + * in an S32 container + * + * Parameters: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S32 armSatMulS16S32_S32( + OMX_S16 input1, + OMX_S32 input2); + +/** + * Function :armSatMulS32S32_S32() + * + * Description : + * Returns the result of an S32 data type multiplied with an S32 data type + * in an S32 container + * + * Parameters: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatMulS32S32_S32( + OMX_S32 input1, + OMX_S32 input2); + + +/** + * Function :armIntDivAwayFromZero() + * + * Description : Integer division with rounding to the nearest integer. + * Half-integer values are rounded away from zero + * unless otherwise specified. For example 3//2 is rounded + * to 2, and -3//2 is rounded to -2.
+ * + * Parameters: + * [in] Num Operand 1 + * [in] Deno Operand 2 + * + * Return: + * [out] Result of operation Num//Deno + * + **/ + +OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno); + + +/***********************************************************************/ +/* + * Debugging macros + * + */ + + +/* + * Definition of output stream - change to stderr if necessary + */ +#define DEBUG_STREAM stdout + +/* + * Debug printf macros, one for each argument count. + * Add more if needed. + */ +#ifdef DEBUG_ON +#include + +#define DEBUG_PRINTF_0(a) fprintf(DEBUG_STREAM, a) +#define DEBUG_PRINTF_1(a, b) fprintf(DEBUG_STREAM, a, b) +#define DEBUG_PRINTF_2(a, b, c) fprintf(DEBUG_STREAM, a, b, c) +#define DEBUG_PRINTF_3(a, b, c, d) fprintf(DEBUG_STREAM, a, b, c, d) +#define DEBUG_PRINTF_4(a, b, c, d, e) fprintf(DEBUG_STREAM, a, b, c, d, e) +#define DEBUG_PRINTF_5(a, b, c, d, e, f) fprintf(DEBUG_STREAM, a, b, c, d, e, f) +#define DEBUG_PRINTF_6(a, b, c, d, e, f, g) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g) +#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h) +#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i) +#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j) +#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k) +#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l) +#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m) +#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n) +#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) +#else /* DEBUG_ON */ +#define DEBUG_PRINTF_0(a)
+#define DEBUG_PRINTF_1(a, b) +#define DEBUG_PRINTF_2(a, b, c) +#define DEBUG_PRINTF_3(a, b, c, d) +#define DEBUG_PRINTF_4(a, b, c, d, e) +#define DEBUG_PRINTF_5(a, b, c, d, e, f) +#define DEBUG_PRINTF_6(a, b, c, d, e, f, g) +#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h) +#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i) +#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j) +#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k) +#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l) +#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m) +#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n) +#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) +#endif /* DEBUG_ON */ + + +/* + * Domain and sub domain definitions + * + * In order to turn on debug for an entire domain or sub-domain + * at compile time, one of the DEBUG_DOMAIN_* below may be defined, + * which will activate debug in all of the defines it contains. + */ + +#ifdef DEBUG_DOMAIN_AC +#define DEBUG_OMXACAAC_DECODECHANPAIRELT_MPEG4 +#define DEBUG_OMXACAAC_DECODECHANPAIRELT +#define DEBUG_OMXACAAC_DECODEDATSTRELT +#define DEBUG_OMXACAAC_DECODEFILLELT +#define DEBUG_OMXACAAC_DECODEISSTEREO_S32 +#define DEBUG_OMXACAAC_DECODEMSPNS_S32 +#define DEBUG_OMXACAAC_DECODEMSSTEREO_S32_I +#define DEBUG_OMXACAAC_DECODEPRGCFGELT +#define DEBUG_OMXACAAC_DECODETNS_S32_I +#define DEBUG_OMXACAAC_DEINTERLEAVESPECTRUM_S32 +#define DEBUG_OMXACAAC_ENCODETNS_S32_I +#define DEBUG_OMXACAAC_LONGTERMPREDICT_S32 +#define DEBUG_OMXACAAC_LONGTERMRECONSTRUCT_S32 +#define DEBUG_OMXACAAC_MDCTFWD_S32 +#define DEBUG_OMXACAAC_MDCTINV_S32_S16 +#define DEBUG_OMXACAAC_NOISELESSDECODE +#define DEBUG_OMXACAAC_QUANTINV_S32_I +#define DEBUG_OMXACAAC_UNPACKADIFHEADER +#define DEBUG_OMXACAAC_UNPACKADTSFRAMEHEADER +#define DEBUG_OMXACMP3_HUFFMANDECODESFBMBP_S32 +#define DEBUG_OMXACMP3_HUFFMANDECODESFB_S32 +#define DEBUG_OMXACMP3_HUFFMANDECODE_S32 +#define DEBUG_OMXACMP3_MDCTINV_S32 +#define 
DEBUG_OMXACMP3_REQUANTIZESFB_S32_I +#define DEBUG_OMXACMP3_REQUANTIZE_S32_I +#define DEBUG_OMXACMP3_SYNTHPQMF_S32_S16 +#define DEBUG_OMXACMP3_UNPACKFRAMEHEADER +#define DEBUG_OMXACMP3_UNPACKSCALEFACTORS_S8 +#define DEBUG_OMXACMP3_UNPACKSIDEINFO +#endif /* DEBUG_DOMAIN_AC */ + + +#ifdef DEBUG_DOMAIN_VC +#define DEBUG_OMXVCM4P10_AVERAGE_16X +#define DEBUG_OMXVCM4P10_AVERAGE_4X +#define DEBUG_OMXVCM4P10_AVERAGE_8X +#define DEBUG_OMXVCM4P10_DEBLOCKCHROMA_U8_C1IR +#define DEBUG_OMXVCM4P10_DEBLOCKLUMA_U8_C1IR +#define DEBUG_OMXVCM4P10_DECODECHROMADCCOEFFSTOPAIRCAVLC_U8 +#define DEBUG_OMXVCM4P10_DECODECOEFFSTOPAIRCAVLC_U8 +#define DEBUG_OMXVCM4P10_DEQUANTTRANSFORMACFROMPAIR_U8_S16_C1_DLX +#define DEBUG_OMXVCM4P10_EXPANDFRAME +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_HOREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_VEREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_HOREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_VEREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_PREDICTINTRACHROMA8X8_U8_C1R +#define DEBUG_OMXVCM4P10_PREDICTINTRA_16X16_U8_C1R +#define DEBUG_OMXVCM4P10_PREDICTINTRA_4X4_U8_C1R +#define DEBUG_OMXVCM4P10_SADQUAR_16X +#define DEBUG_OMXVCM4P10_SADQUAR_4X +#define DEBUG_OMXVCM4P10_SADQUAR_8X +#define DEBUG_OMXVCM4P10_SAD_16X +#define DEBUG_OMXVCM4P10_SAD_4X +#define DEBUG_OMXVCM4P10_SAD_8X +#define DEBUG_OMXVCM4P10_SATD_4X4 +#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTCHROMADCFROMPAIR_U8_S16_C1 +#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTLUMADCFROMPAIR_U8_S16_C1 +#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_CHROMADC +#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_LUMADC +#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_16X16 +#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_8X8 +#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_16X16 +#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_8X8 +#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_SAD_U8_S16 +#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_U8_S16 +#define DEBUG_OMXVCM4P2_DCT8X8BLKDLX +#define 
DEBUG_OMXVCM4P2_DECODEBLOCKCOEF_INTER_S16 +#define DEBUG_OMXVCM4P2_DECODEPADMV_PVOP +#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTER_S16 +#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRAACVLC_S16 +#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRADCVLC_S16 +#define DEBUG_OMXVCM4P2_ENCODEMV_U8_S16 +#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTER_S16 +#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRAACVLC_S16 +#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRADCVLC_S16 +#define DEBUG_OMXVCM4P2_FINDMVPRED +#define DEBUG_OMXVCM4P2_IDCT8X8BLKDLX +#define DEBUG_OMXVCM4P2_LIMITMVTORECT +#define DEBUG_OMXVCM4P2_MOTIONESTIMATIONMB +#define DEBUG_OMXVCM4P2_PADMBGRAY_U8 +#define DEBUG_OMXVCM4P2_PADMBHORIZONTAL_U8 +#define DEBUG_OMXVCM4P2_PADMBVERTICAL_U8 +#define DEBUG_OMXVCM4P2_PADMV +#define DEBUG_OMXVCM4P2_QUANTINTER_S16_I +#define DEBUG_OMXVCM4P2_QUANTINTRA_S16_I +#define DEBUG_OMXVCM4P2_QUANTINVINTER_S16_I +#define DEBUG_OMXVCM4P2_QUANTINVINTRA_S16_I +#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTER +#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTRA +#endif /* DEBUG_DOMAIN_VC */ + + +#ifdef DEBUG_DOMAIN_IC +/* To be filled in */ +#endif /* DEBUG_DOMAIN_IC */ + + +#ifdef DEBUG_DOMAIN_SP +#define DEBUG_OMXACSP_DOTPROD_S16 +#define DEBUG_OMXACSP_BLOCKEXP_S16 +#define DEBUG_OMXACSP_BLOCKEXP_S32 +#define DEBUG_OMXACSP_COPY_S16 +#define DEBUG_OMXACSP_DOTPROD_S16 +#define DEBUG_OMXACSP_DOTPROD_S16_SFS +#define DEBUG_OMXACSP_FFTFWD_CTOC_SC16_SFS +#define DEBUG_OMXACSP_FFTFWD_CTOC_SC32_SFS +#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S16S32_SFS +#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S32_SFS +#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC16 +#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC32 +#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S16_S32 +#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S32 +#define DEBUG_OMXACSP_FFTINIT_C_SC16 +#define DEBUG_OMXACSP_FFTINIT_C_SC32 +#define DEBUG_OMXACSP_FFTINIT_R_S16_S32 +#define DEBUG_OMXACSP_FFTINIT_R_S32 +#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32S16_SFS +#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32_SFS 
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC16_SFS +#define DEBUG_OMXACSP_FFTINV_CTOC_SC32_SFS +#define DEBUG_OMXACSP_FILTERMEDIAN_S32_I +#define DEBUG_OMXACSP_FILTERMEDIAN_S32 +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_ISFS +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_I +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16 +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_SFS +#define DEBUG_OMXACSP_FIR_DIRECT_S16_ISFS +#define DEBUG_OMXACSP_FIR_DIRECT_S16_I +#define DEBUG_OMXACSP_FIR_DIRECT_S16 +#define DEBUG_OMXACSP_FIR_DIRECT_S16_SFS +#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16_I +#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16 +#define DEBUG_OMXACSP_IIRONE_DIRECT_S16_I +#define DEBUG_OMXACSP_IIRONE_DIRECT_S16 +#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16_I +#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16 +#define DEBUG_OMXACSP_IIR_DIRECT_S16_I +#define DEBUG_OMXACSP_IIR_DIRECT_S16 +#endif /* DEBUG_DOMAIN_SP */ + + +#ifdef DEBUG_DOMAIN_IP +#define DEBUG_OMXIPBM_ADDC_U8_C1R_SFS +#define DEBUG_OMXIPBM_COPY_U8_C1R +#define DEBUG_OMXIPBM_COPY_U8_C3R +#define DEBUG_OMXIPBM_MIRROR_U8_C1R +#define DEBUG_OMXIPBM_MULC_U8_C1R_SFS +#define DEBUG_OMXIPCS_COLORTWISTQ14_U8_C3R +#define DEBUG_OMXIPCS_RGB565TOYCBCR420LS_MCU_U16_S16_C3P3R +#define DEBUG_OMXIPCS_RGB565TOYCBCR422LS_MCU_U16_S16_C3P3R +#define DEBUG_OMXIPCS_RGB565TOYCBCR444LS_MCU_U16_S16_C3P3R +#define DEBUG_OMXIPCS_RGBTOYCBCR420LS_MCU_U8_S16_C3P3R +#define DEBUG_OMXIPCS_RGBTOYCBCR422LS_MCU_U8_S16_C3P3R +#define DEBUG_OMXIPCS_RGBTOYCBCR444LS_MCU_U8_S16_C3P3R +#define DEBUG_OMXIPCS_YCBCR420RSZROT_U8_P3R +#define DEBUG_OMXIPCS_YCBCR420TORGB565LS_MCU_S16_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR420TORGB565_U8_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR420TORGBLS_MCU_S16_U8_P3C3R +#define DEBUG_OMXIPCS_YCBCR422RSZCSCROTRGB_U8_C2R +#define DEBUG_OMXIPCS_YCBCR422RSZROT_U8_P3R +#define DEBUG_OMXIPCS_YCBCR422TORGB565LS_MCU_S16_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR422TORGB565_U8_U16_C2C3R +#define DEBUG_OMXIPCS_YCBCR422TORGBLS_MCU_S16_U8_P3C3R +#define 
DEBUG_OMXIPCS_YCBCR422TORGB_U8_C2C3R +#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_C2P3R +#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_P3R +#define DEBUG_OMXIPCS_YCBCR444TORGB565LS_MCU_S16_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR444TORGBLS_MCU_S16_U8_P3C3R +#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_C3R +#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCRTORGB_U8_C3R +#define DEBUG_OMXIPPP_GETCENTRALMOMENT_S64 +#define DEBUG_OMXIPPP_GETSPATIALMOMENT_S64 +#define DEBUG_OMXIPPP_MOMENTGETSTATESIZE_S64 +#define DEBUG_OMXIPPP_MOMENTINIT_S64 +#define DEBUG_OMXIPPP_MOMENTS64S_U8_C1R +#define DEBUG_OMXIPPP_MOMENTS64S_U8_C3R +#endif /* DEBUG_DOMAIN_IP */ + + +#endif /* _armCommon_H_ */ + +/*End of File*/ + + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Bitstream.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Bitstream.h new file mode 100644 index 0000000000000000000000000000000000000000..4f9bc3bee8b79fd63e8958e60d35e17211d60229 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Bitstream.h @@ -0,0 +1,212 @@ +/** + * + * File Name: armCOMM_Bitstream.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armCOMM_Bitstream.h + * Brief: Declares common API's/Data types used across the OpenMax Encoders/Decoders. 
+ * + */ + +#ifndef _armCodec_H_ +#define _armCodec_H_ + +#include "omxtypes.h" + +typedef struct { + OMX_U8 codeLen; + OMX_U32 codeWord; +} ARM_VLC32; + +/* The above should be renamed as "ARM_VLC32" */ + +/** + * Function: armLookAheadBits() + * + * Description: + * Get the next N bits from the bitstream without advancing the bitstream pointer + * + * Parameters: + * [in] **ppBitStream + * [in] *pOffset + * [in] N=1...32 + * + * Returns Value + */ + +OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N); + +/** + * Function: armGetBits() + * + * Description: + * Read N bits from the bitstream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * [in] N=1..32 + * + * [out] *ppBitStream + * [out] *pOffset + * Returns Value + */ + +OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N); + +/** + * Function: armByteAlign() + * + * Description: + * Align the pointer *ppBitStream to the next byte boundary + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + +OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset); + +/** + * Function: armSkipBits() + * + * Description: + * Skip N bits from the value at *ppBitStream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * [in] N + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + +OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N); + +/*************************************** + * Variable bit length Decode + ***************************************/ + +/** + * Function: armUnPackVLC32() + * + * Description: + * Variable length decode of variable length symbol (max size 32 bits) read from + * the bit stream pointed by *ppBitStream at *pOffset by using the table + * pointed by pCodeBook + * + * Parameters: + * [in] **ppBitStream + * [in] *pOffset + * [in] pCodeBook + * + * [out] **ppBitStream + * [out] *pOffset + * + * Returns : Code Book Index if 
successful. + * : "ARM_NO_CODEBOOK_INDEX = 0xFFFF" if search fails. + **/ + +#define ARM_NO_CODEBOOK_INDEX (OMX_U16)(0xFFFF) + +OMX_U16 armUnPackVLC32( + const OMX_U8 **ppBitStream, + OMX_INT *pOffset, + const ARM_VLC32 *pCodeBook +); + +/*************************************** + * Fixed bit length Encode + ***************************************/ + +/** + * Function: armPackBits + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. + * [in] codeWord Code word that needs to be inserted into the + * bitstream + * [in] codeLength Length of the code word valid range 1...32 + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pOffset *pOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. + * + */ + +OMXResult armPackBits ( + OMX_U8 **ppBitStream, + OMX_INT *pOffset, + OMX_U32 codeWord, + OMX_INT codeLength +); + +/*************************************** + * Variable bit length Encode + ***************************************/ + +/** + * Function: armPackVLC32 + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pBitOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. 
+ * [in] code VLC code word that need to be inserted in to the + * bitstream + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. + * + */ + +OMXResult armPackVLC32 ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + ARM_VLC32 code +); + +#endif /*_armCodec_H_*/ + +/*End of File*/ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Version.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Version.h new file mode 100644 index 0000000000000000000000000000000000000000..e99a4506cff3f7411119f3b897bba519b23d5bad --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Version.h @@ -0,0 +1,43 @@ +/* Guard the header against multiple inclusion. */ +#ifndef __ARM_COMM_VERSION_H__ +#define __ARM_COMM_VERSION_H__ + + +/* The following line should be in omxtypes.h but hasn't been approved by OpenMAX yet */ +#define OMX_VERSION 102 + +/* We need to define these macros in order to convert a #define number into a #define string. */ +#define ARM_QUOTE(a) #a +#define ARM_INDIRECT(A) ARM_QUOTE(A) + +/* Convert the OMX_VERSION number into a string that can be used, for example, to print it out. 
*/ +#define ARM_VERSION_STRING ARM_INDIRECT(OMX_VERSION) + + +/* Define this in order to turn on ARM version/release/build strings in each domain */ +#define ARM_INCLUDE_VERSION_DESCRIPTIONS + +#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS + extern const char * const omxAC_VersionDescription; + extern const char * const omxIC_VersionDescription; + extern const char * const omxIP_VersionDescription; + extern const char * const omxSP_VersionDescription; + extern const char * const omxVC_VersionDescription; +#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */ + + +/* The following entries should be automatically updated by the release script */ +/* They are used in the ARM version strings defined for each domain. */ + +/* The release tag associated with this release of the library. - used for source and object releases */ +#define OMX_ARM_RELEASE_TAG "r0p0-00bet1" + +/* The ARM architecture used to build any objects or executables in this release. */ +#define OMX_ARM_BUILD_ARCHITECTURE "ANSI C" + +/* The ARM Toolchain used to build any objects or executables in this release. */ +#define OMX_ARM_BUILD_TOOLCHAIN "ARM RVCT 3.1" + + +#endif /* __ARM_COMM_VERSION_H__ */ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armOMX.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armOMX.h new file mode 100644 index 0000000000000000000000000000000000000000..f629f72b25db0da7e01d1f917bfed611ae517d7a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armOMX.h @@ -0,0 +1,274 @@ +/* + * + * File Name: armOMX_ReleaseVersion.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * This file allows a version of the OMX DL libraries to be built where some or + * all of the function names can be given a user specified suffix. 
+ * + * You might want to use it where: + * + * - you want to rename a function "out of the way" so that you could replace + * a function with a different version (the original version would still be + * in the library just with a different name - so you could debug the new + * version by comparing it to the output of the old) + * + * - you want to rename all the functions to versions with a suffix so that + * you can include two versions of the library and choose between functions + * at runtime. + * + * e.g. omxIPBM_Copy_U8_C1R could be renamed omxIPBM_Copy_U8_C1R_CortexA8 + * + */ + + +#ifndef _armOMX_H_ +#define _armOMX_H_ + + +/* We need to define these two macros in order to expand and concatenate the names */ +#define OMXCAT2BAR(A, B) omx ## A ## B +#define OMXCATBAR(A, B) OMXCAT2BAR(A, B) + +/* Define the suffix to add to all functions - the default is no suffix */ +#define BARE_SUFFIX + + + +/* Define what happens to the bare suffix-less functions, down to the sub-domain accuracy */ +#define OMXACAAC_SUFFIX BARE_SUFFIX +#define OMXACMP3_SUFFIX BARE_SUFFIX +#define OMXICJP_SUFFIX BARE_SUFFIX +#define OMXIPBM_SUFFIX BARE_SUFFIX +#define OMXIPCS_SUFFIX BARE_SUFFIX +#define OMXIPPP_SUFFIX BARE_SUFFIX +#define OMXSP_SUFFIX BARE_SUFFIX +#define OMXVCCOMM_SUFFIX BARE_SUFFIX +#define OMXVCM4P10_SUFFIX BARE_SUFFIX +#define OMXVCM4P2_SUFFIX BARE_SUFFIX + + + + +/* Define what the each bare, un-suffixed OpenMAX API function names is to be renamed */ +#define omxACAAC_DecodeChanPairElt OMXCATBAR(ACAAC_DecodeChanPairElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeDatStrElt OMXCATBAR(ACAAC_DecodeDatStrElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeFillElt OMXCATBAR(ACAAC_DecodeFillElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeIsStereo_S32 OMXCATBAR(ACAAC_DecodeIsStereo_S32, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeMsPNS_S32_I OMXCATBAR(ACAAC_DecodeMsPNS_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeMsStereo_S32_I OMXCATBAR(ACAAC_DecodeMsStereo_S32_I, OMXACAAC_SUFFIX) 
+#define omxACAAC_DecodePrgCfgElt OMXCATBAR(ACAAC_DecodePrgCfgElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeTNS_S32_I OMXCATBAR(ACAAC_DecodeTNS_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_DeinterleaveSpectrum_S32 OMXCATBAR(ACAAC_DeinterleaveSpectrum_S32, OMXACAAC_SUFFIX) +#define omxACAAC_EncodeTNS_S32_I OMXCATBAR(ACAAC_EncodeTNS_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_LongTermPredict_S32 OMXCATBAR(ACAAC_LongTermPredict_S32, OMXACAAC_SUFFIX) +#define omxACAAC_LongTermReconstruct_S32_I OMXCATBAR(ACAAC_LongTermReconstruct_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_MDCTFwd_S32 OMXCATBAR(ACAAC_MDCTFwd_S32, OMXACAAC_SUFFIX) +#define omxACAAC_MDCTInv_S32_S16 OMXCATBAR(ACAAC_MDCTInv_S32_S16, OMXACAAC_SUFFIX) +#define omxACAAC_NoiselessDecode OMXCATBAR(ACAAC_NoiselessDecode, OMXACAAC_SUFFIX) +#define omxACAAC_QuantInv_S32_I OMXCATBAR(ACAAC_QuantInv_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_UnpackADIFHeader OMXCATBAR(ACAAC_UnpackADIFHeader, OMXACAAC_SUFFIX) +#define omxACAAC_UnpackADTSFrameHeader OMXCATBAR(ACAAC_UnpackADTSFrameHeader, OMXACAAC_SUFFIX) + + +#define omxACMP3_HuffmanDecode_S32 OMXCATBAR(ACMP3_HuffmanDecode_S32, OMXACMP3_SUFFIX) +#define omxACMP3_HuffmanDecodeSfb_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfb_S32, OMXACMP3_SUFFIX) +#define omxACMP3_HuffmanDecodeSfbMbp_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfbMbp_S32, OMXACMP3_SUFFIX) +#define omxACMP3_MDCTInv_S32 OMXCATBAR(ACMP3_MDCTInv_S32, OMXACMP3_SUFFIX) +#define omxACMP3_ReQuantize_S32_I OMXCATBAR(ACMP3_ReQuantize_S32_I, OMXACMP3_SUFFIX) +#define omxACMP3_ReQuantizeSfb_S32_I OMXCATBAR(ACMP3_ReQuantizeSfb_S32_I, OMXACMP3_SUFFIX) +#define omxACMP3_SynthPQMF_S32_S16 OMXCATBAR(ACMP3_SynthPQMF_S32_S16, OMXACMP3_SUFFIX) +#define omxACMP3_UnpackFrameHeader OMXCATBAR(ACMP3_UnpackFrameHeader, OMXACMP3_SUFFIX) +#define omxACMP3_UnpackScaleFactors_S8 OMXCATBAR(ACMP3_UnpackScaleFactors_S8, OMXACMP3_SUFFIX) +#define omxACMP3_UnpackSideInfo OMXCATBAR(ACMP3_UnpackSideInfo, OMXACMP3_SUFFIX) + +#define omxICJP_CopyExpand_U8_C3 
OMXCATBAR(ICJP_CopyExpand_U8_C3, OMXICJP_SUFFIX) +#define omxICJP_DCTFwd_S16 OMXCATBAR(ICJP_DCTFwd_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTFwd_S16_I OMXCATBAR(ICJP_DCTFwd_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTInv_S16 OMXCATBAR(ICJP_DCTInv_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTInv_S16_I OMXCATBAR(ICJP_DCTInv_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwd_Multiple_S16 OMXCATBAR(ICJP_DCTQuantFwd_Multiple_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwd_S16 OMXCATBAR(ICJP_DCTQuantFwd_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwd_S16_I OMXCATBAR(ICJP_DCTQuantFwd_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwdTableInit OMXCATBAR(ICJP_DCTQuantFwdTableInit, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInv_Multiple_S16 OMXCATBAR(ICJP_DCTQuantInv_Multiple_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInv_S16 OMXCATBAR(ICJP_DCTQuantInv_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInv_S16_I OMXCATBAR(ICJP_DCTQuantInv_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInvTableInit OMXCATBAR(ICJP_DCTQuantInvTableInit, OMXICJP_SUFFIX) +#define omxICJP_DecodeHuffman8x8_Direct_S16_C1 OMXCATBAR(ICJP_DecodeHuffman8x8_Direct_S16_C1, OMXICJP_SUFFIX) +#define omxICJP_DecodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX) +#define omxICJP_DecodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecInit_U8, OMXICJP_SUFFIX) +#define omxICJP_EncodeHuffman8x8_Direct_S16_U1_C1 OMXCATBAR(ICJP_EncodeHuffman8x8_Direct_S16_U1_C1, OMXICJP_SUFFIX) +#define omxICJP_EncodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX) +#define omxICJP_EncodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecInit_U8, OMXICJP_SUFFIX) + +#define omxIPBM_AddC_U8_C1R_Sfs OMXCATBAR(IPBM_AddC_U8_C1R_Sfs, OMXIPBM_SUFFIX) +#define omxIPBM_Copy_U8_C1R OMXCATBAR(IPBM_Copy_U8_C1R, OMXIPBM_SUFFIX) +#define omxIPBM_Copy_U8_C3R OMXCATBAR(IPBM_Copy_U8_C3R, OMXIPBM_SUFFIX) +#define omxIPBM_Mirror_U8_C1R OMXCATBAR(IPBM_Mirror_U8_C1R, 
OMXIPBM_SUFFIX) +#define omxIPBM_MulC_U8_C1R_Sfs OMXCATBAR(IPBM_MulC_U8_C1R_Sfs, OMXIPBM_SUFFIX) + +#define omxIPCS_ColorTwistQ14_U8_C3R OMXCATBAR(IPCS_ColorTwistQ14_U8_C3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr420RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr420RszRot_U8_P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr422RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R OMXCATBAR(IPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr422RszRot_U8_P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbYCr422ToBGR565_U8_U16_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR565_U8_U16_C2C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R 
OMXCATBAR(IPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbYCr422ToBGR888_U8_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR888_U8_C2C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R OMXCATBAR(IPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToYCbCr420Rotate_U8_P3R OMXCATBAR(IPCS_YCbCr422ToYCbCr420Rotate_U8_P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR565_U8_U16_C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR888_U8_C3R OMXCATBAR(IPCS_YCbCr444ToBGR888_U8_C3R, OMXIPCS_SUFFIX) + +#define omxIPPP_Deblock_HorEdge_U8_I OMXCATBAR(IPPP_Deblock_HorEdge_U8_I, OMXIPPP_SUFFIX) +#define omxIPPP_Deblock_VerEdge_U8_I OMXCATBAR(IPPP_Deblock_VerEdge_U8_I, OMXIPPP_SUFFIX) +#define omxIPPP_FilterFIR_U8_C1R OMXCATBAR(IPPP_FilterFIR_U8_C1R, OMXIPPP_SUFFIX) +#define omxIPPP_FilterMedian_U8_C1R OMXCATBAR(IPPP_FilterMedian_U8_C1R, OMXIPPP_SUFFIX) +#define omxIPPP_GetCentralMoment_S64 OMXCATBAR(IPPP_GetCentralMoment_S64, OMXIPPP_SUFFIX) +#define omxIPPP_GetSpatialMoment_S64 OMXCATBAR(IPPP_GetSpatialMoment_S64, OMXIPPP_SUFFIX) +#define omxIPPP_MomentGetStateSize OMXCATBAR(IPPP_MomentGetStateSize, OMXIPPP_SUFFIX) +#define omxIPPP_MomentInit OMXCATBAR(IPPP_MomentInit, OMXIPPP_SUFFIX) +#define omxIPPP_Moments_U8_C1R OMXCATBAR(IPPP_Moments_U8_C1R, OMXIPPP_SUFFIX) +#define omxIPPP_Moments_U8_C3R OMXCATBAR(IPPP_Moments_U8_C3R, OMXIPPP_SUFFIX) + +#define omxSP_BlockExp_S16 OMXCATBAR(SP_BlockExp_S16, 
OMXSP_SUFFIX) +#define omxSP_BlockExp_S32 OMXCATBAR(SP_BlockExp_S32, OMXSP_SUFFIX) +#define omxSP_Copy_S16 OMXCATBAR(SP_Copy_S16, OMXSP_SUFFIX) +#define omxSP_DotProd_S16 OMXCATBAR(SP_DotProd_S16, OMXSP_SUFFIX) +#define omxSP_DotProd_S16_Sfs OMXCATBAR(SP_DotProd_S16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_CToC_SC16_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_CToC_SC32_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_RToCCS_S16S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S16S32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_RToCCS_S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_C_SC16 OMXCATBAR(SP_FFTGetBufSize_C_SC16, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_C_SC32 OMXCATBAR(SP_FFTGetBufSize_C_SC32, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_R_S16S32 OMXCATBAR(SP_FFTGetBufSize_R_S16S32, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_R_S32 OMXCATBAR(SP_FFTGetBufSize_R_S32, OMXSP_SUFFIX) +#define omxSP_FFTInit_C_SC16 OMXCATBAR(SP_FFTInit_C_SC16, OMXSP_SUFFIX) +#define omxSP_FFTInit_C_SC32 OMXCATBAR(SP_FFTInit_C_SC32, OMXSP_SUFFIX) +#define omxSP_FFTInit_R_S16S32 OMXCATBAR(SP_FFTInit_R_S16S32, OMXSP_SUFFIX) +#define omxSP_FFTInit_R_S32 OMXCATBAR(SP_FFTInit_R_S32, OMXSP_SUFFIX) +#define omxSP_FFTInv_CCSToR_S32_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTInv_CCSToR_S32S16_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32S16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTInv_CToC_SC16_Sfs OMXCATBAR(SP_FFTInv_CToC_SC16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTInv_CToC_SC32_Sfs OMXCATBAR(SP_FFTInv_CToC_SC32_Sfs, OMXSP_SUFFIX) +#define omxSP_FilterMedian_S32 OMXCATBAR(SP_FilterMedian_S32, OMXSP_SUFFIX) +#define omxSP_FilterMedian_S32_I OMXCATBAR(SP_FilterMedian_S32_I, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16 OMXCATBAR(SP_FIR_Direct_S16, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16_I OMXCATBAR(SP_FIR_Direct_S16_I, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16_ISfs OMXCATBAR(SP_FIR_Direct_S16_ISfs, 
OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16_Sfs OMXCATBAR(SP_FIR_Direct_S16_Sfs, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16 OMXCATBAR(SP_FIROne_Direct_S16, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16_I OMXCATBAR(SP_FIROne_Direct_S16_I, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16_ISfs OMXCATBAR(SP_FIROne_Direct_S16_ISfs, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16_Sfs OMXCATBAR(SP_FIROne_Direct_S16_Sfs, OMXSP_SUFFIX) +#define omxSP_IIR_BiQuadDirect_S16 OMXCATBAR(SP_IIR_BiQuadDirect_S16, OMXSP_SUFFIX) +#define omxSP_IIR_BiQuadDirect_S16_I OMXCATBAR(SP_IIR_BiQuadDirect_S16_I, OMXSP_SUFFIX) +#define omxSP_IIR_Direct_S16 OMXCATBAR(SP_IIR_Direct_S16, OMXSP_SUFFIX) +#define omxSP_IIR_Direct_S16_I OMXCATBAR(SP_IIR_Direct_S16_I, OMXSP_SUFFIX) +#define omxSP_IIROne_BiQuadDirect_S16 OMXCATBAR(SP_IIROne_BiQuadDirect_S16, OMXSP_SUFFIX) +#define omxSP_IIROne_BiQuadDirect_S16_I OMXCATBAR(SP_IIROne_BiQuadDirect_S16_I, OMXSP_SUFFIX) +#define omxSP_IIROne_Direct_S16 OMXCATBAR(SP_IIROne_Direct_S16, OMXSP_SUFFIX) +#define omxSP_IIROne_Direct_S16_I OMXCATBAR(SP_IIROne_Direct_S16_I, OMXSP_SUFFIX) + +#define omxVCCOMM_Average_16x OMXCATBAR(VCCOMM_Average_16x, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_Average_8x OMXCATBAR(VCCOMM_Average_8x, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_ComputeTextureErrorBlock OMXCATBAR(VCCOMM_ComputeTextureErrorBlock, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_ComputeTextureErrorBlock_SAD OMXCATBAR(VCCOMM_ComputeTextureErrorBlock_SAD, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_Copy16x16 OMXCATBAR(VCCOMM_Copy16x16, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_Copy8x8 OMXCATBAR(VCCOMM_Copy8x8, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_ExpandFrame_I OMXCATBAR(VCCOMM_ExpandFrame_I, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_LimitMVToRect OMXCATBAR(VCCOMM_LimitMVToRect, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_SAD_16x OMXCATBAR(VCCOMM_SAD_16x, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_SAD_8x OMXCATBAR(VCCOMM_SAD_8x, OMXVCCOMM_SUFFIX) + +#define omxVCM4P10_Average_4x OMXCATBAR(VCM4P10_Average_4x, 
OMXVCM4P10_SUFFIX) +#define omxVCM4P10_BlockMatch_Half OMXCATBAR(VCM4P10_BlockMatch_Half, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_BlockMatch_Integer OMXCATBAR(VCM4P10_BlockMatch_Integer, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_BlockMatch_Quarter OMXCATBAR(VCM4P10_BlockMatch_Quarter, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DeblockChroma_I OMXCATBAR(VCM4P10_DeblockChroma_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DeblockLuma_I OMXCATBAR(VCM4P10_DeblockLuma_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeChromaDcCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DecodeCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DequantTransformResidualFromPairAndAdd OMXCATBAR(VCM4P10_DequantTransformResidualFromPairAndAdd, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingChroma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_HorEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingChroma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_VerEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingLuma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_HorEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingLuma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_VerEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_GetVLCInfo OMXCATBAR(VCM4P10_GetVLCInfo, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateChroma OMXCATBAR(VCM4P10_InterpolateChroma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateHalfHor_Luma OMXCATBAR(VCM4P10_InterpolateHalfHor_Luma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateHalfVer_Luma OMXCATBAR(VCM4P10_InterpolateHalfVer_Luma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateLuma OMXCATBAR(VCM4P10_InterpolateLuma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InvTransformDequant_ChromaDC OMXCATBAR(VCM4P10_InvTransformDequant_ChromaDC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InvTransformDequant_LumaDC 
OMXCATBAR(VCM4P10_InvTransformDequant_LumaDC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InvTransformResidualAndAdd OMXCATBAR(VCM4P10_InvTransformResidualAndAdd, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_MEGetBufSize OMXCATBAR(VCM4P10_MEGetBufSize, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_MEInit OMXCATBAR(VCM4P10_MEInit, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_MotionEstimationMB OMXCATBAR(VCM4P10_MotionEstimationMB, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_PredictIntra_16x16 OMXCATBAR(VCM4P10_PredictIntra_16x16, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_PredictIntra_4x4 OMXCATBAR(VCM4P10_PredictIntra_4x4, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_PredictIntraChroma_8x8 OMXCATBAR(VCM4P10_PredictIntraChroma_8x8, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SAD_4x OMXCATBAR(VCM4P10_SAD_4x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SADQuar_16x OMXCATBAR(VCM4P10_SADQuar_16x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SADQuar_4x OMXCATBAR(VCM4P10_SADQuar_4x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SADQuar_8x OMXCATBAR(VCM4P10_SADQuar_8x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SATD_4x4 OMXCATBAR(VCM4P10_SATD_4x4, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SubAndTransformQDQResidual OMXCATBAR(VCM4P10_SubAndTransformQDQResidual, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformDequantChromaDCFromPair OMXCATBAR(VCM4P10_TransformDequantChromaDCFromPair, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformDequantLumaDCFromPair OMXCATBAR(VCM4P10_TransformDequantLumaDCFromPair, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformQuant_ChromaDC OMXCATBAR(VCM4P10_TransformQuant_ChromaDC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformQuant_LumaDC OMXCATBAR(VCM4P10_TransformQuant_LumaDC, OMXVCM4P10_SUFFIX) + +#define omxVCM4P2_BlockMatch_Half_16x16 OMXCATBAR(VCM4P2_BlockMatch_Half_16x16, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_BlockMatch_Half_8x8 OMXCATBAR(VCM4P2_BlockMatch_Half_8x8, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_BlockMatch_Integer_16x16 OMXCATBAR(VCM4P2_BlockMatch_Integer_16x16, OMXVCM4P2_SUFFIX) +#define 
omxVCM4P2_BlockMatch_Integer_8x8 OMXCATBAR(VCM4P2_BlockMatch_Integer_8x8, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DCT8x8blk OMXCATBAR(VCM4P2_DCT8x8blk, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeBlockCoef_Inter OMXCATBAR(VCM4P2_DecodeBlockCoef_Inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeBlockCoef_Intra OMXCATBAR(VCM4P2_DecodeBlockCoef_Intra, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodePadMV_PVOP OMXCATBAR(VCM4P2_DecodePadMV_PVOP, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeVLCZigzag_Inter OMXCATBAR(VCM4P2_DecodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeMV OMXCATBAR(VCM4P2_EncodeMV, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeVLCZigzag_Inter OMXCATBAR(VCM4P2_EncodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_FindMVpred OMXCATBAR(VCM4P2_FindMVpred, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_IDCT8x8blk OMXCATBAR(VCM4P2_IDCT8x8blk, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MCReconBlock OMXCATBAR(VCM4P2_MCReconBlock, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MEGetBufSize OMXCATBAR(VCM4P2_MEGetBufSize, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MEInit OMXCATBAR(VCM4P2_MEInit, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MotionEstimationMB OMXCATBAR(VCM4P2_MotionEstimationMB, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_PredictReconCoefIntra OMXCATBAR(VCM4P2_PredictReconCoefIntra, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantInter_I OMXCATBAR(VCM4P2_QuantInter_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantIntra_I OMXCATBAR(VCM4P2_QuantIntra_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantInvInter_I OMXCATBAR(VCM4P2_QuantInvInter_I, OMXVCM4P2_SUFFIX) +#define 
omxVCM4P2_QuantInvIntra_I OMXCATBAR(VCM4P2_QuantInvIntra_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_TransRecBlockCoef_inter OMXCATBAR(VCM4P2_TransRecBlockCoef_inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_TransRecBlockCoef_intra OMXCATBAR(VCM4P2_TransRecBlockCoef_intra, OMXVCM4P2_SUFFIX) + + +#endif /* _armOMX_h_ */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/omxtypes.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/omxtypes.h new file mode 100644 index 0000000000000000000000000000000000000000..8b295a6feee35b4c7cca52b5ef61b36bb41e0c63 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/omxtypes.h @@ -0,0 +1,252 @@ +/** + * File: omxtypes.h + * Brief: Defines basic Data types used in OpenMAX v1.0.2 header files. + * + * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved. + * + * These materials are protected by copyright laws and contain material + * proprietary to the Khronos Group, Inc. You may use these materials + * for implementing Khronos specifications, without altering or removing + * any trademark, copyright or other notice from the specification. + * + * Khronos Group makes no, and expressly disclaims any, representations + * or warranties, express or implied, regarding these materials, including, + * without limitation, any implied warranties of merchantability or fitness + * for a particular purpose or non-infringement of any intellectual property. + * Khronos Group makes no, and expressly disclaims any, warranties, express + * or implied, regarding the correctness, accuracy, completeness, timeliness, + * and reliability of these materials. 
+ * + * Under no circumstances will the Khronos Group, or any of its Promoters, + * Contributors or Members or their respective partners, officers, directors, + * employees, agents or representatives be liable for any damages, whether + * direct, indirect, special or consequential damages for lost revenues, + * lost profits, or otherwise, arising from or in connection with these + * materials. + * + * Khronos and OpenMAX are trademarks of the Khronos Group Inc. + * + */ + +#ifndef _OMXTYPES_H_ +#define _OMXTYPES_H_ + +#include <limits.h> /* INT_MAX, UCHAR_MAX, USHRT_MAX, ...: used by OMXResult and the typedef selection in this header */ + +#define OMX_IN +#define OMX_OUT +#define OMX_INOUT + + +typedef enum { + + /* Mandatory return codes - use cases are explicitly described for each function */ + OMX_Sts_NoErr = 0, /* No error, the function completed successfully */ + OMX_Sts_Err = -2, /* Unknown/unspecified error */ + OMX_Sts_InvalidBitstreamValErr = -182, /* Invalid value detected during bitstream processing */ + OMX_Sts_MemAllocErr = -9, /* Not enough memory allocated for the operation */ + OMX_StsACAAC_GainCtrErr = -159, /* AAC: Unsupported gain control data detected */ + OMX_StsACAAC_PrgNumErr = -167, /* AAC: Invalid number of elements for one program */ + OMX_StsACAAC_CoefValErr = -163, /* AAC: Invalid quantized coefficient value */ + OMX_StsACAAC_MaxSfbErr = -162, /* AAC: Invalid maxSfb value in relation to numSwb */ + OMX_StsACAAC_PlsDataErr = -160, /* AAC: pulse escape sequence data error */ + + /* Optional return codes - use cases are explicitly described for each function*/ + OMX_Sts_BadArgErr = -5, /* Bad Arguments */ + + OMX_StsACAAC_TnsNumFiltErr = -157, /* AAC: Invalid number of TNS filters */ + OMX_StsACAAC_TnsLenErr = -156, /* AAC: Invalid TNS region length */ + OMX_StsACAAC_TnsOrderErr = -155, /* AAC: Invalid order of TNS filter */ + OMX_StsACAAC_TnsCoefResErr = -154, /* AAC: Invalid bit-resolution for TNS filter coefficients */ + OMX_StsACAAC_TnsCoefErr = -153, /* AAC: Invalid TNS filter coefficients */ + OMX_StsACAAC_TnsDirectErr = -152, /* AAC: Invalid 
TNS filter direction */ + + OMX_StsICJP_JPEGMarkerErr = -183, /* JPEG marker encountered within an entropy-coded block; */ + /* Huffman decoding operation terminated early. */ + OMX_StsICJP_JPEGMarker = -181, /* JPEG marker encountered; Huffman decoding */ + /* operation terminated early. */ + OMX_StsIPPP_ContextMatchErr = -17, /* Context parameter doesn't match to the operation */ + + OMX_StsSP_EvenMedianMaskSizeErr = -180, /* Even size of the Median Filter mask was replaced by the odd one */ + + OMX_Sts_MaximumEnumeration = INT_MAX /*Placeholder, forces enum of size OMX_INT*/ + + } OMXResult; /** Return value or error value returned from a function. Identical to OMX_INT */ + + +/* OMX_U8 */ +#if UCHAR_MAX == 0xff +typedef unsigned char OMX_U8; +#elif USHRT_MAX == 0xff +typedef unsigned short int OMX_U8; +#else +#error OMX_U8 undefined +#endif + + +/* OMX_S8 */ +#if SCHAR_MAX == 0x7f +typedef signed char OMX_S8; +#elif SHRT_MAX == 0x7f +typedef signed short int OMX_S8; +#else +#error OMX_S8 undefined +#endif + + +/* OMX_U16 */ +#if USHRT_MAX == 0xffff +typedef unsigned short int OMX_U16; +#elif UINT_MAX == 0xffff +typedef unsigned int OMX_U16; +#else +#error OMX_U16 undefined +#endif + + +/* OMX_S16 */ +#if SHRT_MAX == 0x7fff +typedef signed short int OMX_S16; +#elif INT_MAX == 0x7fff +typedef signed int OMX_S16; +#else +#error OMX_S16 undefined +#endif + + +/* OMX_U32 */ +#if UINT_MAX == 0xffffffff +typedef unsigned int OMX_U32; +#elif ULONG_MAX == 0xffffffff /* test the unsigned limit: a 32-bit long has LONG_MAX 0x7fffffff, so LONG_MAX never matched here */ +typedef unsigned long int OMX_U32; +#else +#error OMX_U32 undefined +#endif + + +/* OMX_S32 */ +#if INT_MAX == 0x7fffffff +typedef signed int OMX_S32; +#elif LONG_MAX == 0x7fffffff +typedef long signed int OMX_S32; +#else +#error OMX_S32 undefined +#endif + + +/* OMX_U64 & OMX_S64 */ +#if defined( _WIN32 ) || defined ( _WIN64 ) + typedef __int64 OMX_S64; /** Signed 64-bit integer */ + typedef unsigned __int64 OMX_U64; /** Unsigned 64-bit integer */ + #define OMX_MIN_S64 (0x8000000000000000i64) + #define 
OMX_MIN_U64 (0x0000000000000000i64) + #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFi64) + #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFi64) +#else + typedef long long OMX_S64; /** Signed 64-bit integer */ + typedef unsigned long long OMX_U64; /** Unsigned 64-bit integer */ + #define OMX_MIN_S64 (0x8000000000000000LL) + #define OMX_MIN_U64 (0x0000000000000000LL) + #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFLL) + #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFLL) +#endif + + +/* OMX_SC8 */ +typedef struct +{ + OMX_S8 Re; /** Real part */ + OMX_S8 Im; /** Imaginary part */ + +} OMX_SC8; /** Signed 8-bit complex number */ + + +/* OMX_SC16 */ +typedef struct +{ + OMX_S16 Re; /** Real part */ + OMX_S16 Im; /** Imaginary part */ + +} OMX_SC16; /** Signed 16-bit complex number */ + + +/* OMX_SC32 */ +typedef struct +{ + OMX_S32 Re; /** Real part */ + OMX_S32 Im; /** Imaginary part */ + +} OMX_SC32; /** Signed 32-bit complex number */ + + +/* OMX_SC64 */ +typedef struct +{ + OMX_S64 Re; /** Real part */ + OMX_S64 Im; /** Imaginary part */ + +} OMX_SC64; /** Signed 64-bit complex number */ + + +/* OMX_F32 */ +typedef float OMX_F32; /** Single precision floating point,IEEE 754 */ + + +/* OMX_F64 */ +typedef double OMX_F64; /** Double precision floating point,IEEE 754 */ + + +/* OMX_INT */ +typedef int OMX_INT; /** signed integer corresponding to machine word length, has maximum signed value INT_MAX*/ + + +#define OMX_MIN_S8 (-128) +#define OMX_MIN_U8 0 +#define OMX_MIN_S16 (-32768) +#define OMX_MIN_U16 0 +#define OMX_MIN_S32 (-2147483647-1) +#define OMX_MIN_U32 0 + +#define OMX_MAX_S8 (127) +#define OMX_MAX_U8 (255) +#define OMX_MAX_S16 (32767) +#define OMX_MAX_U16 (0xFFFF) +#define OMX_MAX_S32 (2147483647) +#define OMX_MAX_U32 (0xFFFFFFFF) + +typedef void OMXVoid; + +#ifndef NULL +#define NULL ((void*)0) +#endif + +/** Defines the geometric position and size of a rectangle, + * where x,y defines the coordinates of the top left corner + * of the rectangle, with dimensions width in the x-direction + 
* and height in the y-direction */ +typedef struct { + OMX_INT x; /** x-coordinate of top left corner of rectangle */ + OMX_INT y; /** y-coordinate of top left corner of rectangle */ + OMX_INT width; /** Width in the x-direction. */ + OMX_INT height; /** Height in the y-direction. */ +}OMXRect; + + +/** Defines the geometric position of a point, */ +typedef struct +{ + OMX_INT x; /** x-coordinate */ + OMX_INT y; /** y-coordinate */ + +} OMXPoint; + + +/** Defines the dimensions of a rectangle, or region of interest in an image */ +typedef struct +{ + OMX_INT width; /** Width of the rectangle, in the x-direction */ + OMX_INT height; /** Height of the rectangle, in the y-direction */ + +} OMXSize; + +#endif /* _OMXTYPES_H_ */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/build_vc.pl b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/build_vc.pl new file mode 100755 index 0000000000000000000000000000000000000000..f0b43e01e5413ad940e5624e9f0ae82ab8fd5ee1 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/build_vc.pl @@ -0,0 +1,111 @@ +#!/usr/bin/perl +# +# +# File Name: build_vc.pl +# OpenMAX DL: v1.0.2 +# Revision: 9641 +# Date: Thursday, February 7, 2008 +# +# (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +# +# +# +# This file builds the OpenMAX DL vc domain library omxVC.o. +# + +use File::Spec; +use strict; + +my ($CC, $CC_OPTS, $AS, $AS_OPTS, $LIB, $LIB_OPTS, $LIB_TYPE); + +$CC = 'armcc'; +$CC_OPTS = '--no_unaligned_access --cpu ARM7TDMI -c'; +$AS = 'armasm'; +$AS_OPTS = '--no_unaligned_access --cpu ARM7TDMI'; +# $LIB = 'armlink'; +# $LIB_OPTS = '--partial -o'; +# $LIB_TYPE = '.o'; +$LIB = 'armar'; +$LIB_OPTS = '--create -r'; +$LIB_TYPE = '.a'; + +#------------------------ + +my (@headerlist, @filelist, $hd, $file, $ofile, $command, $objlist, $libfile, $h); + +# Define the list of directories containing included header files. 
+@headerlist = qw(api vc/api vc/m4p2/api vc/m4p10/api); + +# Define the list of source files to compile. +open(FILES, '<filelist_vc.txt') or die("Can't open source file list\n"); +@filelist = <FILES>; +close(FILES); + +# Fix the file separators in the header paths +foreach $h (@headerlist) +{ + $h = File::Spec->canonpath($h); +} + +# Create the include path to be passed to the compiler +$hd = '-I' . join(' -I', @headerlist); + +# Create the build directories "/lib/" and "/obj/" (if they are not there already) +mkdir "obj", 0777 if (! -d "obj"); +mkdir "lib", 0777 if (! -d "lib"); + +$objlist = ''; + +# Compile each file +foreach $file (@filelist) +{ + my $f; + my $base; + my $ext; + my $objfile; + + chomp($file); + $file = File::Spec->canonpath($file); + + (undef, undef, $f) = File::Spec->splitpath($file); + if(($base, $ext) = $f =~ /(.+)\.(\w)$/) + { + $objfile = File::Spec->catfile('obj', $base.'.o'); + + if($ext eq 'c') + { + $objlist .= "$objfile "; + $command = $CC.' '.$CC_OPTS.' '.$hd.' -o '.$objfile.' '.$file; + print "$command\n"; + system($command); + } + elsif($ext eq 's') + { + $objlist .= "$objfile "; + $command = $AS.' '.$AS_OPTS.' '.$hd.' -o '.$objfile.' '.$file; + print "$command\n"; + system($command); + } + else + { + print "Ignoring file: $f\n"; + } + } + else + { + die "No file extension found: $f\n"; + } +} + +# Do the final link stage to create the libraries. +$libfile = File::Spec->catfile('lib', 'omxVC'.$LIB_TYPE); +$command = $LIB.' '.$LIB_OPTS.' '.$libfile.'
'.$objlist; +print "$command\n"; +(system($command) == 0) and print "Build successful\n"; + + + + + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/filelist_vc.txt b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/filelist_vc.txt new file mode 100644 index 0000000000000000000000000000000000000000..66f34ac368e2f349402a36e2a4ca859c51e52176 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/filelist_vc.txt @@ -0,0 +1,123 @@ +./api/armCOMM.h +./api/armCOMM_Bitstream.h +./api/armCOMM_Version.h +./api/armOMX_ReleaseVersion.h +./api/omxtypes.h +./src/armCOMM.c +./src/armCOMM_Bitstream.c +./vc/api/armVC.h +./vc/api/omxVC.h +./vc/comm/src/armVCCOMM_Average.c +./vc/comm/src/armVCCOMM_SAD.c +./vc/comm/src/omxVCCOMM_Average_16x.c +./vc/comm/src/omxVCCOMM_Average_8x.c +./vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c +./vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c +./vc/comm/src/omxVCCOMM_Copy16x16.c +./vc/comm/src/omxVCCOMM_Copy8x8.c +./vc/comm/src/omxVCCOMM_ExpandFrame_I.c +./vc/comm/src/omxVCCOMM_LimitMVToRect.c +./vc/comm/src/omxVCCOMM_SAD_16x.c +./vc/comm/src/omxVCCOMM_SAD_8x.c +./vc/m4p10/api/armVCM4P10_CAVLCTables.h +./vc/m4p10/src/armVCM4P10_CAVLCTables.c +./vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c +./vc/m4p10/src/armVCM4P10_DeBlockPixel.c +./vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c +./vc/m4p10/src/armVCM4P10_DequantTables.c +./vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c +./vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c +./vc/m4p10/src/armVCM4P10_Interpolate_Luma.c +./vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c +./vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c +./vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c +./vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c +./vc/m4p10/src/armVCM4P10_QuantTables.c +./vc/m4p10/src/armVCM4P10_SADQuar.c +./vc/m4p10/src/armVCM4P10_TransformResidual4x4.c +./vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c +./vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c 
+./vc/m4p10/src/omxVCM4P10_Average_4x.c +./vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c +./vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c +./vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c +./vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c +./vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c +./vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c +./vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c +./vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c +./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c +./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c +./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c +./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c +./vc/m4p10/src/omxVCM4P10_GetVLCInfo.c +./vc/m4p10/src/omxVCM4P10_InterpolateChroma.c +./vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c +./vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c +./vc/m4p10/src/omxVCM4P10_InterpolateLuma.c +./vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c +./vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c +./vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c +./vc/m4p10/src/omxVCM4P10_MEGetBufSize.c +./vc/m4p10/src/omxVCM4P10_MEInit.c +./vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c +./vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c +./vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c +./vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c +./vc/m4p10/src/omxVCM4P10_SAD_4x.c +./vc/m4p10/src/omxVCM4P10_SADQuar_16x.c +./vc/m4p10/src/omxVCM4P10_SADQuar_4x.c +./vc/m4p10/src/omxVCM4P10_SADQuar_8x.c +./vc/m4p10/src/omxVCM4P10_SATD_4x4.c +./vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c +./vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c +./vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c +./vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c +./vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c +./vc/m4p2/api/armVCM4P2_DCT_Table.h +./vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h +./vc/m4p2/api/armVCM4P2_ZigZag_Tables.h 
+./vc/m4p2/src/armVCM4P2_ACDCPredict.c +./vc/m4p2/src/armVCM4P2_BlockMatch_Half.c +./vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c +./vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c +./vc/m4p2/src/armVCM4P2_CompareMV.c +./vc/m4p2/src/armVCM4P2_DCT_Table.c +./vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c +./vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c +./vc/m4p2/src/armVCM4P2_FillVLCBuffer.c +./vc/m4p2/src/armVCM4P2_FillVLDBuffer.c +./vc/m4p2/src/armVCM4P2_GetVLCBits.c +./vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c +./vc/m4p2/src/armVCM4P2_PutVLCBits.c +./vc/m4p2/src/armVCM4P2_SetPredDir.c +./vc/m4p2/src/armVCM4P2_Zigzag_Tables.c +./vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c +./vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c +./vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c +./vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c +./vc/m4p2/src/omxVCM4P2_DCT8x8blk.c +./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c +./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c +./vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c +./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c +./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c +./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c +./vc/m4p2/src/omxVCM4P2_EncodeMV.c +./vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c +./vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c +./vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c +./vc/m4p2/src/omxVCM4P2_FindMVpred.c +./vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c +./vc/m4p2/src/omxVCM4P2_MCReconBlock.c +./vc/m4p2/src/omxVCM4P2_MEGetBufSize.c +./vc/m4p2/src/omxVCM4P2_MEInit.c +./vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c +./vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c +./vc/m4p2/src/omxVCM4P2_QuantInter_I.c +./vc/m4p2/src/omxVCM4P2_QuantIntra_I.c +./vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c +./vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c +./vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c +./vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c +./vc/src/armVC_Version.c \ No newline at end of file diff --git 
a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM.c new file mode 100644 index 0000000000000000000000000000000000000000..e572a896754dd46c166e31ae827eab526d62a645 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM.c @@ -0,0 +1,936 @@ +/** + * + * File Name: armCOMM.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Defines Common APIs used across OpenMAX API's + */ + +#include "omxtypes.h" +#include "armCOMM.h" + +/***********************************************************************/ + /* Miscellaneous Arithmetic operations */ + +/** + * Function: armRoundFloatToS16 + * + * Description: + * Converts a double precision value into a short int after rounding + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16 format + * + */ + +OMX_S16 armRoundFloatToS16 (OMX_F64 Value) +{ + if (Value > 0) + { + return (OMX_S16)(Value + .5); + } + else + { + return (OMX_S16)(Value - .5); + } +} + +/** + * Function: armRoundFloatToS32 + * + * Description: + * Converts a double precision value into a int after rounding + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S32 format + * + */ + +OMX_S32 armRoundFloatToS32 (OMX_F64 Value) +{ + if (Value > 0) + { + return (OMX_S32)(Value + .5); + } + else + { + return (OMX_S32)(Value - .5); + } +} +/** + * Function: armSatRoundFloatToS16 + * + * Description: + * Converts a double precision value into a short int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16 format + * + */ + +OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value) +{ + if (Value > 0) + { + Value += 0.5; + + 
if(Value > (OMX_S16)OMX_MAX_S16 ) + { + return (OMX_S16)OMX_MAX_S16; + } + else + { + return (OMX_S16)Value; + } + } + else + { + Value -= 0.5; + + if(Value < (OMX_S16)OMX_MIN_S16 ) + { + return (OMX_S16)OMX_MIN_S16; + } + else + { + return (OMX_S16)Value; + } + } +} + +/** + * Function: armSatRoundFloatToS32 + * + * Description: + * Converts a double precision value into a int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S32 format + * + */ + +OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value) +{ + if (Value > 0) + { + Value += 0.5; + + if(Value > (OMX_S32)OMX_MAX_S32 ) + { + return (OMX_S32)OMX_MAX_S32; + } + else + { + return (OMX_S32)Value; + } + } + else + { + Value -= 0.5; + + if(Value < (OMX_S32)OMX_MIN_S32 ) + { + return (OMX_S32)OMX_MIN_S32; + } + else + { + return (OMX_S32)Value; + } + } +} + +/** + * Function: armSatRoundFloatToU16 + * + * Description: + * Converts a double precision value into a unsigned short int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_U16 format + * + */ + +OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value) +{ + Value += 0.5; + + if(Value > (OMX_U16)OMX_MAX_U16 ) + { + return (OMX_U16)OMX_MAX_U16; + } + else + { + return (OMX_U16)Value; + } +} + +/** + * Function: armSatRoundFloatToU32 + * + * Description: + * Converts a double precision value into a unsigned int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_U32 format + * + */ + +OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value) +{ + Value += 0.5; + + if(Value > (OMX_U32)OMX_MAX_U32 ) + { + return (OMX_U32)OMX_MAX_U32; + } + else + { + return (OMX_U32)Value; + } +} + +/** + * Function: armRoundFloatToS64 + * + * Description: + * Converts a double precision value into a 64 bit int after 
rounding + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S64 format + * + */ + +OMX_S64 armRoundFloatToS64 (OMX_F64 Value) +{ + if (Value > 0) + { + return (OMX_S64)(Value + .5); + } + else + { + return (OMX_S64)(Value - .5); + } +} + +/** + * Function: armSignCheck + * + * Description: + * Checks the sign of a variable: + * returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + * + * Remarks: + * + * Parameters: + * [in] var Variable to be checked + * + * Return Value: + * OMX_INT -- returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + */ + +OMX_INT armSignCheck ( + OMX_S16 var +) + +{ + OMX_INT Sign; + + if (var < 0) + { + Sign = -1; + } + else if ( var > 0) + { + Sign = 1; + } + else + { + Sign = 0; + } + + return Sign; +} + +/** + * Function: armClip + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: + * + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to the clipped + * + * Return Value: + * OMX_S32 -- returns clipped value + */ + +OMX_S32 armClip ( + OMX_INT min, + OMX_INT max, + OMX_S32 src +) + +{ + if (src > max) + { + src = max; + } + else if (src < min) + { + src = min; + } + + return src; +} + +/** + * Function: armClip_F32 + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: + * + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to the clipped + * + * Return Value: + * OMX_F32 -- returns clipped value + */ + +OMX_F32 armClip_F32 ( + OMX_F32 min, + OMX_F32 max, + OMX_F32 src +) + +{ + if (src > max) + { + src = max; + } + else if (src < min) + { + src = min; + } + + return src; +} + +/** + * Function: armShiftSat_F32 + * + * Description: Divides a float value by 2^shift and + * saturates it for unsigned value range for satBits. 
+ * Second parameter is like "shifting" the corresponding + * integer value. Takes care of rounding while clipping the final + * value. + * + * Parameters: + * [in] v Number to be operated upon + * [in] shift Divides the input "v" by "2^shift" + * [in] satBits Final range is [0, 2^satBits) + * + * Return Value: + * OMX_S32 -- returns "shifted" saturated value + */ + +OMX_U32 armShiftSat_F32(OMX_F32 v, OMX_INT shift, OMX_INT satBits) +{ + OMX_U32 allOnes = (OMX_U32)(-1); + OMX_U32 maxV = allOnes >> (32-satBits); + OMX_F32 vShifted, vRounded, shiftDiv = (OMX_F32)(1 << shift); + OMX_U32 vInt; + OMX_U32 vIntSat; + + if(v <= 0) + return 0; + + vShifted = v / shiftDiv; + vRounded = (OMX_F32)(vShifted + 0.5); + vInt = (OMX_U32)vRounded; + vIntSat = vInt; + if(vIntSat > maxV) + vIntSat = maxV; + return vIntSat; +} + +/** + * Functions: armSwapElem + * + * Description: + * These function swaps two elements at the specified pointer locations. + * The size of each element could be anything as specified by + * + * Return Value: + * OMXResult -- Error status from the function + */ +OMXResult armSwapElem( + OMX_U8 *pBuf1, + OMX_U8 *pBuf2, + OMX_INT elemSize + ) +{ + OMX_INT i; + OMX_U8 temp; + armRetArgErrIf(!pBuf1 || !pBuf2, OMX_Sts_BadArgErr); + + for(i = 0; i < elemSize; i++) + { + temp = *(pBuf1 + i); + *(pBuf1 + i) = *(pBuf2 + i); + *(pBuf2 + i) = temp; + } + return OMX_Sts_NoErr; +} + +/** + * Function: armMedianOf3 + * + * Description: Finds the median of three numbers + * + * Remarks: + * + * Parameters: + * [in] fEntry First entry + * [in] sEntry second entry + * [in] tEntry Third entry + * + * Return Value: + * OMX_S32 -- returns the median value + */ + +OMX_S32 armMedianOf3 ( + OMX_S32 fEntry, + OMX_S32 sEntry, + OMX_S32 tEntry +) +{ + OMX_S32 a, b, c; + + a = armMin (fEntry, sEntry); + b = armMax (fEntry, sEntry); + c = armMin (b, tEntry); + return (armMax (a, c)); +} + +/** + * Function: armLogSize + * + * Description: Finds the size of a positive value and returns 
the same + * + * Remarks: + * + * Parameters: + * [in] value Positive value + * + * Return Value: + * OMX_U8 -- Returns the minimum number of bits required to represent the positive value. + This is the smallest k>=0 such that that value is less than (1< 0; value = value >> 1) + { + i++; + } + return i; +} + +/***********************************************************************/ + /* Saturating Arithmetic operations */ + +/** + * Function :armSatAdd_S32() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * Parametrs: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S32 armSatAdd_S32(OMX_S32 Value1,OMX_S32 Value2) +{ + OMX_S32 Result; + + Result = Value1 + Value2; + + if( (Value1^Value2) >= 0) + { + /*Same sign*/ + if( (Result^Value1) >= 0) + { + /*Result has not saturated*/ + return Result; + } + else + { + if(Value1 >= 0) + { + /*Result has saturated in positive side*/ + return OMX_MAX_S32; + } + else + { + /*Result has saturated in negative side*/ + return OMX_MIN_S32; + } + + } + + } + else + { + return Result; + } + +} + +/** + * Function :armSatAdd_S64() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * Parametrs: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S64 armSatAdd_S64(OMX_S64 Value1,OMX_S64 Value2) +{ + OMX_S64 Result; + + Result = Value1 + Value2; + + if( (Value1^Value2) >= 0) + { + /*Same sign*/ + if( (Result^Value1) >= 0) + { + /*Result has not saturated*/ + return Result; + } + else + { + if(Value1 >= 0) + { + /*Result has saturated in positive side*/ + Result = OMX_MAX_S64; + return Result; + } + else + { + /*Result has saturated in negative side*/ + return OMX_MIN_S64; + } + + } + + } + else + { + return Result; + } + +} + +/** Function :armSatSub_S32() + * + * Description 
: + * Returns the result of saturated substraction of the two inputs Value1, Value2 + * + * Parametrs: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatSub_S32(OMX_S32 Value1,OMX_S32 Value2) +{ + OMX_S32 Result; + + Result = Value1 - Value2; + + if( (Value1^Value2) < 0) + { + /*Opposite sign*/ + if( (Result^Value1) >= 0) + { + /*Result has not saturated*/ + return Result; + } + else + { + if(Value1 >= 0) + { + /*Result has saturated in positive side*/ + return OMX_MAX_S32; + } + else + { + /*Result has saturated in negative side*/ + return OMX_MIN_S32; + } + + } + + } + else + { + return Result; + } + +} + +/** + * Function :armSatMac_S32() + * + * Description : + * Returns the result of Multiplication of Value1 and Value2 and subesquent saturated + * accumulation with Mac + * + * Parametrs: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * [in] Mac Accumulator + * + * Return: + * [out] Result of operation + **/ + +OMX_S32 armSatMac_S32(OMX_S32 Mac,OMX_S16 Value1,OMX_S16 Value2) +{ + OMX_S32 Result; + + Result = (OMX_S32)(Value1*Value2); + Result = armSatAdd_S32( Mac , Result ); + + return Result; +} + +/** + * Function :armSatMac_S16S32_S32 + * + * Description : + * Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac + * + * mac = mac + Saturate_in_32Bits(delayElem * filTap) + * + * Parametrs: + * [in] delayElem First 32 bit Operand + * [in] filTap Second 16 bit Operand + * [in] mac Result of MAC operation + * + * Return: + * [out] mac Result of operation + * + **/ + +OMX_S32 armSatMac_S16S32_S32(OMX_S32 mac, OMX_S32 delayElem, OMX_S16 filTap ) +{ + + OMX_S32 result; + + result = armSatMulS16S32_S32(filTap,delayElem); + + if ( result > OMX_MAX_S16 ) + { + result = OMX_MAX_S32; + } + else if( result < OMX_MIN_S16 ) + { + result = OMX_MIN_S32; + } + else + { + result = delayElem * filTap; + } + + mac = armSatAdd_S32(mac,result); + 
+ return mac; +} + + +/** + * Function :armSatRoundRightShift_S32_S16 + * + * Description : + * Returns the result of rounded right shift operation of input by the scalefactor + * + * output = Saturate_in_16Bits( ( Right/LeftShift( (Round(input) , shift ) ) + * + * Parametrs: + * [in] input The input to be operated on + * [in] shift The shift number + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S16 armSatRoundRightShift_S32_S16(OMX_S32 input, OMX_INT shift) +{ + input = armSatRoundLeftShift_S32(input,-shift); + + if ( input > OMX_MAX_S16 ) + { + return (OMX_S16)OMX_MAX_S16; + } + else if (input < OMX_MIN_S16) + { + return (OMX_S16)OMX_MIN_S16; + } + else + { + return (OMX_S16)input; + } + +} + +/** + * Function :armSatRoundLeftShift_S32() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. + * + * Parametrs: + * [in] Value Operand + * [in] Shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatRoundLeftShift_S32(OMX_S32 Value, OMX_INT Shift) +{ + OMX_INT i; + + if (Shift < 0) + { + Shift = -Shift; + Value = armSatAdd_S32(Value, (1 << (Shift - 1))); + Value = Value >> Shift; + } + else + { + for (i = 0; i < Shift; i++) + { + Value = armSatAdd_S32(Value, Value); + } + } + return Value; +} + +/** + * Function :armSatRoundLeftShift_S64() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. 
+ * + * Parametrs: + * [in] Value Operand + * [in] shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S64 armSatRoundLeftShift_S64(OMX_S64 Value, OMX_INT Shift) +{ + OMX_INT i; + + if (Shift < 0) + { + Shift = -Shift; + Value = armSatAdd_S64(Value, ((OMX_S64)1 << (Shift - 1))); + Value = Value >> Shift; + } + else + { + for (i = 0; i < Shift; i++) + { + Value = armSatAdd_S64(Value, Value); + } + } + return Value; +} + +/** + * Function :armSatMulS16S32_S32() + * + * Description : + * Returns the result of a S16 data type multiplied with an S32 data type + * in a S32 container + * + * Parametrs: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S32 armSatMulS16S32_S32(OMX_S16 input1,OMX_S32 input2) +{ + OMX_S16 hi2,lo1; + OMX_U16 lo2; + + OMX_S32 temp1,temp2; + OMX_S32 result; + + lo1 = input1; + + hi2 = ( input2 >> 16 ); + lo2 = ( (OMX_U32)( input2 << 16 ) >> 16 ); + + temp1 = hi2 * lo1; + temp2 = ( lo2* lo1 ) >> 16; + + result = armSatAdd_S32(temp1,temp2); + + return result; +} + +/** + * Function :armSatMulS32S32_S32() + * + * Description : + * Returns the result of a S32 data type multiplied with an S32 data type + * in a S32 container + * + * Parametrs: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatMulS32S32_S32(OMX_S32 input1,OMX_S32 input2) +{ + OMX_S16 hi1,hi2; + OMX_U16 lo1,lo2; + + OMX_S32 temp1,temp2,temp3; + OMX_S32 result; + + hi1 = ( input1 >> 16 ); + lo1 = ( (OMX_U32)( input1 << 16 ) >> 16 ); + + hi2 = ( input2 >> 16 ); + lo2 = ( (OMX_U32)( input2 << 16 ) >> 16 ); + + temp1 = hi1 * hi2; + temp2 = ( hi1* lo2 ) >> 16; + temp3 = ( hi2* lo1 ) >> 16; + + result = armSatAdd_S32(temp1,temp2); + result = armSatAdd_S32(result,temp3); + + return result; +} + +/** + * Function :armIntDivAwayFromZero() + * + * Description : Integer division with rounding to the nearest 
integer. + * Half-integer values are rounded away from zero + * unless otherwise specified. For example 3//2 is rounded + * to 2, and -3//2 is rounded to -2. + * + * Parametrs: + * [in] Num Operand 1 + * [in] Deno Operand 2 + * + * Return: + * [out] Result of operation input1//input2 + * + **/ + +OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno) +{ + OMX_F64 result; + + result = ((OMX_F64)Num)/((OMX_F64)Deno); + + if (result >= 0) + { + result += 0.5; + } + else + { + result -= 0.5; + } + + return (OMX_S32)(result); +} + + +/*End of File*/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM_Bitstream.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM_Bitstream.c new file mode 100644 index 0000000000000000000000000000000000000000..9ef9319d3056336525f7c5d4adcd77a4bfb9e1f3 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM_Bitstream.c @@ -0,0 +1,329 @@ +/** + * + * File Name: armCOMM_Bitstream.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Defines bitstream encode and decode functions common to all codecs + */ + +#include "omxtypes.h" +#include "armCOMM.h" +#include "armCOMM_Bitstream.h" + +/*************************************** + * Fixed bit length Decode + ***************************************/ + +/** + * Function: armLookAheadBits() + * + * Description: + * Get the next N bits from the bitstream without advancing the bitstream pointer + * + * Parameters: + * [in] **ppBitStream + * [in] *pOffset + * [in] N=1...32 + * + * Returns Value + */ + +OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N) +{ + const OMX_U8 *pBitStream = *ppBitStream; + OMX_INT Offset = *pOffset; + OMX_U32 Value; + + armAssert(Offset>=0 && Offset<=7); + armAssert(N>=1 && N<=32); + + /* Read next 32 bits from stream */ + Value = (pBitStream[0] << 24 ) | ( pBitStream[1] << 16) | (pBitStream[2] << 8 ) | (pBitStream[3]) ; + Value = (Value << Offset ) | (pBitStream[4] >> (8-Offset)); + + /* Return N bits */ + return Value >> (32-N); +} + + +/** + * Function: armGetBits() + * + * Description: + * Read N bits from the bitstream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * [in] N=1..32 + * + * [out] *ppBitStream + * [out] *pOffset + * Returns Value + */ + + +OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N) +{ + const OMX_U8 *pBitStream = *ppBitStream; + OMX_INT Offset = *pOffset; + OMX_U32 Value; + + if(N == 0) + { + return 0; + } + + armAssert(Offset>=0 && Offset<=7); + armAssert(N>=1 && N<=32); + + /* Read next 32 bits from stream */ + Value = (pBitStream[0] << 24 ) | ( pBitStream[1] << 16) | (pBitStream[2] << 8 ) | (pBitStream[3]) ; + Value = (Value << Offset ) | (pBitStream[4] >> (8-Offset)); + + /* Advance bitstream pointer by N bits */ + Offset += N; + *ppBitStream = pBitStream + (Offset>>3); + *pOffset = Offset & 7; + + /* Return N bits */ + return Value >> (32-N); +} + +/** + * Function: armByteAlign() + * + * Description: + * Align 
the pointer *ppBitStream to the next byte boundary + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + +OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset) +{ + if(*pOffset > 0) + { + *ppBitStream += 1; + *pOffset = 0; + } +} + +/** + * Function: armSkipBits() + * + * Description: + * Skip N bits from the value at *ppBitStream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * [in] N + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + + +OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N) +{ + OMX_INT Offset = *pOffset; + const OMX_U8 *pBitStream = *ppBitStream; + + /* Advance bitstream pointer by N bits */ + Offset += N; + *ppBitStream = pBitStream + (Offset>>3); + *pOffset = Offset & 7; +} + +/*************************************** + * Variable bit length Decode + ***************************************/ + +/** + * Function: armUnPackVLC32() + * + * Description: + * Variable length decode of variable length symbol (max size 32 bits) read from + * the bit stream pointed by *ppBitStream at *pOffset by using the table + * pointed by pCodeBook + * + * Parameters: + * [in] *pBitStream + * [in] *pOffset + * [in] pCodeBook + * + * [out] *pBitStream + * [out] *pOffset + * + * Returns : Code Book Index if successfull. + * : ARM_NO_CODEBOOK_INDEX = -1 if search fails. 
+ **/ +#ifndef C_OPTIMIZED_IMPLEMENTATION + +OMX_U16 armUnPackVLC32( + const OMX_U8 **ppBitStream, + OMX_INT *pOffset, + const ARM_VLC32 *pCodeBook +) +{ + const OMX_U8 *pBitStream = *ppBitStream; + OMX_INT Offset = *pOffset; + OMX_U32 Value; + OMX_INT Index; + + armAssert(Offset>=0 && Offset<=7); + + /* Read next 32 bits from stream */ + Value = (pBitStream[0] << 24 ) | ( pBitStream[1] << 16) | (pBitStream[2] << 8 ) | (pBitStream[3]) ; + Value = (Value << Offset ) | (pBitStream[4] >> (8-Offset)); + + /* Search through the codebook */ + for (Index=0; pCodeBook->codeLen != 0; Index++) + { + if (pCodeBook->codeWord == (Value >> (32 - pCodeBook->codeLen))) + { + Offset = Offset + pCodeBook->codeLen; + *ppBitStream = pBitStream + (Offset >> 3) ; + *pOffset = Offset & 7; + + return Index; + } + pCodeBook++; + } + + /* No code match found */ + return ARM_NO_CODEBOOK_INDEX; +} + +#endif + +/*************************************** + * Fixed bit length Encode + ***************************************/ + +/** + * Function: armPackBits + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. + * [in] codeWord Code word that need to be inserted in to the + * bitstream + * [in] codeLength Length of the code word valid range 1...32 + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armPackBits ( + OMX_U8 **ppBitStream, + OMX_INT *pOffset, + OMX_U32 codeWord, + OMX_INT codeLength +) +{ + OMX_U8 *pBitStream = *ppBitStream; + OMX_INT Offset = *pOffset; + OMX_U32 Value; + + /* Argument checks: bit offset must lie in 0..7 and code length in 1..32 */ + armRetArgErrIf(Offset < 0, OMX_Sts_BadArgErr); + armRetArgErrIf(Offset > 7, OMX_Sts_BadArgErr); + armRetArgErrIf(codeLength < 1, OMX_Sts_BadArgErr); + armRetArgErrIf(codeLength > 32, OMX_Sts_BadArgErr); + + /* Prepare the first byte: left-justify the code word in 32 bits, keep the Offset bits already present in the current byte and append the leading code bits */ + codeWord = codeWord << (32-codeLength); + Value = (pBitStream[0] >> (8-Offset)) << (8-Offset); + Value = Value | (codeWord >> (24+Offset)); + + /* Write out whole bytes while the remaining code bits fill the current byte */ + while (8-Offset <= codeLength) + { + *pBitStream++ = (OMX_U8)Value; + codeWord = codeWord << (8-Offset); + codeLength = codeLength - (8-Offset); + Offset = 0; + Value = codeWord >> 24; + } + + /* Write out final partial byte; this store happens even when no code bits remain, so the byte at the new position is always written */ + *pBitStream = (OMX_U8)Value; + *ppBitStream = pBitStream; + *pOffset = Offset + codeLength; + + return OMX_Sts_NoErr; +} + +/*************************************** + * Variable bit length Encode + ***************************************/ + +/** + * Function: armPackVLC32 + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pBitOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. + * [in] code VLC code word that needs to be inserted into the + * bitstream + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armPackVLC32 ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + ARM_VLC32 code +) +{ + return (armPackBits(ppBitStream, pBitOffset, code.codeWord, code.codeLen)); /* forwards the code word and its length to armPackBits and returns its result unchanged */ +} + +/*End of File*/ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/armVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/armVC.h new file mode 100644 index 0000000000000000000000000000000000000000..7fa7716740b9122ae10b1c8f5f1e5ed2c8aac4c9 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/armVC.h @@ -0,0 +1,1153 @@ +/** + * + * File Name: armVC.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armVideo.h + * Brief: Declares API's/Basic Data types used across the OpenMAX Video domain + * + */ + + +#ifndef _armVideo_H_ +#define _armVideo_H_ + +#include "omxVC.h" +#include "armCOMM_Bitstream.h" + +/** + * ARM specific state structure to hold Motion Estimation information. 
+ */ + +struct m4p2_MESpec +{ + OMXVCM4P2MEParams MEParams; + OMXVCM4P2MEMode MEMode; +}; + +struct m4p10_MESpec +{ + OMXVCM4P10MEParams MEParams; + OMXVCM4P10MEMode MEMode; +}; + +typedef struct m4p2_MESpec ARMVCM4P2_MESpec; +typedef struct m4p10_MESpec ARMVCM4P10_MESpec; + +/** + * Function: armVCM4P2_CompareMV + * + * Description: + * Performs comparison of motion vectors and SADs to decide the + * best MV and SAD + * + * Remarks: + * + * Parameters: + * [in] mvX x coordinate of the candidate motion vector + * [in] mvY y coordinate of the candidate motion vector + * [in] candSAD Candidate SAD + * [in] bestMVX x coordinate of the best motion vector + * [in] bestMVY y coordinate of the best motion vector + * [in] bestSAD best SAD + * + * Return Value: + * OMX_INT -- 1 to indicate that the current sad is the best + * 0 to indicate that it is NOT the best SAD + */ + +OMX_INT armVCM4P2_CompareMV ( + OMX_S16 mvX, + OMX_S16 mvY, + OMX_INT candSAD, + OMX_S16 bestMVX, + OMX_S16 bestMVY, + OMX_INT bestSAD); + +/** + * Function: armVCM4P2_ACDCPredict + * + * Description: + * Performs adaptive DC/AC coefficient prediction for an intra block. Prior + * to the function call, prediction direction (predDir) should be selected + * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2. + * + * Remarks: + * + * Parameters: + * [in] pSrcDst pointer to the coefficient buffer which contains + * the quantized coefficient residuals (PQF) of the + * current block + * [in] pPredBufRow pointer to the coefficient row buffer + * [in] pPredBufCol pointer to the coefficient column buffer + * [in] curQP quantization parameter of the current block. curQP + * may equal predQP especially when the current + * block and the predictor block are in the same + * macroblock. 
+ * [in] predQP quantization parameter of the predictor block + * [in] predDir indicates the prediction direction which takes one + * of the following values: + * OMX_VIDEO_HORIZONTAL predict horizontally + * OMX_VIDEO_VERTICAL predict vertically + * [in] ACPredFlag a flag indicating if AC prediction should be + * performed. It is equal to ac_pred_flag in the bit + * stream syntax of MPEG-4 + * [in] videoComp video component type (luminance, chrominance or + * alpha) of the current block + * [in] flag This flag defines the if one wants to use this functions to + * calculate PQF (set 1, prediction) or QF (set 0, reconstruction) + * [out] pPreACPredict pointer to the predicted coefficients buffer. + * Filled ONLY if it is not NULL + * [out] pSrcDst pointer to the coefficient buffer which contains + * the quantized coefficients (QF) of the current + * block + * [out] pPredBufRow pointer to the updated coefficient row buffer + * [out] pPredBufCol pointer to the updated coefficient column buffer + * [out] pSumErr pointer to the updated sum of the difference + * between predicted and unpredicted coefficients + * If this is NULL, do not update + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_ACDCPredict( + OMX_S16 * pSrcDst, + OMX_S16 * pPreACPredict, + OMX_S16 * pPredBufRow, + OMX_S16 * pPredBufCol, + OMX_INT curQP, + OMX_INT predQP, + OMX_INT predDir, + OMX_INT ACPredFlag, + OMXVCM4P2VideoComponent videoComp, + OMX_U8 flag, + OMX_INT *pSumErr +); + +/** + * Function: armVCM4P2_SetPredDir + * + * Description: + * Performs detecting the prediction direction + * + * Remarks: + * + * Parameters: + * [in] blockIndex block index indicating the component type and + * position as defined in subclause 6.1.3.8, of ISO/IEC + * 14496-2. Furthermore, indexes 6 to 9 indicate the + * alpha blocks spatially corresponding to luminance + * blocks 0 to 3 in the same macroblock. 
+ * [in] pCoefBufRow pointer to the coefficient row buffer + * [in] pQpBuf pointer to the quantization parameter buffer + * [out] predQP quantization parameter of the predictor block + * [out] predDir indicates the prediction direction which takes one + * of the following values: + * OMX_VIDEO_HORIZONTAL predict horizontally + * OMX_VIDEO_VERTICAL predict vertically + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_SetPredDir( + OMX_INT blockIndex, + OMX_S16 *pCoefBufRow, + OMX_S16 *pCoefBufCol, + OMX_INT *predDir, + OMX_INT *predQP, + const OMX_U8 *pQpBuf +); + +/** + * Function: armVCM4P2_EncodeVLCZigzag_Intra + * + * Description: + * Performs zigzag scanning and VLC encoding for one intra block. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7. + * [in] pQDctBlkCoef pointer to the quantized DCT coefficient + * [in] predDir AC prediction direction, which is used to decide + * the zigzag scan pattern. This takes one of the + * following values: + * OMX_VIDEO_NONE AC prediction not used. + * Performs classical zigzag + * scan. + * OMX_VIDEO_HORIZONTAL Horizontal prediction. + * Performs alternate-vertical + * zigzag scan. + * OMX_VIDEO_VERTICAL Vertical prediction. + * Performs alternate-horizontal + * zigzag scan. + * [in] pattern block pattern which is used to decide whether + * this block is encoded + * [in] start start indicates whether the encoding begins with 0th element + * or 1st. + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. 
+ * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_EncodeVLCZigzag_Intra( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 predDir, + OMX_U8 pattern, + OMX_INT shortVideoHeader, + OMX_U8 start +); + +/** + * Function: armVCM4P2_DecodeVLCZigzag_Intra + * + * Description: + * Performs VLC decoding and inverse zigzag scan for one intra coded block. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bitstream buffer + * [in] pBitOffset pointer to the bit position in the byte pointed + * to by *ppBitStream. *pBitOffset is valid within + * [0-7]. + * [in] predDir AC prediction direction which is used to decide + * the zigzag scan pattern. It takes one of the + * following values: + * OMX_VIDEO_NONE AC prediction not used; + * perform classical zigzag scan; + * OMX_VIDEO_HORIZONTAL Horizontal prediction; + * perform alternate-vertical + * zigzag scan; + * OMX_VIDEO_VERTICAL Vertical prediction; + * thus perform + * alternate-horizontal + * zigzag scan. + * [in] videoComp video component type (luminance, chrominance or + * alpha) of the current block + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] start start indicates whether the encoding begins with 0th element + * or 1st. + * [out] ppBitStream *ppBitStream is updated after the block is + * decoded, so that it points to the current byte + * in the bit stream buffer + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream + * [out] pDst pointer to the coefficient buffer of current + * block. Should be 32-bit aligned + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armVCM4P2_DecodeVLCZigzag_Intra( + const OMX_U8 ** ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_U8 predDir, + OMX_INT shortVideoHeader, + OMX_U8 start +); + +/** + * Function: armVCM4P2_FillVLDBuffer + * + * Description: + * Performs filling of the coefficient buffer according to the run, level + * and sign, also updates the index + * + * Parameters: + * [in] storeRun Stored Run value (count of zeros) + * [in] storeLevel Stored Level value (non-zero value) + * [in] sign Flag indicating the sign of level + * [in] last status of the last flag + * [in] pIndex pointer to coefficient index in 8x8 matrix + * [out] pIndex pointer to updated coefficient index in 8x8 + * matrix + * [in] pZigzagTable pointer to the zigzag tables + * [out] pDst pointer to the coefficient buffer of current + * block. Should be 32-bit aligned + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_FillVLDBuffer( + OMX_U32 storeRun, + OMX_S16 * pDst, + OMX_S16 storeLevel, + OMX_U8 sign, + OMX_U8 last, + OMX_U8 * index, + const OMX_U8 * pZigzagTable +); + +/** + * Function: armVCM4P2_GetVLCBits + * + * Description: + * Performs escape mode decision based on the run, run+, level, level+ and + * last combinations. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7 + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] start start indicates whether the encoding begins with + * 0th element or 1st. 
+ * [in/out] pLast pointer to last status flag + * [in] runBeginSingleLevelEntriesL0 The run value from which level + * will be equal to 1: last == 0 + * [in] IndexBeginSingleLevelEntriesL0 Array index in the VLC table + * pointing to the + * runBeginSingleLevelEntriesL0 + * [in] runBeginSingleLevelEntriesL1 The run value from which level + * will be equal to 1: last == 1 + * [in] IndexBeginSingleLevelEntriesL1 Array index in the VLC table + * pointing to the + * runBeginSingleLevelEntriesL0 + * [in] pRunIndexTableL0 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pVlcTableL0 VLC table for last == 0 + * [in] pRunIndexTableL1 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pVlcTableL1 VLC table for last == 1 + * [in] pLMAXTableL0 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pLMAXTableL1 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pRMAXTableL0 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pRMAXTableL1 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [out]pDst pointer to the coefficient buffer of current + * block. Should be 32-bit aligned + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armVCM4P2_GetVLCBits ( + const OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_INT shortVideoHeader, + OMX_U8 start, + OMX_U8 * pLast, + OMX_U8 runBeginSingleLevelEntriesL0, + OMX_U8 maxIndexForMultipleEntriesL0, + OMX_U8 maxRunForMultipleEntriesL1, + OMX_U8 maxIndexForMultipleEntriesL1, + const OMX_U8 * pRunIndexTableL0, + const ARM_VLC32 *pVlcTableL0, + const OMX_U8 * pRunIndexTableL1, + const ARM_VLC32 *pVlcTableL1, + const OMX_U8 * pLMAXTableL0, + const OMX_U8 * pLMAXTableL1, + const OMX_U8 * pRMAXTableL0, + const OMX_U8 * pRMAXTableL1, + const OMX_U8 * pZigzagTable +); + +/** + * Function: armVCM4P2_PutVLCBits + * + * Description: + * Checks the type of Escape Mode and put encoded bits for + * quantized DCT coefficients. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7 + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] start start indicates whether the encoding begins with + * 0th element or 1st. 
+ * [in] maxStoreRunL0 Max store possible (considering last and inter/intra) + * for last = 0 + * [in] maxStoreRunL1 Max store possible (considering last and inter/intra) + * for last = 1 + * [in] maxRunForMultipleEntriesL0 + * The run value after which level + * will be equal to 1: + * (considering last and inter/intra status) for last = 0 + * [in] maxRunForMultipleEntriesL1 + * The run value after which level + * will be equal to 1: + * (considering last and inter/intra status) for last = 1 + * [in] pRunIndexTableL0 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pVlcTableL0 VLC table for last == 0 + * [in] pRunIndexTableL1 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pVlcTableL1 VLC table for last == 1 + * [in] pLMAXTableL0 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pLMAXTableL1 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pRMAXTableL0 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pRMAXTableL1 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [out] pQDctBlkCoef pointer to the quantized DCT coefficient + * [out] ppBitStream *ppBitStream is updated after the block is encoded + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + */ + + +OMXResult armVCM4P2_PutVLCBits ( + OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_INT shortVideoHeader, + OMX_U8 start, + OMX_U8 maxStoreRunL0, + OMX_U8 maxStoreRunL1, + OMX_U8 maxRunForMultipleEntriesL0, + OMX_U8 maxRunForMultipleEntriesL1, + const OMX_U8 * pRunIndexTableL0, + const ARM_VLC32 *pVlcTableL0, + const OMX_U8 * pRunIndexTableL1, + const ARM_VLC32 *pVlcTableL1, + const OMX_U8 * pLMAXTableL0, + const OMX_U8 * pLMAXTableL1, + const OMX_U8 * pRMAXTableL0, + const OMX_U8 * pRMAXTableL1, + const OMX_U8 * pZigzagTable +); +/** + * Function: armVCM4P2_FillVLCBuffer + * + * Description: + * Performs calculating the VLC bits depending on the escape type and insert + * the same in the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7 + * [in] run Run value (count of zeros) to be encoded + * [in] level Level value (non-zero value) to be encoded + * [in] runPlus Calculated as runPlus = run - (RMAX + 1) + * [in] levelPlus Calculated as + * levelPlus = sign(level)*[abs(level) - LMAX] + * [in] fMode Flag indicating the escape modes + * [in] last status of the last flag + * [in] maxRunForMultipleEntries + * The run value after which level will be equal to 1: + * (considering last and inter/intra status) + * [in] pRunIndexTable Run Index table defined in + * armVCM4P2_Huff_tables_VLC.h + * [in] pVlcTable VLC table defined in armVCM4P2_Huff_tables_VLC.h + * [out] ppBitStream *ppBitStream is updated after the block is encoded + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armVCM4P2_FillVLCBuffer ( + OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + OMX_U32 run, + OMX_S16 level, + OMX_U32 runPlus, + OMX_S16 levelPlus, + OMX_U8 fMode, + OMX_U8 last, + OMX_U8 maxRunForMultipleEntries, + const OMX_U8 *pRunIndexTable, + const ARM_VLC32 *pVlcTable +); + +/** + * Function: armVCM4P2_CheckVLCEscapeMode + * + * Description: + * Performs escape mode decision based on the run, run+, level, level+ and + * last combinations. + * + * Remarks: + * + * Parameters: + * [in] run Run value (count of zeros) to be encoded + * [in] level Level value (non-zero value) to be encoded + * [in] runPlus Calculated as runPlus = run - (RMAX + 1) + * [in] levelPlus Calculated as + * levelPlus = sign(level)*[abs(level) - LMAX] + * [in] maxStoreRun Max store possible (considering last and inter/intra) + * [in] maxRunForMultipleEntries + * The run value after which level + * will be equal to 1: + * (considering last and inter/intra status) + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] pRunIndexTable Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c + * (considering last and inter/intra status) + * + * + * Return Value: + * Returns an Escape mode which can take values from 0 to 3 + * 0 --> no escape mode, 1 --> escape type 1, + * 2 --> escape type 2, 3 --> escape type 3, check section 7.4.1.3 + * in the MPEG ISO standard. + * + */ + +OMX_U8 armVCM4P2_CheckVLCEscapeMode( + OMX_U32 run, + OMX_U32 runPlus, + OMX_S16 level, + OMX_S16 levelPlus, + OMX_U8 maxStoreRun, + OMX_U8 maxRunForMultipleEntries, + OMX_INT shortVideoHeader, + const OMX_U8 *pRunIndexTable +); + + +/** + * Function: armVCM4P2_BlockMatch_Integer + * + * Description: + * Performs a 16x16 block search; estimates motion vector and associated minimum SAD. 
+ * Both the input and output motion vectors are represented using half-pixel units, and + * therefore a shift left or right by 1 bit may be required, respectively, to match the + * input or output MVs with other functions that either generate output MVs or expect + * input MVs represented using integer pixel units. + * + * Remarks: + * + * Parameters: + * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB that + * corresponds to the location of the current macroblock in the current + * plane. + * [in] refWidth width of the reference plane + * [in] pRefRect pointer to the valid rectangle in the reference plane. Relative to image origin. + * It's not limited to the image boundary, but depends on the padding. For example, + * if you pad 4 pixels outside the image border, then the value for left border + * can be -4 + * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane (linear array, + * 256 entries); must be aligned on an 8-byte boundary. + * [in] pCurrPointPos position of the current macroblock in the current plane + * [in] pSrcPreMV pointer to predicted motion vector; NULL indicates no predicted MV + * [in] pSrcPreSAD pointer to SAD associated with the predicted MV (referenced by pSrcPreMV) + * [in] searchRange search range for 16X16 integer block; its units are full pixels and the search range + * is the same in all directions. It is inclusive of the boundary and specified in + * terms of integer pixel units. + * [in] pMESpec vendor-specific motion estimation specification structure; must have been allocated + * and then initialized using omxVCM4P2_MEInit prior to calling the block matching + * function. + * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8. + * [out] pDstMV pointer to estimated MV + * [out] pDstSAD pointer to minimum SAD + * + * Return Value: + * OMX_Sts_NoErr - no error. 
+ * OMX_Sts_BadArgErr - bad arguments + * + */ + +OMXResult armVCM4P2_BlockMatch_Integer( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCMotionVector *pSrcPreMV, + const OMX_INT *pSrcPreSAD, + void *pMESpec, + OMXVCMotionVector *pDstMV, + OMX_INT *pDstSAD, + OMX_U8 BlockSize +); + +/** + * Function: armVCM4P2_BlockMatch_Half + * + * Description: + * Performs a 16x16 block match with half-pixel resolution. Returns the estimated + * motion vector and associated minimum SAD. This function estimates the half-pixel + * motion vector by interpolating the integer resolution motion vector referenced + * by the input parameter pSrcDstMV, i.e., the initial integer MV is generated + * externally. The input parameters pSrcRefBuf and pSearchPointRefPos should be + * shifted by the winning MV of 16x16 integer search prior to calling BlockMatch_Half_16x16. + * The function BlockMatch_Integer_16x16 may be used for integer motion estimation. + * + * Remarks: + * + * Parameters: + * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB + * that corresponds to the location of the current macroblock in + * the current plane. + * [in] refWidth width of the reference plane + * [in] pRefRect reference plane valid region rectangle + * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane + * (linear array, 256 entries); must be aligned on an 8-byte boundary. + * [in] pSearchPointRefPos position of the starting point for half pixel search (specified + * in terms of integer pixel units) in the reference plane. + * [in] rndVal rounding control bit for half pixel motion estimation; + * 0=rounding control disabled; 1=rounding control enabled + * [in] pSrcDstMV pointer to the initial MV estimate; typically generated during a prior + * 16X16 integer search and its unit is half pixel. 
+ * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8. + * [out]pSrcDstMV pointer to estimated MV + * [out]pDstSAD pointer to minimum SAD + * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * + */ + +OMXResult armVCM4P2_BlockMatch_Half( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pSearchPointRefPos, + OMX_INT rndVal, + OMXVCMotionVector *pSrcDstMV, + OMX_INT *pDstSAD, + OMX_U8 BlockSize +); +/** + * Function: armVCM4P2_PadMV + * + * Description: + * Performs motion vector padding for a macroblock. + * + * Remarks: + * + * Parameters: + * [in] pSrcDstMV pointer to motion vector buffer of the current + * macroblock + * [in] pTransp pointer to transparent status buffer of the + * current macroblock + * [out] pSrcDstMV pointer to motion vector buffer in which the + * motion vectors have been padded + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_PadMV( + OMXVCMotionVector * pSrcDstMV, + OMX_U8 * pTransp +); + +/* + * H.264 Specific Declarations + */ +/* Defines */ +#define ARM_M4P10_Q_OFFSET (15) + + +/* Dequant tables */ + +extern const OMX_U8 armVCM4P10_PosToVCol4x4[16]; +extern const OMX_U8 armVCM4P10_PosToVCol2x2[4]; +extern const OMX_U8 armVCM4P10_VMatrix[6][3]; +extern const OMX_U32 armVCM4P10_MFMatrix[6][3]; + + +/* + * Description: + * This function performs the work required by the OpenMAX + * DecodeCoeffsToPair function and DecodeChromaDCCoeffsToPair. + * Since most of the code is common we share it here. 
+ * + * Parameters: + * [in] ppBitStream Double pointer to current byte in bit stream buffer + * [in] pOffset Pointer to current bit position in the byte pointed + * to by *ppBitStream + * [in] sMaxNumCoeff Maximum number of non-zero coefficients in current + * block (4,15 or 16) + * [in] nTable Table number (0 to 4) according to the five columns + * of Table 9-5 in the H.264 spec + * [out] ppBitStream *ppBitStream is updated after each block is decoded + * [out] pOffset *pOffset is updated after each block is decoded + * [out] pNumCoeff Pointer to the number of nonzero coefficients in + * this block + * [out] ppPosCoefbuf Double pointer to destination residual + * coefficient-position pair buffer + * Return Value: + * Standard omxError result. See enumeration for possible result codes. + + */ + +OMXResult armVCM4P10_DecodeCoeffsToPair( + const OMX_U8** ppBitStream, + OMX_S32* pOffset, + OMX_U8* pNumCoeff, + OMX_U8**ppPosCoefbuf, + OMX_INT nTable, + OMX_INT sMaxNumCoeff + ); + +/* + * Description: + * Perform DC style intra prediction, averaging upper and left block + * + * Parameters: + * [in] pSrcLeft Pointer to the buffer of 16 left coefficients: + * p[x, y] (x = -1, y = 0..3) + * [in] pSrcAbove Pointer to the buffer of 16 above coefficients: + * p[x,y] (x = 0..3, y = -1) + * [in] leftStep Step of left coefficient buffer + * [in] dstStep Step of the destination buffer + * [in] availability Neighboring 16x16 MB availability flag + * [out] pDst Pointer to the destination buffer + * + * Return Value: + * None + */ + +void armVCM4P10_PredictIntraDC4x4( + const OMX_U8* pSrcLeft, + const OMX_U8 *pSrcAbove, + OMX_U8* pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMX_S32 availability +); + +/* + * Description + * Unpack a 4x4 block of coefficient-residual pair values + * + * Parameters: + * [in] ppSrc Double pointer to residual coefficient-position pair + * buffer output by CAVLC decoding + * [out] ppSrc *ppSrc is updated to the start of next non empty block + * [out] 
pDst Pointer to unpacked 4x4 block + */ + +void armVCM4P10_UnpackBlock4x4( + const OMX_U8 **ppSrc, + OMX_S16* pDst +); + +/* + * Description + * Unpack a 2x2 block of coefficient-residual pair values + * + * Parameters: + * [in] ppSrc Double pointer to residual coefficient-position pair + * buffer output by CAVLC decoding + * [out] ppSrc *ppSrc is updated to the start of next non empty block + * [out] pDst Pointer to unpacked 2x2 block + */ + +void armVCM4P10_UnpackBlock2x2( + const OMX_U8 **ppSrc, + OMX_S16* pDst +); + +/* + * Description + * Deblock one boundary pixel + * + * Parameters: + * [in] pQ0 Pointer to pixel q0 + * [in] Step Step between pixels q0 and q1 + * [in] tC0 Edge threshold value + * [in] alpha alpha threshold value + * [in] beta beta threshold value + * [in] bS deblocking strength + * [in] ChromaFlag True for chroma blocks + * [out] pQ0 Deblocked pixels + * + */ + +void armVCM4P10_DeBlockPixel( + OMX_U8 *pQ0, /* pointer to the pixel q0 */ + int Step, /* step between pixels q0 and q1 */ + int tC0, /* edge threshold value */ + int alpha, /* alpha */ + int beta, /* beta */ + int bS, /* deblocking strength */ + int ChromaFlag +); + +/** + * Function: armVCM4P10_InterpolateHalfHor_Luma + * + * Description: + * This function performs interpolation for horizontal 1/2-pel positions + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to interpolate + in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer of the 1/2-pel + * + * Return Value: + * Standard OMXResult value. 
+ * + */ + +OMXResult armVCM4P10_InterpolateHalfHor_Luma( + const OMX_U8* pSrc, + OMX_U32 iSrcStep, + OMX_U8* pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/** + * Function: armVCM4P10_InterpolateHalfVer_Luma + * + * Description: + * This function performs interpolation for vertical 1/2-pel positions + * around a full-pel position. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to interpolate + * in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer of the 1/2-pel + * + * Return Value: + * Standard OMXResult value. + * + */ + +OMXResult armVCM4P10_InterpolateHalfVer_Luma( + const OMX_U8* pSrc, + OMX_U32 iSrcStep, + OMX_U8* pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/** + * Function: armVCM4P10_InterpolateHalfDiag_Luma + * + * Description: + * This function performs interpolation for (1/2, 1/2) positions + * around a full-pel position. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to interpolate + * in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer of the (1/2,1/2)-pel + * + * Return Value: + * Standard OMXResult value. 
+ * + */ + +OMXResult armVCM4P10_InterpolateHalfDiag_Luma( + const OMX_U8* pSrc, + OMX_U32 iSrcStep, + OMX_U8* pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/* + * Description: + * Transform Residual 4x4 Coefficients + * + * Parameters: + * [in] pSrc Source 4x4 block + * [out] pDst Destination 4x4 block + * + */ + +void armVCM4P10_TransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc); + +/* + * Description: + * Forward Transform Residual 4x4 Coefficients + * + * Parameters: + * [in] pSrc Source 4x4 block + * [out] pDst Destination 4x4 block + * + */ + +void armVCM4P10_FwdTransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc); + +OMX_INT armVCM4P10_CompareMotionCostToMV ( + OMX_S16 mvX, + OMX_S16 mvY, + OMXVCMotionVector diffMV, + OMX_INT candSAD, + OMXVCMotionVector *bestMV, + OMX_U32 nLamda, + OMX_S32 *pBestCost); + +/** + * Function: armVCCOMM_SAD + * + * Description: + * This function calculate the SAD for NxM blocks. + * + * Remarks: + * + * [in] pSrcOrg Pointer to the original block + * [in] iStepOrg Step of the original block buffer + * [in] pSrcRef Pointer to the reference block + * [in] iStepRef Step of the reference block buffer + * [in] iHeight Height of the block + * [in] iWidth Width of the block + * [out] pDstSAD Pointer of result SAD + * + * Return Value: + * Standard OMXResult value. + * + */ +OMXResult armVCCOMM_SAD( + const OMX_U8* pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8* pSrcRef, + OMX_U32 iStepRef, + OMX_S32* pDstSAD, + OMX_U32 iHeight, + OMX_U32 iWidth); + +/** + * Function: armVCCOMM_Average + * + * Description: + * This function calculates the average of two blocks and stores the result. 
+ * + * Remarks: + * + * [in] pPred0 Pointer to the top-left corner of reference block 0 + * [in] pPred1 Pointer to the top-left corner of reference block 1 + * [in] iPredStep0 Step of reference block 0 + * [in] iPredStep1 Step of reference block 1 + * [in] iDstStep Step of the destination buffer + * [in] iWidth Width of the blocks + * [in] iHeight Height of the blocks + * [out] pDstPred Pointer to the destination buffer + * + * Return Value: + * Standard OMXResult value. + * + */ + OMXResult armVCCOMM_Average ( + const OMX_U8* pPred0, + const OMX_U8* pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8* pDstPred, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/** + * Function: armVCM4P10_SADQuar + * + * Description: + * This function calculates the SAD between one block (pSrc) and the + * average of the other two (pSrcRef0 and pSrcRef1) + * + * Remarks: + * + * [in] pSrc Pointer to the original block + * [in] pSrcRef0 Pointer to reference block 0 + * [in] pSrcRef1 Pointer to reference block 1 + * [in] iSrcStep Step of the original block buffer + * [in] iRefStep0 Step of reference block 0 + * [in] iRefStep1 Step of reference block 1 + * [in] iHeight Height of the block + * [in] iWidth Width of the block + * [out] pDstSAD Pointer of result SAD + * + * Return Value: + * Standard OMXResult value. + * + */ +OMXResult armVCM4P10_SADQuar( + const OMX_U8* pSrc, + const OMX_U8* pSrcRef0, + const OMX_U8* pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32* pDstSAD, + OMX_U32 iHeight, + OMX_U32 iWidth +); + +/** + * Function: armVCM4P10_Interpolate_Chroma + * + * Description: + * This function performs interpolation for chroma components. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to + * interpolate in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. 
+ * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [in] dx Fractional part of horizontal motion vector + * component in 1/8 pixel unit (0~7) + * [in] dy Fractional part of vertical motion vector + * component in 1/8 pixel unit (0~7) + * [out] pDst Pointer to the interpolation buffer + * + * Return Value: + * Standard OMXResult value. + * + */ + OMXResult armVCM4P10_Interpolate_Chroma( + OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight, + OMX_U32 dx, + OMX_U32 dy +); + +/** + * Function: armVCM4P10_Interpolate_Luma + * + * Description: + * This function performs interpolation for luma components. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to + * interpolate in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [in] dx Fractional part of horizontal motion vector + * component in 1/4 pixel unit (0~3) + * [in] dy Fractional part of vertical motion vector + * component in 1/4 pixel unit (0~3) + * [out] pDst Pointer to the interpolation buffer + * + * Return Value: + * Standard OMXResult value. + * + */ + + OMXResult armVCM4P10_Interpolate_Luma( + const OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight, + OMX_U32 dx, + OMX_U32 dy +); + +/** + * Function: armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx + * + * Description: + * Reconstruct the 4x4 residual block from coefficient-position pair buffer, + * perform dequantisation and integer inverse transformation for 4x4 block of + * residuals and update the pair buffer pointer to next non-empty block. 
+ * + * Remarks: + * + * Parameters: + * [in] ppSrc Double pointer to residual coefficient-position + * pair buffer output by CAVLC decoding + * [in] pDC Pointer to the DC coefficient of this block, NULL + * if it doesn't exist + * [in] QP Quantization parameter + * [in] AC Flag indicating if at least one non-zero coefficient exists + * [out] pDst pointer to the reconstructed 4x4 block data + * + * Return Value: + * Standard omxError result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx( + OMX_U8 **ppSrc, + OMX_S16 *pDst, + OMX_INT QP, + OMX_S16* pDC, + int AC +); + +#endif /*_armVideo_H_*/ + +/*End of File*/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/omxVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/omxVC.h new file mode 100644 index 0000000000000000000000000000000000000000..7b3cc7289554a10744eacffc0d0af5ef39d61e8c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/omxVC.h @@ -0,0 +1,4381 @@ +/** + * File: omxVC.h + * Brief: OpenMAX DL v1.0.2 - Video Coding library + * + * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved. + * + * These materials are protected by copyright laws and contain material + * proprietary to the Khronos Group, Inc. You may use these materials + * for implementing Khronos specifications, without altering or removing + * any trademark, copyright or other notice from the specification. + * + * Khronos Group makes no, and expressly disclaims any, representations + * or warranties, express or implied, regarding these materials, including, + * without limitation, any implied warranties of merchantability or fitness + * for a particular purpose or non-infringement of any intellectual property. 
+ * Khronos Group makes no, and expressly disclaims any, warranties, express + * or implied, regarding the correctness, accuracy, completeness, timeliness, + * and reliability of these materials. + * + * Under no circumstances will the Khronos Group, or any of its Promoters, + * Contributors or Members or their respective partners, officers, directors, + * employees, agents or representatives be liable for any damages, whether + * direct, indirect, special or consequential damages for lost revenues, + * lost profits, or otherwise, arising from or in connection with these + * materials. + * + * Khronos and OpenMAX are trademarks of the Khronos Group Inc. + * + */ + +/* *****************************************************************************************/ + +#ifndef _OMXVC_H_ +#define _OMXVC_H_ + +#include "omxtypes.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/* 6.1.1.1 Motion Vectors */ +/* In omxVC, motion vectors are represented as follows: */ + +typedef struct { + OMX_S16 dx; + OMX_S16 dy; +} OMXVCMotionVector; + + + +/** + * Function: omxVCCOMM_Average_8x (6.1.3.1.1) + * + * Description: + * This function calculates the average of two 8x4, 8x8, or 8x16 blocks. The + * result is rounded according to (a+b+1)/2. The block average function can + * be used in conjunction with half-pixel interpolation to obtain quarter + * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. + * + * Input Arguments: + * + * pPred0 - Pointer to the top-left corner of reference block 0 + * pPred1 - Pointer to the top-left corner of reference block 1 + * iPredStep0 - Step of reference block 0 + * iPredStep1 - Step of reference block 1 + * iDstStep - Step of the destination buffer. + * iHeight - Height of the blocks + * + * Output Arguments: + * + * pDstPred - Pointer to the destination buffer. 8-byte aligned. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pPred0, pPred1, or + * pDstPred. + * - pDstPred is not aligned on an 8-byte boundary. + * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 8. + * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 8. + * - iDstStep <= 0 or iDstStep is not a multiple of 8. + * - iHeight is not 4, 8, or 16. + * + */ +OMXResult omxVCCOMM_Average_8x ( + const OMX_U8 *pPred0, + const OMX_U8 *pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8 *pDstPred, + OMX_U32 iDstStep, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCCOMM_Average_16x (6.1.3.1.2) + * + * Description: + * This function calculates the average of two 16x16 or 16x8 blocks. The + * result is rounded according to (a+b+1)/2. The block average function can + * be used in conjunction with half-pixel interpolation to obtain quarter + * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. + * + * Input Arguments: + * + * pPred0 - Pointer to the top-left corner of reference block 0 + * pPred1 - Pointer to the top-left corner of reference block 1 + * iPredStep0 - Step of reference block 0 + * iPredStep1 - Step of reference block 1 + * iDstStep - Step of the destination buffer + * iHeight - Height of the blocks + * + * Output Arguments: + * + * pDstPred - Pointer to the destination buffer. 16-byte aligned. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pPred0, pPred1, or + * pDstPred. + * - pDstPred is not aligned on a 16-byte boundary. + * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 16. + * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 16. + * - iDstStep <= 0 or iDstStep is not a multiple of 16. + * - iHeight is not 8 or 16. 
+ * + */ +OMXResult omxVCCOMM_Average_16x ( + const OMX_U8 *pPred0, + const OMX_U8 *pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8 *pDstPred, + OMX_U32 iDstStep, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCCOMM_ExpandFrame_I (6.1.3.2.1) + * + * Description: + * This function expands a reconstructed frame in-place. The unexpanded + * source frame should be stored in a plane buffer with sufficient space + * pre-allocated for edge expansion, and the input frame should be located in + * the plane buffer center. This function executes the pixel expansion by + * replicating source frame edge pixel intensities in the empty pixel + * locations (expansion region) between the source frame edge and the plane + * buffer edge. The width/height of the expansion regions on the + * horizontal/vertical edges is controlled by the parameter iExpandPels. + * + * Input Arguments: + * + * pSrcDstPlane - pointer to the top-left corner of the frame to be + * expanded; must be aligned on an 8-byte boundary. + * iFrameWidth - frame width; must be a multiple of 8. + * iFrameHeight -frame height; must be a multiple of 8. + * iExpandPels - number of pixels to be expanded in the horizontal and + * vertical directions; must be a multiple of 8. + * iPlaneStep - distance, in bytes, between the start of consecutive lines + * in the plane buffer; must be larger than or equal to + * (iFrameWidth + 2 * iExpandPels). + * + * Output Arguments: + * + * pSrcDstPlane -Pointer to the top-left corner of the frame (NOT the + * top-left corner of the plane); must be aligned on an 8-byte + * boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - pSrcDstPlane is NULL. + * - pSrcDstPlane is not aligned on an 8-byte boundary. + * - one of the following parameters is either equal to zero or is a + * non-multiple of 8: iFrameHeight, iFrameWidth, iPlaneStep, or + * iExpandPels. 
+ * - iPlaneStep < (iFrameWidth + 2 * iExpandPels). + * + */ +OMXResult omxVCCOMM_ExpandFrame_I ( + OMX_U8 *pSrcDstPlane, + OMX_U32 iFrameWidth, + OMX_U32 iFrameHeight, + OMX_U32 iExpandPels, + OMX_U32 iPlaneStep +); + + + +/** + * Function: omxVCCOMM_Copy8x8 (6.1.3.3.1) + * + * Description: + * Copies the reference 8x8 block to the current block. + * + * Input Arguments: + * + * pSrc - pointer to the reference block in the source frame; must be + * aligned on an 8-byte boundary. + * step - distance between the starts of consecutive lines in the reference + * frame, in bytes; must be a multiple of 8 and must be larger than + * or equal to 8. + * + * Output Arguments: + * + * pDst - pointer to the destination block; must be aligned on an 8-byte + * boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pSrc, pDst + * - one or more of the following pointers is not aligned on an 8-byte + * boundary: pSrc, pDst + * - step <8 or step is not a multiple of 8. + * + */ +OMXResult omxVCCOMM_Copy8x8 ( + const OMX_U8 *pSrc, + OMX_U8 *pDst, + OMX_INT step +); + + + +/** + * Function: omxVCCOMM_Copy16x16 (6.1.3.3.2) + * + * Description: + * Copies the reference 16x16 macroblock to the current macroblock. + * + * Input Arguments: + * + * pSrc - pointer to the reference macroblock in the source frame; must be + * aligned on a 16-byte boundary. + * step - distance between the starts of consecutive lines in the reference + * frame, in bytes; must be a multiple of 16 and must be larger + * than or equal to 16. + * + * Output Arguments: + * + * pDst - pointer to the destination macroblock; must be aligned on a + * 16-byte boundary. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pSrc, pDst + * - one or more of the following pointers is not aligned on a 16-byte + * boundary: pSrc, pDst + * - step <16 or step is not a multiple of 16. + * + */ +OMXResult omxVCCOMM_Copy16x16 ( + const OMX_U8 *pSrc, + OMX_U8 *pDst, + OMX_INT step +); + + + +/** + * Function: omxVCCOMM_ComputeTextureErrorBlock_SAD (6.1.4.1.1) + * + * Description: + * Computes texture error of the block; also returns SAD. + * + * Input Arguments: + * + * pSrc - pointer to the source plane; must be aligned on an 8-byte + * boundary. + * srcStep - step of the source plane + * pSrcRef - pointer to the reference buffer, an 8x8 block; must be aligned + * on an 8-byte boundary. + * + * Output Arguments: + * + * pDst - pointer to the destination buffer, an 8x8 block; must be aligned + * on an 8-byte boundary. + * pDstSAD - pointer to the Sum of Absolute Differences (SAD) value + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following + * pointers is NULL: pSrc, pSrcRef, pDst and pDstSAD. + * - pSrc is not 8-byte aligned. + * - SrcStep <= 0 or srcStep is not a multiple of 8. + * - pSrcRef is not 8-byte aligned. + * - pDst is not 8-byte aligned. + * + */ +OMXResult omxVCCOMM_ComputeTextureErrorBlock_SAD ( + const OMX_U8 *pSrc, + OMX_INT srcStep, + const OMX_U8 *pSrcRef, + OMX_S16 *pDst, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCCOMM_ComputeTextureErrorBlock (6.1.4.1.2) + * + * Description: + * Computes the texture error of the block. + * + * Input Arguments: + * + * pSrc - pointer to the source plane. This should be aligned on an 8-byte + * boundary. + * srcStep - step of the source plane + * pSrcRef - pointer to the reference buffer, an 8x8 block. This should be + * aligned on an 8-byte boundary. 
+ * + * Output Arguments: + * + * pDst - pointer to the destination buffer, an 8x8 block. This should be + * aligned on an 8-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is NULL: + * pSrc, pSrcRef, pDst. + * - pSrc is not 8-byte aligned. + * - SrcStep <= 0 or srcStep is not a multiple of 8. + * - pSrcRef is not 8-byte aligned. + * - pDst is not 8-byte aligned + * + */ +OMXResult omxVCCOMM_ComputeTextureErrorBlock ( + const OMX_U8 *pSrc, + OMX_INT srcStep, + const OMX_U8 *pSrcRef, + OMX_S16 *pDst +); + + + +/** + * Function: omxVCCOMM_LimitMVToRect (6.1.4.1.3) + * + * Description: + * Limits the motion vector associated with the current block/macroblock to + * prevent the motion compensated block/macroblock from moving outside a + * bounding rectangle as shown in Figure 6-1. + * + * Input Arguments: + * + * pSrcMV - pointer to the motion vector associated with the current block + * or macroblock + * pRectVOPRef - pointer to the bounding rectangle + * Xcoord, Ycoord - coordinates of the current block or macroblock + * size - size of the current block or macroblock; must be equal to 8 or + * 16. + * + * Output Arguments: + * + * pDstMV - pointer to the limited motion vector + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: + * pSrcMV, pDstMV, or pRectVOPRef. + * - size is not equal to either 8 or 16. + * - the width or height of the bounding rectangle is less than + * twice the block size. 
+ */ +OMXResult omxVCCOMM_LimitMVToRect ( + const OMXVCMotionVector *pSrcMV, + OMXVCMotionVector *pDstMV, + const OMXRect *pRectVOPRef, + OMX_INT Xcoord, + OMX_INT Ycoord, + OMX_INT size +); + + + +/** + * Function: omxVCCOMM_SAD_16x (6.1.4.1.4) + * + * Description: + * This function calculates the SAD for 16x16 and 16x8 blocks. + * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 16-byte + * boundary. + * iStepOrg - Step of the original block buffer + * pSrcRef - Pointer to the reference block + * iStepRef - Step of the reference block buffer + * iHeight - Height of the block + * + * Output Arguments: + * + * pDstSAD - Pointer of result SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pDstSAD, or pSrcRef + * - pSrcOrg is not 16-byte aligned. + * - iStepOrg <= 0 or iStepOrg is not a multiple of 16 + * - iStepRef <= 0 or iStepRef is not a multiple of 16 + * - iHeight is not 8 or 16 + * + */ +OMXResult omxVCCOMM_SAD_16x ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_S32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCCOMM_SAD_8x (6.1.4.1.5) + * + * Description: + * This function calculates the SAD for 8x16, 8x8, 8x4 blocks. + * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 8-byte + * boundary. + * iStepOrg - Step of the original block buffer + * pSrcRef - Pointer to the reference block + * iStepRef - Step of the reference block buffer + * iHeight - Height of the block + * + * Output Arguments: + * + * pDstSAD -Pointer of result SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. 
Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pDstSAD, or pSrcRef + * - pSrcOrg is not 8-byte aligned. + * - iStepOrg <= 0 or iStepOrg is not a multiple of 8 + * - iStepRef <= 0 or iStepRef is not a multiple of 8 + * - iHeight is not 4, 8 or 16 + * + */ +OMXResult omxVCCOMM_SAD_8x ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_S32*pDstSAD, + OMX_U32 iHeight +); + + + +/* 6.2.1.1 Direction */ +/* The direction enumerator is used with functions that perform AC/DC prediction and zig-zag scan. */ + +enum { + OMX_VC_NONE = 0, + OMX_VC_HORIZONTAL = 1, + OMX_VC_VERTICAL = 2 +}; + + + +/* 6.2.1.2 Bilinear Interpolation */ +/* The bilinear interpolation enumerator is used with motion estimation, motion compensation, and reconstruction functions. */ + +enum { + OMX_VC_INTEGER_PIXEL = 0, /* case a */ + OMX_VC_HALF_PIXEL_X = 1, /* case b */ + OMX_VC_HALF_PIXEL_Y = 2, /* case c */ + OMX_VC_HALF_PIXEL_XY = 3 /* case d */ +}; + + + +/* 6.2.1.3 Neighboring Macroblock Availability */ +/* Neighboring macroblock availability is indicated using the following flags: */ + +enum { + OMX_VC_UPPER = 1, /** above macroblock is available */ + OMX_VC_LEFT = 2, /** left macroblock is available */ + OMX_VC_CENTER = 4, + OMX_VC_RIGHT = 8, + OMX_VC_LOWER = 16, + OMX_VC_UPPER_LEFT = 32, /** above-left macroblock is available */ + OMX_VC_UPPER_RIGHT = 64, /** above-right macroblock is available */ + OMX_VC_LOWER_LEFT = 128, + OMX_VC_LOWER_RIGHT = 256 +}; + + + +/* 6.2.1.4 Video Components */ +/* A data type that enumerates video components is defined as follows: */ + +typedef enum { + OMX_VC_LUMINANCE, /** Luminance component */ + OMX_VC_CHROMINANCE /** chrominance component */ +} OMXVCM4P2VideoComponent; + + + +/* 6.2.1.5 MacroblockTypes */ +/* A data type that enumerates macroblock types is defined as follows: */ + +typedef enum { + OMX_VC_INTER = 0, /** P 
picture or P-VOP */ + OMX_VC_INTER_Q = 1, /** P picture or P-VOP */ + OMX_VC_INTER4V = 2, /** P picture or P-VOP */ + OMX_VC_INTRA = 3, /** I and P picture, I- and P-VOP */ + OMX_VC_INTRA_Q = 4, /** I and P picture, I- and P-VOP */ + OMX_VC_INTER4V_Q = 5 /** P picture or P-VOP (H.263)*/ +} OMXVCM4P2MacroblockType; + + + +/* 6.2.1.6 Coordinates */ +/* Coordinates are represented as follows: */ + +typedef struct { + OMX_INT x; + OMX_INT y; +} OMXVCM4P2Coordinate; + + + +/* 6.2.1.7 Motion Estimation Algorithms */ +/* A data type that enumerates motion estimation search methods is defined as follows: */ + +typedef enum { + OMX_VC_M4P2_FAST_SEARCH = 0, /** Fast motion search */ + OMX_VC_M4P2_FULL_SEARCH = 1 /** Full motion search */ +} OMXVCM4P2MEMode; + + + +/* 6.2.1.8 Motion Estimation Parameters */ +/* A data structure containing control parameters for + * motion estimation functions is defined as follows: + */ + +typedef struct { + OMX_INT searchEnable8x8; /** enables 8x8 search */ + OMX_INT halfPelSearchEnable; /** enables half-pel resolution */ + OMX_INT searchRange; /** search range */ + OMX_INT rndVal; /** rounding control; 0-disabled, 1-enabled*/ +} OMXVCM4P2MEParams; + + + +/* 6.2.1.9 Macroblock Information */ +/* A data structure containing macroblock parameters for + * motion estimation functions is defined as follows: + */ + +typedef struct { + OMX_S32 sliceId; /* slice number */ + OMXVCM4P2MacroblockType mbType; /* MB type: OMX_VC_INTRA, OMX_VC_INTER, or OMX_VC_INTER4V */ + OMX_S32 qp; /* quantization parameter*/ + OMX_U32 cbpy; /* CBP Luma */ + OMX_U32 cbpc; /* CBP Chroma */ + OMXVCMotionVector pMV0[2][2]; /* motion vector, represented using 1/2-pel units, + * pMV0[blocky][blockx] (blocky = 0~1, blockx = 0~1) + */ + OMXVCMotionVector pMVPred[2][2]; /* motion vector prediction, represented using 1/2-pel units, + * pMVPred[blocky][blockx] (blocky = 0~1, blockx = 0~1) + */ + OMX_U8 pPredDir[2][2]; /* AC prediction direction: + * OMX_VC_NONE, OMX_VC_VERTICAL, 
OMX_VC_HORIZONTAL + */ +} OMXVCM4P2MBInfo, *OMXVCM4P2MBInfoPtr; + + + +/** + * Function: omxVCM4P2_FindMVpred (6.2.3.1.1) + * + * Description: + * Predicts a motion vector for the current block using the procedure + * specified in [ISO14496-2], subclause 7.6.5. The resulting predicted MV is + * returned in pDstMVPred. If the parameter pDstMVPredME is not NULL then + * the set of three MV candidates used for prediction is also returned, + * otherwise pDstMVPredME is NULL upon return. + * + * Input Arguments: + * + * pSrcMVCurMB - pointer to the MV buffer associated with the current Y + * macroblock; a value of NULL indicates unavailability. + * pSrcCandMV1 - pointer to the MV buffer containing the 4 MVs associated + * with the MB located to the left of the current MB; set to NULL + * if there is no MB to the left. + * pSrcCandMV2 - pointer to the MV buffer containing the 4 MVs associated + * with the MB located above the current MB; set to NULL if there + * is no MB located above the current MB. + * pSrcCandMV3 - pointer to the MV buffer containing the 4 MVs associated + * with the MB located to the right and above the current MB; set + * to NULL if there is no MB located to the above-right. + * iBlk - the index of block in the current macroblock + * pDstMVPredME - MV candidate return buffer; if set to NULL then + * prediction candidate MVs are not returned and pDstMVPredME will + * be NULL upon function return; if pDstMVPredME is non-NULL then it + * must point to a buffer containing sufficient space for three + * return MVs. + * + * Output Arguments: + * + * pDstMVPred - pointer to the predicted motion vector + * pDstMVPredME - if non-NULL upon input then pDstMVPredME points upon + * return to a buffer containing the three motion vector candidates + * used for prediction as specified in [ISO14496-2], subclause + * 7.6.5, otherwise if NULL upon input then pDstMVPredME is NULL + * upon output. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - the pointer pDstMVPred is NULL + * - the parameter iBlk does not fall into the range 0 <= iBlk<=3 + * + */ +OMXResult omxVCM4P2_FindMVpred ( + const OMXVCMotionVector *pSrcMVCurMB, + const OMXVCMotionVector *pSrcCandMV1, + const OMXVCMotionVector *pSrcCandMV2, + const OMXVCMotionVector *pSrcCandMV3, + OMXVCMotionVector *pDstMVPred, + OMXVCMotionVector *pDstMVPredME, + OMX_INT iBlk +); + + + +/** + * Function: omxVCM4P2_IDCT8x8blk (6.2.3.2.1) + * + * Description: + * Computes a 2D inverse DCT for a single 8x8 block, as defined in + * [ISO14496-2]. + * + * Input Arguments: + * + * pSrc - pointer to the start of the linearly arranged IDCT input buffer; + * must be aligned on a 16-byte boundary. According to + * [ISO14496-2], the input coefficient values should lie within the + * range [-2048, 2047]. + * + * Output Arguments: + * + * pDst - pointer to the start of the linearly arranged IDCT output buffer; + * must be aligned on a 16-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - pSrc or pDst is NULL. + * - pSrc or pDst is not 16-byte aligned. + * + */ +OMXResult omxVCM4P2_IDCT8x8blk ( + const OMX_S16 *pSrc, + OMX_S16 *pDst +); + + + +/** + * Function: omxVCM4P2_MEGetBufSize (6.2.4.1.1) + * + * Description: + * Computes the size, in bytes, of the vendor-specific specification + * structure for the following motion estimation functions: + * BlockMatch_Integer_8x8, BlockMatch_Integer_16x16, and MotionEstimationMB. 
+ * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P2MEMode + * pMEParams - motion estimation parameters + * + * Output Arguments: + * + * pSize - pointer to the number of bytes required for the specification + * structure + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - one or more of the following is true: + * - an invalid value was specified for the parameter MEmode + * - a negative or zero value was specified for the + * parameter pMEParams->searchRange + * + */ +OMXResult omxVCM4P2_MEGetBufSize ( + OMXVCM4P2MEMode MEmode, + const OMXVCM4P2MEParams *pMEParams, + OMX_U32 *pSize +); + + + +/** + * Function: omxVCM4P2_MEInit (6.2.4.1.2) + * + * Description: + * Initializes the vendor-specific specification structure required for the + * following motion estimation functions: BlockMatch_Integer_8x8, + * BlockMatch_Integer_16x16, and MotionEstimationMB. Memory for the + * specification structure *pMESpec must be allocated prior to calling the + * function, and should be aligned on a 4-byte boundary. Following + * initialization by this function, the vendor-specific structure *pMESpec + * should contain an implementation-specific representation of all motion + * estimation parameters received via the structure pMEParams, for example + * rndVal, searchRange, etc. The number of bytes required for the + * specification structure can be determined using the function + * omxVCM4P2_MEGetBufSize. 
+ * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P2MEMode + * pMEParams - motion estimation parameters + * pMESpec - pointer to the uninitialized ME specification structure + * + * Output Arguments: + * + * pMESpec - pointer to the initialized ME specification structure + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - one or more of the following is true: + * - an invalid value was specified for the parameter MEmode + * - a negative or zero value was specified for the + * parameter pMEParams->searchRange + * + */ +OMXResult omxVCM4P2_MEInit ( + OMXVCM4P2MEMode MEmode, + const OMXVCM4P2MEParams*pMEParams, + void *pMESpec +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Integer_16x16 (6.2.4.2.1) + * + * Description: + * Performs a 16x16 block search; estimates motion vector and associated + * minimum SAD. Both the input and output motion vectors are represented using + * half-pixel units, and therefore a shift left or right by 1 bit may be + * required, respectively, to match the input or output MVs with other + * functions that either generate output MVs or expect input MVs represented + * using integer pixel units. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * MB that corresponds to the location of the current macroblock in + * the current plane. + * refWidth - width of the reference plane + * pRefRect - pointer to the valid reference plane rectangle; coordinates + * are specified relative to the image origin. Rectangle + * boundaries may extend beyond image boundaries if the image has + * been padded. For example, if padding extends 4 pixels beyond + * frame border, then the value for the left border could be set to + * -4. 
+ * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 256 + * entries); must be aligned on a 16-byte boundary. The number of + * bytes between lines (step) is 16. + * pCurrPointPos - position of the current macroblock in the current plane + * pSrcPreMV - pointer to predicted motion vector; NULL indicates no + * predicted MV + * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced + * by pSrcPreMV); may be set to NULL if unavailable. + * pMESpec - vendor-specific motion estimation specification structure; + * must have been allocated and then initialized using + * omxVCM4P2_MEInit prior to calling the block matching function. + * + * Output Arguments: + * + * pDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: pSrcRefBuf, + * pRefRect, pSrcCurrBuff, pCurrPointPos, pDstMV, pDstSAD or + * pMESpec, or + * - pSrcCurrBuf is not 16-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Integer_16x16 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCMotionVector*pSrcPreMV, + const OMX_INT *pSrcPreSAD, + void *pMESpec, + OMXVCMotionVector*pDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Integer_8x8 (6.2.4.2.2) + * + * Description: + * Performs an 8x8 block search; estimates motion vector and associated + * minimum SAD. 
Both the input and output motion vectors are represented + * using half-pixel units, and therefore a shift left or right by 1 bit may be + * required, respectively, to match the input or output MVs with other + * functions that either generate output MVs or expect input MVs represented + * using integer pixel units. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * block that corresponds to the location of the current 8x8 block + * in the current plane. + * refWidth - width of the reference plane + * pRefRect - pointer to the valid reference plane rectangle; coordinates + * are specified relative to the image origin. Rectangle + * boundaries may extend beyond image boundaries if the image has + * been padded. + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 128 + * entries); must be aligned on an 8-byte boundary. The number of + * bytes between lines (step) is 16 bytes. + * pCurrPointPos - position of the current block in the current plane + * pSrcPreMV - pointer to predicted motion vector; NULL indicates no + * predicted MV + * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced + * by pSrcPreMV); may be set to NULL if unavailable. + * pMESpec - vendor-specific motion estimation specification structure; + * must have been allocated and then initialized using + * omxVCM4P2_MEInit prior to calling the block matching function. + * + * Output Arguments: + * + * pDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. 
Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: pSrcRefBuf, + * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or + * pMESpec, or + * - pSrcCurrBuf is not 8-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Integer_8x8 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCMotionVector *pSrcPreMV, + const OMX_INT *pSrcPreSAD, + void *pMESpec, + OMXVCMotionVector *pDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Half_16x16 (6.2.4.2.3) + * + * Description: + * Performs a 16x16 block match with half-pixel resolution. Returns the + * estimated motion vector and associated minimum SAD. This function + * estimates the half-pixel motion vector by interpolating the integer + * resolution motion vector referenced by the input parameter pSrcDstMV, i.e., + * the initial integer MV is generated externally. The input parameters + * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of + * 16x16 integer search prior to calling BlockMatch_Half_16x16. The function + * BlockMatch_Integer_16x16 may be used for integer motion estimation. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * macroblock that corresponds to the location of the current + * macroblock in the current plane. + * refWidth - width of the reference plane + * pRefRect - reference plane valid region rectangle + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 256 + * entries); must be aligned on a 16-byte boundary. The number of + * bytes between lines (step) is 16. 
+ * pSearchPointRefPos - position of the starting point for half pixel + * search (specified in terms of integer pixel units) in the + * reference plane, i.e., the reference position pointed to by the + * predicted motion vector. + * rndVal - rounding control parameter: 0 - disabled; 1 - enabled. + * pSrcDstMV - pointer to the initial MV estimate; typically generated + * during a prior 16x16 integer search; specified in terms of + * half-pixel units. + * + * Output Arguments: + * + * pSrcDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: pSrcRefBuf, + * pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV, or + * - pSrcCurrBuf is not 16-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Half_16x16 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pSearchPointRefPos, + OMX_INT rndVal, + OMXVCMotionVector *pSrcDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Half_8x8 (6.2.4.2.4) + * + * Description: + * Performs an 8x8 block match with half-pixel resolution. Returns the + * estimated motion vector and associated minimum SAD. This function + * estimates the half-pixel motion vector by interpolating the integer + * resolution motion vector referenced by the input parameter pSrcDstMV, i.e., + * the initial integer MV is generated externally. The input parameters + * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of + * 8x8 integer search prior to calling BlockMatch_Half_8x8. The function + * BlockMatch_Integer_8x8 may be used for integer motion estimation. 
+ * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * block that corresponds to the location of the current 8x8 block + * in the current plane. + * refWidth - width of the reference plane + * pRefRect - reference plane valid region rectangle + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 128 + * entries); must be aligned on an 8-byte boundary. The number of + * bytes between lines (step) is 16. + * pSearchPointRefPos - position of the starting point for half pixel + * search (specified in terms of integer pixel units) in the + * reference plane. + * rndVal - rounding control parameter: 0 - disabled; 1 - enabled. + * pSrcDstMV - pointer to the initial MV estimate; typically generated + * during a prior 8x8 integer search, specified in terms of + * half-pixel units. + * + * Output Arguments: + * + * pSrcDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: + * pSrcRefBuf, pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV + * - pSrcCurrBuf is not 8-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Half_8x8 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pSearchPointRefPos, + OMX_INT rndVal, + OMXVCMotionVector *pSrcDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_MotionEstimationMB (6.2.4.3.1) + * + * Description: + * Performs motion search for a 16x16 macroblock. Selects best motion search + * strategy from among inter-1MV, inter-4MV, and intra modes. Supports + * integer and half pixel resolution. 
+ * + * Input Arguments: + * + * pSrcCurrBuf - pointer to the top-left corner of the current MB in the + * original picture plane; must be aligned on a 16-byte boundary. + * The function does not expect source data outside the region + * bounded by the MB to be available; for example it is not + * necessary for the caller to guarantee the availability of + * pSrcCurrBuf[-SrcCurrStep], i.e., the row of pixels above the MB + * to be processed. + * srcCurrStep - width of the original picture plane, in terms of full + * pixels; must be a multiple of 16. + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * plane location corresponding to the location of the current + * macroblock in the current plane; must be aligned on a 16-byte + * boundary. + * srcRefStep - width of the reference picture plane, in terms of full + * pixels; must be a multiple of 16. + * pRefRect - reference plane valid region rectangle, specified relative to + * the image origin + * pCurrPointPos - position of the current macroblock in the current plane + * pMESpec - pointer to the vendor-specific motion estimation specification + * structure; must be allocated and then initialized using + * omxVCM4P2_MEInit prior to calling this function. + * pMBInfo - array, of dimension four, containing pointers to information + * associated with four nearby MBs: + * - pMBInfo[0] - pointer to left MB information + * - pMBInfo[1] - pointer to top MB information + * - pMBInfo[2] - pointer to top-left MB information + * - pMBInfo[3] - pointer to top-right MB information + * Any pointer in the array may be set equal to NULL if the + * corresponding MB doesn't exist. 
For each MB, the following structure + * members are used: + * - mbType - macroblock type, either OMX_VC_INTRA, OMX_VC_INTER, or + * OMX_VC_INTER4V + * - pMV0[2][2] - estimated motion vectors; represented + * in 1/2 pixel units + * - sliceID - number of the slice to which the MB belongs + * pSrcDstMBCurr - pointer to information structure for the current MB. + * The following entries should be set prior to calling the + * function: sliceID - the number of the slice to which the + * current MB belongs. The structure elements cbpy and cbpc are + * ignored. + * + * Output Arguments: + * + * pSrcDstMBCurr - pointer to updated information structure for the current + * MB after MB-level motion estimation has been completed. The + * following structure members are updated by the ME function: + * - mbType - macroblock type: OMX_VC_INTRA, OMX_VC_INTER, or + * OMX_VC_INTER4V. + * - pMV0[2][2] - estimated motion vectors; represented in + * terms of 1/2 pel units. + * - pMVPred[2][2] - predicted motion vectors; represented + * in terms of 1/2 pel units. + * The structure members cbpy and cbpc are not updated by the function. + * pDstSAD - pointer to the minimum SAD for INTER1V, or sum of minimum SADs + * for INTER4V + * pDstBlockSAD - pointer to an array of SAD values for each of the four + * 8x8 luma blocks in the MB. The block SADs are in scan order for + * each MB. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: pSrcCurrBuf, + * pSrcRefBuf, pRefRect, pCurrPointPos, pMBInter, pMBIntra, + * pSrcDstMBCurr, or pDstSAD. 
+ * + */ +OMXResult omxVCM4P2_MotionEstimationMB ( + const OMX_U8 *pSrcCurrBuf, + OMX_S32 srcCurrStep, + const OMX_U8 *pSrcRefBuf, + OMX_S32 srcRefStep, + const OMXRect*pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + void *pMESpec, + const OMXVCM4P2MBInfoPtr *pMBInfo, + OMXVCM4P2MBInfo *pSrcDstMBCurr, + OMX_U16 *pDstSAD, + OMX_U16 *pDstBlockSAD +); + + + +/** + * Function: omxVCM4P2_DCT8x8blk (6.2.4.4.1) + * + * Description: + * Computes a 2D forward DCT for a single 8x8 block, as defined in + * [ISO14496-2]. + * + * Input Arguments: + * + * pSrc - pointer to the start of the linearly arranged input buffer; must + * be aligned on a 16-byte boundary. Input values (pixel + * intensities) are valid in the range [-255,255]. + * + * Output Arguments: + * + * pDst - pointer to the start of the linearly arranged output buffer; must + * be aligned on a 16-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, returned if: + * - pSrc or pDst is NULL. + * - pSrc or pDst is not 16-byte aligned. + * + */ +OMXResult omxVCM4P2_DCT8x8blk ( + const OMX_S16 *pSrc, + OMX_S16 *pDst +); + + + +/** + * Function: omxVCM4P2_QuantIntra_I (6.2.4.4.2) + * + * Description: + * Performs quantization on intra block coefficients. This function supports + * bits_per_pixel == 8. + * + * Input Arguments: + * + * pSrcDst - pointer to the input intra block coefficients; must be aligned + * on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale). + * blockIndex - block index indicating the component type and position, + * valid in the range 0 to 5, as defined in [ISO14496-2], subclause + * 6.1.3.8. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pSrcDst - pointer to the output (quantized) interblock coefficients. 
+ * When shortVideoHeader==1, AC coefficients are saturated on the + * interval [-127, 127], and DC coefficients are saturated on the + * interval [1, 254]. When shortVideoHeader==0, AC coefficients + * are saturated on the interval [-2047, 2047]. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - pSrcDst is NULL. + * - blockIndex < 0 or blockIndex >= 10 + * - QP <= 0 or QP >= 32. + * + */ +OMXResult omxVCM4P2_QuantIntra_I ( + OMX_S16 *pSrcDst, + OMX_U8 QP, + OMX_INT blockIndex, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_QuantInter_I (6.2.4.4.3) + * + * Description: + * Performs quantization on an inter coefficient block; supports + * bits_per_pixel == 8. + * + * Input Arguments: + * + * pSrcDst - pointer to the input inter block coefficients; must be aligned + * on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale) + * shortVideoHeader - binary flag indicating presence of short_video_header; + * shortVideoHeader==1 selects linear intra DC mode, and + * shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pSrcDst - pointer to the output (quantized) interblock coefficients. + * When shortVideoHeader==1, AC coefficients are saturated on the + * interval [-127, 127], and DC coefficients are saturated on the + * interval [1, 254]. When shortVideoHeader==0, AC coefficients + * are saturated on the interval [-2047, 2047]. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - pSrcDst is NULL. + * - QP <= 0 or QP >= 32. + * + */ +OMXResult omxVCM4P2_QuantInter_I ( + OMX_S16 *pSrcDst, + OMX_U8 QP, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_TransRecBlockCoef_intra (6.2.4.4.4) + * + * Description: + * Quantizes the DCT coefficients, implements intra block AC/DC coefficient + * prediction, and reconstructs the current intra block texture for prediction + * on the next frame. 
Quantized row and column coefficients are returned in + * the updated coefficient buffers. + * + * Input Arguments: + * + * pSrc - pointer to the pixels of current intra block; must be aligned on + * an 8-byte boundary. + * pPredBufRow - pointer to the coefficient row buffer containing + * ((num_mb_per_row * 2 + 1) * 8) elements of type OMX_S16. + * Coefficients are organized into blocks of eight as described + * below (Internal Prediction Coefficient Update Procedures). The + * DC coefficient is first, and the remaining buffer locations + * contain the quantized AC coefficients. Each group of eight row + * buffer elements combined with one element eight elements ahead + * contains the coefficient predictors of the neighboring block + * that is spatially above or to the left of the block currently to + * be decoded. A negative-valued DC coefficient indicates that this + * neighboring block is not INTRA-coded or out of bounds, and + * therefore the AC and DC coefficients are invalid. Pointer must + * be aligned on an 8-byte boundary. + * pPredBufCol - pointer to the prediction coefficient column buffer + * containing 16 elements of type OMX_S16. Coefficients are + * organized as described in section 6.2.2.5. Pointer must be + * aligned on an 8-byte boundary. + * pSumErr - pointer to a flag indicating whether or not AC prediction is + * required; AC prediction is enabled if *pSumErr >=0, but the + * value is not used for coefficient prediction, i.e., the sum of + * absolute differences starts from 0 for each call to this + * function. Otherwise AC prediction is disabled if *pSumErr < 0 . + * blockIndex - block index indicating the component type and position, as + * defined in [ISO14496-2], subclause 6.1.3.8. 
+ * curQp - quantization parameter of the macroblock to which the current + * block belongs + * pQpBuf - pointer to a 2-element quantization parameter buffer; pQpBuf[0] + * contains the quantization parameter associated with the 8x8 + * block left of the current block (QPa), and pQpBuf[1] contains + * the quantization parameter associated with the 8x8 block above + * the current block (QPc). In the event that the corresponding + * block is outside of the VOP bound, the Qp value will not affect + * the intra prediction process, as described in [ISO14496-2], + * sub-clause 7.4.3.3, Adaptive AC Coefficient Prediction. + * srcStep - width of the source buffer; must be a multiple of 8. + * dstStep - width of the reconstructed destination buffer; must be a + * multiple of 16. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pDst - pointer to the quantized DCT coefficient buffer; pDst[0] contains + * the predicted DC coefficient; the remaining entries contain the + * quantized AC coefficients (without prediction). The pointer + * pDst must be aligned on a 16-byte boundary. + * pRec - pointer to the reconstructed texture; must be aligned on an + * 8-byte boundary. + * pPredBufRow - pointer to the updated coefficient row buffer + * pPredBufCol - pointer to the updated coefficient column buffer + * pPreACPredict - if prediction is enabled, the parameter points to the + * start of the buffer containing the coefficient differences for + * VLC encoding. The entry pPreACPredict[0] indicates prediction + * direction for the current block and takes one of the following + * values: OMX_VC_NONE (prediction disabled), OMX_VC_HORIZONTAL, or + * OMX_VC_VERTICAL. The entries + * pPreACPredict[1]-pPreACPredict[7] contain predicted AC + * coefficients. 
If prediction is disabled (*pSumErr<0) then the + * contents of this buffer are undefined upon return from the + * function + * pSumErr - pointer to the value of the accumulated AC coefficient errors, + * i.e., sum of the absolute differences between predicted and + * unpredicted AC coefficients + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments: + * - At least one of the following pointers is NULL: pSrc, pDst, pRec, + * pPredBufRow, pPredBufCol, pQpBuf, pPreACPredict, pSumErr. + * - blockIndex < 0 or blockIndex >= 10; + * - curQp <= 0 or curQp >= 32. + * - srcStep, or dstStep <= 0 or not a multiple of 8. + * - pDst is not 16-byte aligned. + * - At least one of the following pointers is not 8-byte aligned: + * pSrc, pRec. + * + * Note: The coefficient buffers must be updated in accordance with the + * update procedures defined in section 6.2.2. + * + */ +OMXResult omxVCM4P2_TransRecBlockCoef_intra ( + const OMX_U8 *pSrc, + OMX_S16 *pDst, + OMX_U8 *pRec, + OMX_S16 *pPredBufRow, + OMX_S16 *pPredBufCol, + OMX_S16 *pPreACPredict, + OMX_INT *pSumErr, + OMX_INT blockIndex, + OMX_U8 curQp, + const OMX_U8 *pQpBuf, + OMX_INT srcStep, + OMX_INT dstStep, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_TransRecBlockCoef_inter (6.2.4.4.5) + * + * Description: + * Implements DCT, and quantizes the DCT coefficients of the inter block + * while reconstructing the texture residual. There is no boundary check for + * the bit stream buffer. + * + * Input Arguments: + * + * pSrc - pointer to the residuals to be encoded; must be aligned on a + * 16-byte boundary. + * QP - quantization parameter. + * shortVideoHeader - binary flag indicating presence of short_video_header; + * shortVideoHeader==1 selects linear intra DC mode, and + * shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pDst - pointer to the quantized DCT coefficients buffer; must be aligned + * on a 16-byte boundary. 
+ * pRec - pointer to the reconstructed texture residuals; must be aligned + * on a 16-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is either NULL or + * not 16-byte aligned: + * - pSrc + * - pDst + * - pRec + * - QP <= 0 or QP >= 32. + * + */ +OMXResult omxVCM4P2_TransRecBlockCoef_inter ( + const OMX_S16 *pSrc, + OMX_S16 *pDst, + OMX_S16 *pRec, + OMX_U8 QP, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (6.2.4.5.2) + * + * Description: + * Performs zigzag scan and VLC encoding of AC and DC coefficients for one + * intra block. Two versions of the function (DCVLC and ACVLC) are provided + * in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding". + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the bitstream + * pBitOffset - pointer to the bit position in the byte pointed by + * *ppBitStream. Valid within 0 to 7. + * pQDctBlkCoef - pointer to the quantized DCT coefficient + * predDir - AC prediction direction, which is used to decide the zigzag + * scan pattern; takes one of the following values: + * - OMX_VC_NONE - AC prediction not used. + * Performs classical zigzag scan. + * - OMX_VC_HORIZONTAL - Horizontal prediction. + * Performs alternate-vertical zigzag scan. + * - OMX_VC_VERTICAL - Vertical prediction. + * Performs alternate-horizontal zigzag scan. + * pattern - block pattern which is used to decide whether this block is + * encoded + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. 
+ * videoComp - video component type (luminance, chrominance) of the current + * block + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is encoded, so + * that it points to the current byte in the bit stream buffer. + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments: + * - At least one of the following pointers is NULL: ppBitStream, + * *ppBitStream, pBitOffset, pQDctBlkCoef. + * - *pBitOffset < 0, or *pBitOffset >7. + * - PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or + * OMX_VC_VERTICAL. + * - VideoComp is not one component of enum OMXVCM4P2VideoComponent. + * + */ +OMXResult omxVCM4P2_EncodeVLCZigzag_IntraDCVLC ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 predDir, + OMX_U8 pattern, + OMX_INT shortVideoHeader, + OMXVCM4P2VideoComponent videoComp +); + + + +/** + * Function: omxVCM4P2_EncodeVLCZigzag_IntraACVLC (6.2.4.5.2) + * + * Description: + * Performs zigzag scan and VLC encoding of AC and DC coefficients for one + * intra block. Two versions of the function (DCVLC and ACVLC) are provided + * in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding. + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the bitstream + * pBitOffset - pointer to the bit position in the byte pointed by + * *ppBitStream. Valid within 0 to 7. + * pQDctBlkCoef - pointer to the quantized DCT coefficient + * predDir - AC prediction direction, which is used to decide the zigzag + * scan pattern; takes one of the following values: + * - OMX_VC_NONE - AC prediction not used. + * Performs classical zigzag scan. 
+ * - OMX_VC_HORIZONTAL - Horizontal prediction. + * Performs alternate-vertical zigzag scan. + * - OMX_VC_VERTICAL - Vertical prediction. + * Performs alternate-horizontal zigzag scan. + * pattern - block pattern which is used to decide whether this block is + * encoded + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is encoded, so + * that it points to the current byte in the bit stream buffer. + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments: + * - At least one of the following pointers is NULL: ppBitStream, + * *ppBitStream, pBitOffset, pQDctBlkCoef. + * - *pBitOffset < 0, or *pBitOffset >7. + * - PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or + * OMX_VC_VERTICAL. + * - VideoComp is not one component of enum OMXVCM4P2VideoComponent. + * + */ +OMXResult omxVCM4P2_EncodeVLCZigzag_IntraACVLC ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 predDir, + OMX_U8 pattern, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_EncodeVLCZigzag_Inter (6.2.4.5.3) + * + * Description: + * Performs classical zigzag scanning and VLC encoding for one inter block. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream + * pBitOffset - pointer to the bit position in the byte pointed by + * *ppBitStream. 
Valid within 0 to 7 + * pQDctBlkCoef - pointer to the quantized DCT coefficient + * pattern - block pattern which is used to decide whether this block is + * encoded + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is encoded so that + * it points to the current byte in the bit stream buffer. + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments + * - At least one of the following pointers is NULL: ppBitStream, *ppBitStream, + * pBitOffset, pQDctBlkCoef + * - *pBitOffset < 0, or *pBitOffset >7. + * + */ +OMXResult omxVCM4P2_EncodeVLCZigzag_Inter ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 pattern, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_EncodeMV (6.2.4.5.4) + * + * Description: + * Predicts a motion vector for the current macroblock, encodes the + * difference, and writes the output to the stream buffer. The input MVs + * pMVCurMB, pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB should lie + * within the ranges associated with the input parameter fcodeForward, as + * described in [ISO14496-2], subclause 7.6.3. This function provides a + * superset of the functionality associated with the function + * omxVCM4P2_FindMVpred. + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the bitstream buffer + * pBitOffset - index of the first free (next available) bit in the stream + * buffer referenced by *ppBitStream, valid in the range 0 to 7. + * pMVCurMB - pointer to the current macroblock motion vector; a value of + * NULL indicates unavailability. 
+ * pSrcMVLeftMB - pointer to the source left macroblock motion vector; a + * value of NULL indicates unavailability. + * pSrcMVUpperMB - pointer to source upper macroblock motion vector; a + * value of NULL indicates unavailability. + * pSrcMVUpperRightMB - pointer to source upper right MB motion vector; a + * value of NULL indicates unavailability. + * fcodeForward - an integer with values from 1 to 7; used in encoding + * motion vectors related to search range, as described in + * [ISO14496-2], subclause 7.6.3. + * MBType - macro block type, valid in the range 0 to 5 + * + * Output Arguments: + * + * ppBitStream - updated pointer to the current byte in the bit stream + * buffer + * pBitOffset - updated index of the next available bit position in stream + * buffer referenced by *ppBitStream + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following pointers is NULL: ppBitStream, + * *ppBitStream, pBitOffset, pMVCurMB + * - *pBitOffset < 0, or *pBitOffset >7. + * - fcodeForward <= 0, or fcodeForward > 7, or MBType < 0. + * + */ +OMXResult omxVCM4P2_EncodeMV ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMXVCMotionVector *pMVCurMB, + const OMXVCMotionVector*pSrcMVLeftMB, + const OMXVCMotionVector *pSrcMVUpperMB, + const OMXVCMotionVector *pSrcMVUpperRightMB, + OMX_INT fcodeForward, + OMXVCM4P2MacroblockType MBType +); + + + +/** + * Function: omxVCM4P2_DecodePadMV_PVOP (6.2.5.1.1) + * + * Description: + * Decodes and pads the four motion vectors associated with a non-intra P-VOP + * macroblock. For macroblocks of type OMX_VC_INTER4V, the output MV is + * padded as specified in [ISO14496-2], subclause 7.6.1.6. Otherwise, for + * macroblocks of types other than OMX_VC_INTER4V, the decoded MV is copied to + * all four output MV buffer entries. 
+ * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream buffer + * pBitOffset - pointer to the bit position in the byte pointed to by + * *ppBitStream. *pBitOffset is valid within [0-7]. + * pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB - pointers to the + * motion vector buffers of the macroblocks spatially at the left, + * upper, and upper-right side of the current macroblock, + * respectively; a value of NULL indicates unavailability. Note: + * Any neighborhood macroblock outside the current VOP or video + * packet or outside the current GOB (when short_video_header is + * 1) for which gob_header_empty is 0 is treated as + * transparent, according to [ISO14496-2], subclause 7.6.5. + * fcodeForward - a code equal to vop_fcode_forward in MPEG-4 bit stream + * syntax + * MBType - the type of the current macroblock. If MBType is not equal to + * OMX_VC_INTER4V, the destination motion vector buffer is still + * filled with the same decoded vector. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded, so + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream + * pDstMVCurMB - pointer to the motion vector buffer for the current + * macroblock; contains four decoded motion vectors + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDstMVCurMB + * - *pBitOffset exceeds [0,7] + * - fcodeForward exceeds (0,7] + * - MBType less than zero + * - motion vector buffer is not 4-byte aligned. 
+ * OMX_Sts_Err - status error + * + */ +OMXResult omxVCM4P2_DecodePadMV_PVOP ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMXVCMotionVector *pSrcMVLeftMB, + OMXVCMotionVector*pSrcMVUpperMB, + OMXVCMotionVector *pSrcMVUpperRightMB, + OMXVCMotionVector*pDstMVCurMB, + OMX_INT fcodeForward, + OMXVCM4P2MacroblockType MBType +); + + + +/** + * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (6.2.5.2.2) + * + * Description: + * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients + * for one intra block. Two versions of the function (DCVLC and ACVLC) are + * provided in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the + * bitstream buffer + * pBitOffset - pointer to the bit position in the current byte referenced + * by *ppBitStream. The parameter *pBitOffset is valid in the + * range [0-7]. + * Bit Position in one byte: |Most Least| + * *pBitOffset |0 1 2 3 4 5 6 7| + * predDir - AC prediction direction; used to select the zigzag scan + * pattern; takes one of the following values: + * - OMX_VC_NONE - AC prediction not used; + * performs classical zigzag scan. + * - OMX_VC_HORIZONTAL - Horizontal prediction; + * performs alternate-vertical zigzag scan; + * - OMX_VC_VERTICAL - Vertical prediction; + * performs alternate-horizontal zigzag scan. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. 
+ * videoComp - video component type (luminance or chrominance) of the + * current block + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded such + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated such that it points to the current + * bit position in the byte pointed by *ppBitStream + * pDst - pointer to the coefficient buffer of current block; must be + * 4-byte aligned. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDst + * - *pBitOffset exceeds [0,7] + * - predDir exceeds [0,2] + * - pDst is not 4-byte aligned + * OMX_Sts_Err - if: + * - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12 + * - At least one of mark bits equals zero + * - Illegal stream encountered; code cannot be located in VLC table + * - Forbidden code encountered in the VLC FLC table. + * - The number of coefficients is greater than 64 + * + */ +OMXResult omxVCM4P2_DecodeVLCZigzag_IntraDCVLC ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_U8 predDir, + OMX_INT shortVideoHeader, + OMXVCM4P2VideoComponent videoComp +); + + + +/** + * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC (6.2.5.2.2) + * + * Description: + * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients + * for one intra block. Two versions of the function (DCVLC and ACVLC) are + * provided in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the + * bitstream buffer + * pBitOffset - pointer to the bit position in the current byte referenced + * by *ppBitStream. 
The parameter *pBitOffset is valid in the + * range [0-7]. Bit Position in one byte: |Most Least| *pBitOffset + * |0 1 2 3 4 5 6 7| + * predDir - AC prediction direction; used to select the zigzag scan + * pattern; takes one of the following values: OMX_VC_NONE - AC + * prediction not used; performs classical zigzag scan. + * OMX_VC_HORIZONTAL - Horizontal prediction; performs + * alternate-vertical zigzag scan; OMX_VC_VERTICAL - Vertical + * prediction; performs alternate-horizontal zigzag scan. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * videoComp - video component type (luminance or chrominance) of the + * current block + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded such + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated such that it points to the current + * bit position in the byte pointed by *ppBitStream + * pDst - pointer to the coefficient buffer of current block; must be + * 4-byte aligned. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments At least one of the following + * pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, + * or At least one of the following conditions is true: + * *pBitOffset exceeds [0,7], preDir exceeds [0,2], or pDst is + * not 4-byte aligned + * OMX_Sts_Err In DecodeVLCZigzag_IntraDCVLC, dc_size > 12 At least one of + * mark bits equals zero Illegal stream encountered; code cannot + * be located in VLC table Forbidden code encountered in the VLC + * FLC table The number of coefficients is greater than 64 + * + */ +OMXResult omxVCM4P2_DecodeVLCZigzag_IntraACVLC ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_U8 predDir, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_DecodeVLCZigzag_Inter (6.2.5.2.3) + * + * Description: + * Performs VLC decoding and inverse zigzag scan for one inter-coded block. + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the stream buffer + * pBitOffset - pointer to the next available bit in the current stream + * byte referenced by *ppBitStream. The parameter *pBitOffset is + * valid within the range [0-7]. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded such + * that it points to the current byte in the stream buffer + * pBitOffset - *pBitOffset is updated after decoding such that it points + * to the next available bit in the stream byte referenced by + * *ppBitStream + * pDst - pointer to the coefficient buffer of current block; must be + * 4-byte aligned. 
+ * + * Return Value: + * + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDst + * - pDst is not 4-byte aligned + * - *pBitOffset exceeds [0,7] + * OMX_Sts_Err - status error, if: + * - At least one mark bit is equal to zero + * - Encountered an illegal stream code that cannot be found in the VLC table + * - Encountered an illegal code in the VLC FLC table + * - The number of coefficients is greater than 64 + * + */ +OMXResult omxVCM4P2_DecodeVLCZigzag_Inter ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_QuantInvIntra_I (6.2.5.3.2) + * + * Description: + * Performs the second inverse quantization mode on an intra/inter coded + * block. Supports bits_per_pixel = 8. The output coefficients are clipped to + * the range [-2048, 2047]. + * + * Input Arguments: + * + * pSrcDst - pointer to the input (quantized) intra/inter block; must be + * aligned on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale) + * videoComp - video component type of the current block. Takes one of the + * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra + * version only). + * shortVideoHeader - binary flag indicating presence of short_video_header + * (intra version only). + * + * Output Arguments: + * + * pSrcDst - pointer to the output (dequantized) intra/inter block + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; one or more of the following is + * true: + * - pSrcDst is NULL + * - QP <= 0 or QP >=31 + * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE. 
+ * + */ +OMXResult omxVCM4P2_QuantInvIntra_I ( + OMX_S16 *pSrcDst, + OMX_INT QP, + OMXVCM4P2VideoComponent videoComp, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_QuantInvInter_I (6.2.5.3.2) + * + * Description: + * Performs the second inverse quantization mode on an intra/inter coded + * block. Supports bits_per_pixel = 8. The output coefficients are clipped to + * the range [-2048, 2047]. + * + * Input Arguments: + * + * pSrcDst - pointer to the input (quantized) intra/inter block; must be + * aligned on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale) + * videoComp - video component type of the current block. Takes one of the + * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra + * version only). + * shortVideoHeader - binary flag indicating presence of short_video_header + * (intra version only). + * + * Output Arguments: + * + * pSrcDst - pointer to the output (dequantized) intra/inter block + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; one or more of the following is + * true: + * - pSrcDst is NULL + * - QP <= 0 or QP >=31 + * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE (intra version only). + * + */ +OMXResult omxVCM4P2_QuantInvInter_I ( + OMX_S16 *pSrcDst, + OMX_INT QP +); + + + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Intra (6.2.5.4.1) + * + * Description: + * Decodes the INTRA block coefficients. Inverse quantization, inversely + * zigzag positioning, and IDCT, with appropriate clipping on each step, are + * performed on the coefficients. The results are then placed in the output + * frame/plane on a pixel basis. Note: This function will be used only when + * at least one non-zero AC coefficient of current block exists in the bit + * stream. The DC only condition will be handled in another function. + * + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream buffer. 
There is no boundary check for the bit stream + * buffer. + * pBitOffset - pointer to the bit position in the byte pointed to by + * *ppBitStream. *pBitOffset is valid within [0-7]. + * step - width of the destination plane + * pCoefBufRow - pointer to the coefficient row buffer; must be aligned on + * an 8-byte boundary. + * pCoefBufCol - pointer to the coefficient column buffer; must be aligned + * on an 8-byte boundary. + * curQP - quantization parameter of the macroblock which the current block + * belongs to + * pQPBuf - pointer to the quantization parameter buffer + * blockIndex - block index indicating the component type and position as + * defined in [ISO14496-2], subclause 6.1.3.8, Figure 6-5. + * intraDCVLC - a code determined by intra_dc_vlc_thr and QP. This allows a + * mechanism to switch between two VLC for coding of Intra DC + * coefficients as per [ISO14496-2], Table 6-21. + * ACPredFlag - a flag equal to ac_pred_flag (of luminance) indicating if + * the ac coefficients of the first row or first column are + * differentially coded for intra coded macroblock. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded, so + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream + * pDst - pointer to the block in the destination plane; must be aligned on + * an 8-byte boundary. + * pCoefBufRow - pointer to the updated coefficient row buffer. + * pCoefBufCol - pointer to the updated coefficient column buffer Note: + * The coefficient buffers must be updated in accordance with the + * update procedure defined in section 6.2.2. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pCoefBufRow, pCoefBufCol, + * pQPBuf, pDst. + * - *pBitOffset exceeds [0,7] + * - curQP exceeds (1, 31) + * - blockIndex exceeds [0,5] + * - step is not the multiple of 8 + * - a pointer alignment requirement was violated. + * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Intra. + * + */ +OMXResult omxVCM4P2_DecodeBlockCoef_Intra ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_U8 *pDst, + OMX_INT step, + OMX_S16 *pCoefBufRow, + OMX_S16 *pCoefBufCol, + OMX_U8 curQP, + const OMX_U8 *pQPBuf, + OMX_INT blockIndex, + OMX_INT intraDCVLC, + OMX_INT ACPredFlag, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Inter (6.2.5.4.2) + * + * Description: + * Decodes the INTER block coefficients. This function performs inverse + * quantization, inverse zigzag positioning, and IDCT (with appropriate + * clipping on each step) on the coefficients. The results (residuals) are + * placed in a contiguous array of 64 elements. For INTER block, the output + * buffer holds the residuals for further reconstruction. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream buffer. There is no boundary check for the bit stream + * buffer. + * pBitOffset - pointer to the bit position in the byte pointed to by + * *ppBitStream. *pBitOffset is valid within [0-7] + * QP - quantization parameter + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded, so + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream + * pDst - pointer to the decoded residual buffer (a contiguous array of 64 + * elements of OMX_S16 data type); must be aligned on a 16-byte + * boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is Null: + * ppBitStream, *ppBitStream, pBitOffset , pDst + * - *pBitOffset exceeds [0,7] + * - QP <= 0. + * - pDst is not 16-byte aligned + * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Inter . + * + */ +OMXResult omxVCM4P2_DecodeBlockCoef_Inter ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_INT QP, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_PredictReconCoefIntra (6.2.5.4.3) + * + * Description: + * Performs adaptive DC/AC coefficient prediction for an intra block. Prior + * to the function call, prediction direction (predDir) should be selected as + * specified in [ISO14496-2], subclause 7.4.3.1. + * + * Input Arguments: + * + * pSrcDst - pointer to the coefficient buffer which contains the quantized + * coefficient residuals (PQF) of the current block; must be + * aligned on a 4-byte boundary. The output coefficients are + * saturated to the range [-2048, 2047]. + * pPredBufRow - pointer to the coefficient row buffer; must be aligned on + * a 4-byte boundary. + * pPredBufCol - pointer to the coefficient column buffer; must be aligned + * on a 4-byte boundary. + * curQP - quantization parameter of the current block. curQP may be equal to + * predQP, especially when the current block and the predictor block + * are in the same macroblock. 
+ * predQP - quantization parameter of the predictor block + * predDir - indicates the prediction direction which takes one of the + * following values: OMX_VC_HORIZONTAL - predict horizontally; + * OMX_VC_VERTICAL - predict vertically + * ACPredFlag - a flag indicating if AC prediction should be performed. It + * is equal to ac_pred_flag in the bit stream syntax of MPEG-4 + * videoComp - video component type (luminance or chrominance) of the + * current block + * + * Output Arguments: + * + * pSrcDst - pointer to the coefficient buffer which contains the quantized + * coefficients (QF) of the current block + * pPredBufRow - pointer to the updated coefficient row buffer + * pPredBufCol - pointer to the updated coefficient column buffer. Note: + * Buffer update: Update the AC prediction buffer (both row and + * column buffer). + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the pointers is NULL: + * pSrcDst, pPredBufRow, or pPredBufCol. + * - curQP <= 0, + * - predQP <= 0, + * - curQP > 31, + * - predQP > 31, + * - predDir exceeds [1,2] + * - pSrcDst, pPredBufRow, or pPredBufCol is not 4-byte aligned. + * + */ +OMXResult omxVCM4P2_PredictReconCoefIntra ( + OMX_S16 *pSrcDst, + OMX_S16 *pPredBufRow, + OMX_S16 *pPredBufCol, + OMX_INT curQP, + OMX_INT predQP, + OMX_INT predDir, + OMX_INT ACPredFlag, + OMXVCM4P2VideoComponent videoComp +); + + + +/** + * Function: omxVCM4P2_MCReconBlock (6.2.5.5.1) + * + * Description: + * Performs motion compensation prediction for an 8x8 block using + * interpolation described in [ISO14496-2], subclause 7.6.2. + * + * Input Arguments: + * + * pSrc - pointer to the block in the reference plane. + * srcStep - distance between the start of consecutive lines in the + * reference plane, in bytes; must be a multiple of 8. + * dstStep - distance between the start of consecutive lines in the + * destination plane, in bytes; must be a multiple of 8. 
+ * pSrcResidue - pointer to a buffer containing the 16-bit prediction + * residuals; must be 16-byte aligned. If the pointer is NULL, then + * no prediction is done, only motion compensation, i.e., the block + * is moved with interpolation. + * predictType - bilinear interpolation type, as defined in section + * 6.2.1.2. + * rndVal - rounding control parameter: 0 - disabled; 1 - enabled. + * + * Output Arguments: + * + * pDst - pointer to the destination buffer; must be 8-byte aligned. If + * prediction residuals are added then output intensities are + * clipped to the range [0,255]. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - pDst is not 8-byte aligned. + * - pSrcResidue is not 16-byte aligned. + * - one or more of the following pointers is NULL: pSrc or pDst. + * - either srcStep or dstStep is not a multiple of 8. + * - invalid type specified for the parameter predictType. + * - the parameter rndVal is not equal either to 0 or 1. 
+ * + */ +OMXResult omxVCM4P2_MCReconBlock ( + const OMX_U8 *pSrc, + OMX_INT srcStep, + const OMX_S16 *pSrcResidue, + OMX_U8 *pDst, + OMX_INT dstStep, + OMX_INT predictType, + OMX_INT rndVal +); + + + +/* 6.3.1.1 Intra 16x16 Prediction Modes */ +/* A data type that enumerates intra_16x16 macroblock prediction modes is defined as follows: */ + +typedef enum { + OMX_VC_16X16_VERT = 0, /**< Intra_16x16_Vertical */ + OMX_VC_16X16_HOR = 1, /**< Intra_16x16_Horizontal */ + OMX_VC_16X16_DC = 2, /**< Intra_16x16_DC */ + OMX_VC_16X16_PLANE = 3 /**< Intra_16x16_Plane */ +} OMXVCM4P10Intra16x16PredMode; + + + +/* 6.3.1.2 Intra 4x4 Prediction Modes */ +/* A data type that enumerates intra_4x4 macroblock prediction modes is defined as follows: */ + +typedef enum { + OMX_VC_4X4_VERT = 0, /**< Intra_4x4_Vertical */ + OMX_VC_4X4_HOR = 1, /**< Intra_4x4_Horizontal */ + OMX_VC_4X4_DC = 2, /**< Intra_4x4_DC */ + OMX_VC_4X4_DIAG_DL = 3, /**< Intra_4x4_Diagonal_Down_Left */ + OMX_VC_4X4_DIAG_DR = 4, /**< Intra_4x4_Diagonal_Down_Right */ + OMX_VC_4X4_VR = 5, /**< Intra_4x4_Vertical_Right */ + OMX_VC_4X4_HD = 6, /**< Intra_4x4_Horizontal_Down */ + OMX_VC_4X4_VL = 7, /**< Intra_4x4_Vertical_Left */ + OMX_VC_4X4_HU = 8 /**< Intra_4x4_Horizontal_Up */ +} OMXVCM4P10Intra4x4PredMode; + + + +/* 6.3.1.3 Chroma Prediction Modes */ +/* A data type that enumerates intra chroma prediction modes is defined as follows: */ + +typedef enum { + OMX_VC_CHROMA_DC = 0, /**< Intra_Chroma_DC */ + OMX_VC_CHROMA_HOR = 1, /**< Intra_Chroma_Horizontal */ + OMX_VC_CHROMA_VERT = 2, /**< Intra_Chroma_Vertical */ + OMX_VC_CHROMA_PLANE = 3 /**< Intra_Chroma_Plane */ +} OMXVCM4P10IntraChromaPredMode; + + + +/* 6.3.1.4 Motion Estimation Modes */ +/* A data type that enumerates H.264 motion estimation modes is defined as follows: */ + +typedef enum { + OMX_VC_M4P10_FAST_SEARCH = 0, /**< Fast motion search */ + OMX_VC_M4P10_FULL_SEARCH = 1 /**< Full motion search */ +} OMXVCM4P10MEMode; + + + +/* 6.3.1.5 Macroblock Types */ +/* A data type 
that enumerates H.264 macroblock types is defined as follows: */ + +typedef enum { + OMX_VC_P_16x16 = 0, /* defined by [ISO14496-10] */ + OMX_VC_P_16x8 = 1, + OMX_VC_P_8x16 = 2, + OMX_VC_P_8x8 = 3, + OMX_VC_PREF0_8x8 = 4, + OMX_VC_INTER_SKIP = 5, + OMX_VC_INTRA_4x4 = 8, + OMX_VC_INTRA_16x16 = 9, + OMX_VC_INTRA_PCM = 10 +} OMXVCM4P10MacroblockType; + + + +/* 6.3.1.6 Sub-Macroblock Types */ +/* A data type that enumerates H.264 sub-macroblock types is defined as follows: */ + +typedef enum { + OMX_VC_SUB_P_8x8 = 0, /* defined by [ISO14496-10] */ + OMX_VC_SUB_P_8x4 = 1, + OMX_VC_SUB_P_4x8 = 2, + OMX_VC_SUB_P_4x4 = 3 +} OMXVCM4P10SubMacroblockType; + + + +/* 6.3.1.7 Variable Length Coding (VLC) Information */ + +typedef struct { + OMX_U8 uTrailing_Ones; /* Trailing ones; 3 at most */ + OMX_U8 uTrailing_One_Signs; /* Trailing ones signal */ + OMX_U8 uNumCoeffs; /* Total number of non-zero coefs, including trailing ones */ + OMX_U8 uTotalZeros; /* Total number of zero coefs */ + OMX_S16 iLevels[16]; /* Levels of non-zero coefs, in reverse zig-zag order */ + OMX_U8 uRuns[16]; /* Runs for levels and trailing ones, in reverse zig-zag order */ +} OMXVCM4P10VLCInfo; + + + +/* 6.3.1.8 Macroblock Information */ + +typedef struct { + OMX_S32 sliceId; /* slice number */ + OMXVCM4P10MacroblockType mbType; /* MB type */ + OMXVCM4P10SubMacroblockType subMBType[4]; /* sub-block type */ + OMX_S32 qpy; /* qp for luma */ + OMX_S32 qpc; /* qp for chroma */ + OMX_U32 cbpy; /* CBP Luma */ + OMX_U32 cbpc; /* CBP Chroma */ + OMXVCMotionVector pMV0[4][4]; /* motion vector, represented using 1/4-pel units, pMV0[blocky][blockx] (blocky = 0~3, blockx =0~3) */ + OMXVCMotionVector pMVPred[4][4]; /* motion vector prediction, Represented using 1/4-pel units, pMVPred[blocky][blockx] (blocky = 0~3, blockx = 0~3) */ + OMX_U8 pRefL0Idx[4]; /* reference picture indices */ + OMXVCM4P10Intra16x16PredMode Intra16x16PredMode; /* best intra 16x16 prediction mode */ + OMXVCM4P10Intra4x4PredMode 
pIntra4x4PredMode[16]; /* best intra 4x4 prediction mode for each block, pMV0 indexed as above */ +} OMXVCM4P10MBInfo, *OMXVCM4P10MBInfoPtr; + + + +/* 6.3.1.9 Motion Estimation Parameters */ + +typedef struct { + OMX_S32 blockSplitEnable8x8; /* enables 16x8, 8x16, 8x8 */ + OMX_S32 blockSplitEnable4x4; /* enable splitting of 8x4, 4x8, 4x4 blocks */ + OMX_S32 halfSearchEnable; + OMX_S32 quarterSearchEnable; + OMX_S32 intraEnable4x4; /* 1=enable, 0=disable */ + OMX_S32 searchRange16x16; /* integer pixel units */ + OMX_S32 searchRange8x8; + OMX_S32 searchRange4x4; +} OMXVCM4P10MEParams; + + + +/** + * Function: omxVCM4P10_PredictIntra_4x4 (6.3.3.1.1) + * + * Description: + * Perform Intra_4x4 prediction for luma samples. If the upper-right block is + * not available, then duplication work should be handled inside the function. + * Users need not define them outside. + * + * Input Arguments: + * + * pSrcLeft - Pointer to the buffer of 4 left pixels: + * p[x, y] (x = -1, y = 0..3) + * pSrcAbove - Pointer to the buffer of 8 above pixels: + * p[x,y] (x = 0..7, y =-1); + * must be aligned on a 4-byte boundary. + * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) + * leftStep - Step of left pixel buffer; must be a multiple of 4. + * dstStep - Step of the destination buffer; must be a multiple of 4. + * predMode - Intra_4x4 prediction mode. + * availability - Neighboring 4x4 block availability flag, refer to + * "Neighboring Macroblock Availability" . + * + * Output Arguments: + * + * pDst - Pointer to the destination buffer; must be aligned on a 4-byte + * boundary. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pDst is NULL. + * dstStep < 4, or dstStep is not a multiple of 4. + * leftStep is not a multiple of 4. + * predMode is not in the valid range of enumeration + * OMXVCM4P10Intra4x4PredMode. 
+ * predMode is OMX_VC_4X4_VERT, but availability doesn't set OMX_VC_UPPER + * indicating p[x,-1] (x = 0..3) is not available. + * predMode is OMX_VC_4X4_HOR, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..3) is not available. + * predMode is OMX_VC_4X4_DIAG_DL, but availability doesn't set + * OMX_VC_UPPER indicating p[x, -1] (x = 0..3) is not available. + * predMode is OMX_VC_4X4_DIAG_DR, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not + * available. + * predMode is OMX_VC_4X4_VR, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not + * available. + * predMode is OMX_VC_4X4_HD, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not + * available. + * predMode is OMX_VC_4X4_VL, but availability doesn't set OMX_VC_UPPER + * indicating p[x,-1] (x = 0..3) is not available. + * predMode is OMX_VC_4X4_HU, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..3) is not available. + * availability sets OMX_VC_UPPER, but pSrcAbove is NULL. + * availability sets OMX_VC_LEFT, but pSrcLeft is NULL. + * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. + * either pSrcAbove or pDst is not aligned on a 4-byte boundary. + * + * Note: + * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if + * they are not used by intra prediction as implied in predMode. 
+ * + */ +OMXResult omxVCM4P10_PredictIntra_4x4 ( + const OMX_U8 *pSrcLeft, + const OMX_U8 *pSrcAbove, + const OMX_U8 *pSrcAboveLeft, + OMX_U8 *pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMXVCM4P10Intra4x4PredMode predMode, + OMX_S32 availability +); + + + +/** + * Function: omxVCM4P10_PredictIntra_16x16 (6.3.3.1.2) + * + * Description: + * Perform Intra_16x16 prediction for luma samples. If the upper-right block + * is not available, then duplication work should be handled inside the + * function. Users need not define them outside. + * + * Input Arguments: + * + * pSrcLeft - Pointer to the buffer of 16 left pixels: p[x, y] (x = -1, y = + * 0..15) + * pSrcAbove - Pointer to the buffer of 16 above pixels: p[x,y] (x = 0..15, + * y= -1); must be aligned on a 16-byte boundary. + * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) + * leftStep - Step of left pixel buffer; must be a multiple of 16. + * dstStep - Step of the destination buffer; must be a multiple of 16. + * predMode - Intra_16x16 prediction mode, please refer to section 3.4.1. + * availability - Neighboring 16x16 MB availability flag. Refer to + * section 3.4.4. + * + * Output Arguments: + * + * pDst - Pointer to the destination buffer; must be aligned on a 16-byte + * boundary. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pDst is NULL. + * dstStep < 16, or dstStep is not a multiple of 16. + * leftStep is not a multiple of 16. + * predMode is not in the valid range of enumeration + * OMXVCM4P10Intra16x16PredMode. + * predMode is OMX_VC_16X16_VERT, but availability doesn't set + * OMX_VC_UPPER indicating p[x,-1] (x = 0..15) is not available. + * predMode is OMX_VC_16X16_HOR, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..15) is not available. 
+ * predMode is OMX_VC_16X16_PLANE, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..15), or p[-1,y] (y = 0..15), or p[-1,-1] is not + * available. + * availability sets OMX_VC_UPPER, but pSrcAbove is NULL. + * availability sets OMX_VC_LEFT, but pSrcLeft is NULL. + * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. + * either pSrcAbove or pDst is not aligned on a 16-byte boundary. + * + * Note: + * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if + * they are not used by intra prediction implied in predMode. + * Note: + * OMX_VC_UPPER_RIGHT is not used in intra_16x16 luma prediction. + * + */ +OMXResult omxVCM4P10_PredictIntra_16x16 ( + const OMX_U8 *pSrcLeft, + const OMX_U8 *pSrcAbove, + const OMX_U8 *pSrcAboveLeft, + OMX_U8 *pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMXVCM4P10Intra16x16PredMode predMode, + OMX_S32 availability +); + + + +/** + * Function: omxVCM4P10_PredictIntraChroma_8x8 (6.3.3.1.3) + * + * Description: + * Performs intra prediction for chroma samples. + * + * Input Arguments: + * + * pSrcLeft - Pointer to the buffer of 8 left pixels: p[x, y] (x = -1, y= + * 0..7). + * pSrcAbove - Pointer to the buffer of 8 above pixels: p[x,y] (x = 0..7, y + * = -1); must be aligned on an 8-byte boundary. + * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) + * leftStep - Step of left pixel buffer; must be a multiple of 8. + * dstStep - Step of the destination buffer; must be a multiple of 8. + * predMode - Intra chroma prediction mode, please refer to section 3.4.3. + * availability - Neighboring chroma block availability flag, please refer + * to "Neighboring Macroblock Availability". + * + * Output Arguments: + * + * pDst - Pointer to the destination buffer; must be aligned on an 8-byte + * boundary. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. 
+ * If any of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pDst is NULL. + * dstStep < 8 or dstStep is not a multiple of 8. + * leftStep is not a multiple of 8. + * predMode is not in the valid range of enumeration + * OMXVCM4P10IntraChromaPredMode. + * predMode is OMX_VC_CHROMA_VERT, but availability doesn't set + * OMX_VC_UPPER indicating p[x,-1] (x = 0..7) is not available. + * predMode is OMX_VC_CHROMA_HOR, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..7) is not available. + * predMode is OMX_VC_CHROMA_PLANE, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..7), or p[-1,y] (y = 0..7), or p[-1,-1] is not + * available. + * availability sets OMX_VC_UPPER, but pSrcAbove is NULL. + * availability sets OMX_VC_LEFT, but pSrcLeft is NULL. + * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. + * either pSrcAbove or pDst is not aligned on an 8-byte boundary. + * + * Note: pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if + * they are not used by intra prediction implied in predMode. + * + * Note: OMX_VC_UPPER_RIGHT is not used in intra chroma prediction. + * + */ +OMXResult omxVCM4P10_PredictIntraChroma_8x8 ( + const OMX_U8 *pSrcLeft, + const OMX_U8 *pSrcAbove, + const OMX_U8 *pSrcAboveLeft, + OMX_U8 *pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMXVCM4P10IntraChromaPredMode predMode, + OMX_S32 availability +); + + + +/** + * Function: omxVCM4P10_InterpolateLuma (6.3.3.2.1) + * + * Description: + * Performs quarter-pixel interpolation for inter luma MB. It is assumed that + * the frame is already padded when calling this function. 
+ * + * Input Arguments: + * + * pSrc - Pointer to the source reference frame buffer + * srcStep - reference frame step, in bytes; must be a multiple of roi.width + * dstStep - destination frame step, in bytes; must be a multiple of + * roi.width + * dx - Fractional part of horizontal motion vector component in 1/4 pixel + * unit; valid in the range [0,3] + * dy - Fractional part of vertical motion vector y component in 1/4 pixel + * unit; valid in the range [0,3] + * roi - Dimension of the interpolation region; the parameters roi.width and + * roi.height must be equal to either 4, 8, or 16. + * + * Output Arguments: + * + * pDst - Pointer to the destination frame buffer: + * if roi.width==4, 4-byte alignment required + * if roi.width==8, 8-byte alignment required + * if roi.width==16, 16-byte alignment required + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pSrc or pDst is NULL. + * srcStep or dstStep < roi.width. + * dx or dy is out of range [0,3]. + * roi.width or roi.height is out of range {4, 8, 16}. + * roi.width is equal to 4, but pDst is not 4 byte aligned. + * roi.width is equal to 8 or 16, but pDst is not 8 byte aligned. + * srcStep or dstStep is not a multiple of 8. + * + */ +OMXResult omxVCM4P10_InterpolateLuma ( + const OMX_U8 *pSrc, + OMX_S32 srcStep, + OMX_U8 *pDst, + OMX_S32 dstStep, + OMX_S32 dx, + OMX_S32 dy, + OMXSize roi +); + + + +/** + * Function: omxVCM4P10_InterpolateChroma (6.3.3.2.2) + * + * Description: + * Performs 1/8-pixel interpolation for inter chroma MB. + * + * Input Arguments: + * + * pSrc -Pointer to the source reference frame buffer + * srcStep -Reference frame step in bytes + * dstStep -Destination frame step in bytes; must be a multiple of + * roi.width. 
+ * dx -Fractional part of horizontal motion vector component in 1/8 pixel + * unit; valid in the range [0,7] + * dy -Fractional part of vertical motion vector component in 1/8 pixel + * unit; valid in the range [0,7] + * roi -Dimension of the interpolation region; the parameters roi.width and + * roi.height must be equal to either 2, 4, or 8. + * + * Output Arguments: + * + * pDst -Pointer to the destination frame buffer: + * if roi.width==2, 2-byte alignment required + * if roi.width==4, 4-byte alignment required + * if roi.width==8, 8-byte alignment required + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pSrc or pDst is NULL. + * srcStep or dstStep < 8. + * dx or dy is out of range [0-7]. + * roi.width or roi.height is out of range {2,4,8}. + * roi.width is equal to 2, but pDst is not 2-byte aligned. + * roi.width is equal to 4, but pDst is not 4-byte aligned. + * roi.width is equal to 8, but pDst is not 8 byte aligned. + * srcStep or dstStep is not a multiple of 8. + * + */ +OMXResult omxVCM4P10_InterpolateChroma ( + const OMX_U8 *pSrc, + OMX_S32 srcStep, + OMX_U8 *pDst, + OMX_S32 dstStep, + OMX_S32 dx, + OMX_S32 dy, + OMXSize roi +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingLuma_VerEdge_I (6.3.3.3.1) + * + * Description: + * Performs in-place deblock filtering on four vertical edges of the luma + * macroblock (16x16). + * + * Input Arguments: + * + * pSrcDst - Pointer to the input macroblock; must be 16-byte aligned. + * srcdstStep -Step of the arrays; must be a multiple of 16. + * pAlpha -Array of size 2 of alpha thresholds (the first item is the alpha + * threshold for the external vertical edge, and the second item is + * for the internal vertical edge); per [ISO14496-10] alpha values + * must be in the range [0,255]. 
+ * pBeta -Array of size 2 of beta thresholds (the first item is the beta + * threshold for the external vertical edge, and the second item is + * for the internal vertical edge); per [ISO14496-10] beta values + * must be in the range [0,18]. + * pThresholds -Array of size 16 of Thresholds (TC0) (values for the left + * edge of each 4x4 block, arranged in vertical block order); must + * be aligned on a 4-byte boundary.. Per [ISO14496-10] values must + * be in the range [0,25]. + * pBS -Array of size 16 of BS parameters (arranged in vertical block + * order); valid in the range [0,4] with the following + * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) + * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * Either of the pointers in pSrcDst, pAlpha, pBeta, pThresholds, or pBS + * is NULL. + * Either pThresholds or pBS is not aligned on a 4-byte boundary. + * pSrcDst is not 16-byte aligned. + * srcdstStep is not a multiple of 16. + * pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * One or more entries in the table pThresholds[0..15]is outside of the + * range [0,25]. + * pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && + * pBS[i^3]!=4) for 0<=i<=3. 
+ * + */ +OMXResult omxVCM4P10_FilterDeblockingLuma_VerEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingLuma_HorEdge_I (6.3.3.3.2) + * + * Description: + * Performs in-place deblock filtering on four horizontal edges of the luma + * macroblock (16x16). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 16-byte aligned. + * srcdstStep - step of the arrays; must be a multiple of 16. + * pAlpha - array of size 2 of alpha thresholds (the first item is the alpha + * threshold for the external horizontal edge, and the second item is + * for the internal horizontal edge); per [ISO14496-10] alpha + * values must be in the range [0,255]. + * pBeta - array of size 2 of beta thresholds (the first item is the beta + * threshold for the external horizontal edge, and the second item + * is for the internal horizontal edge). Per [ISO14496-10] beta + * values must be in the range [0,18]. + * pThresholds - array of size 16 containing thresholds, TC0, for the top + * horizontal edge of each 4x4 block, arranged in horizontal block + * order; must be aligned on a 4-byte boundary. Per [ISO14496-10] + * values must be in the range [0,25]. + * pBS - array of size 16 of BS parameters (arranged in horizontal block + * order); valid in the range [0,4] with the following + * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) + * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr, if one of the following cases occurs: + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds, or pBS. + * - either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * - pSrcDst is not 16-byte aligned. + * - srcdstStep is not a multiple of 16. + * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * - pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * - One or more entries in the table pThresholds[0..15] is + * outside of the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * + */ +OMXResult omxVCM4P10_FilterDeblockingLuma_HorEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingChroma_VerEdge_I (6.3.3.3.3) + * + * Description: + * Performs in-place deblock filtering on four vertical edges of the chroma + * macroblock (8x8). + * + * Input Arguments: + * + * pSrcDst - Pointer to the input macroblock; must be 8-byte aligned. + * srcdstStep - Step of the arrays; must be a multiple of 8. + * pAlpha - Array of size 2 of alpha thresholds (the first item is alpha + * threshold for external vertical edge, and the second item is for + * internal vertical edge); per [ISO14496-10] alpha values must be + * in the range [0,255]. + * pBeta - Array of size 2 of beta thresholds (the first item is the beta + * threshold for the external vertical edge, and the second item is + * for the internal vertical edge); per [ISO14496-10] beta values + * must be in the range [0,18]. + * pThresholds - Array of size 8 containing thresholds, TC0, for the left + * vertical edge of each 4x2 chroma block, arranged in vertical + * block order; must be aligned on a 4-byte boundary. Per + * [ISO14496-10] values must be in the range [0,25]. + * pBS - Array of size 16 of BS parameters (values for each 2x2 chroma + * block, arranged in vertical block order). 
This parameter is the + * same as the pBS parameter passed into FilterDeblockLuma_VerEdge; + * valid in the range [0,4] with the following restrictions: i) + * pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and + * only if pBS[i^3]== 4. Must be 4 byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds, or pBS. + * - pSrcDst is not 8-byte aligned. + * - srcdstStep is not a multiple of 8. + * - pThresholds is not 4-byte aligned. + * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * - pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * - One or more entries in the table pThresholds[0..7] is outside + * of the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - pBS is not 4-byte aligned. + * + */ +OMXResult omxVCM4P10_FilterDeblockingChroma_VerEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingChroma_HorEdge_I (6.3.3.3.4) + * + * Description: + * Performs in-place deblock filtering on the horizontal edges of the chroma + * macroblock (8x8). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 8-byte aligned. + * srcdstStep - array step; must be a multiple of 8. + * pAlpha - array of size 2 containing alpha thresholds; the first element + * contains the threshold for the external horizontal edge, and the + * second element contains the threshold for internal horizontal + * edge. Per [ISO14496-10] alpha values must be in the range + * [0,255]. 
+ * pBeta - array of size 2 containing beta thresholds; the first element + * contains the threshold for the external horizontal edge, and the + * second element contains the threshold for the internal + * horizontal edge. Per [ISO14496-10] beta values must be in the + * range [0,18]. + * pThresholds - array of size 8 containing thresholds, TC0, for the top + * horizontal edge of each 2x4 chroma block, arranged in horizontal + * block order; must be aligned on a 4-byte boundary. Per + * [ISO14496-10] values must be in the range [0,25]. + * pBS - array of size 16 containing BS parameters for each 2x2 chroma + * block, arranged in horizontal block order; valid in the range + * [0,4] with the following restrictions: i) pBS[i]== 4 may occur + * only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 4. + * Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr, if one of the following cases occurs: + * - any of the following pointers is NULL: + * pSrcDst, pAlpha, pBeta, pThresholds, or pBS. + * - pSrcDst is not 8-byte aligned. + * - srcdstStep is not a multiple of 8. + * - pThresholds is not 4-byte aligned. + * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * - pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * - One or more entries in the table pThresholds[0..7] is outside + * of the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - pBS is not 4-byte aligned. 
+ * + */ +OMXResult omxVCM4P10_FilterDeblockingChroma_HorEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_DeblockLuma_I (6.3.3.3.5) + * + * Description: + * This function performs in-place deblock filtering the horizontal and + * vertical edges of a luma macroblock (16x16). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 16-byte aligned. + * srcdstStep - image width; must be a multiple of 16. + * pAlpha - pointer to a 2x2 table of alpha thresholds, organized as + * follows: {external vertical edge, internal vertical edge, + * external horizontal edge, internal horizontal edge }. Per + * [ISO14496-10] alpha values must be in the range [0,255]. + * pBeta - pointer to a 2x2 table of beta thresholds, organized as follows: + * {external vertical edge, internal vertical edge, external + * horizontal edge, internal horizontal edge }. Per [ISO14496-10] + * beta values must be in the range [0,18]. + * pThresholds - pointer to a 16x2 table of threshold (TC0), organized as + * follows: {values for the left or above edge of each 4x4 block, + * arranged in vertical block order and then in horizontal block + * order}; must be aligned on a 4-byte boundary. Per [ISO14496-10] + * values must be in the range [0,25]. + * pBS - pointer to a 16x2 table of BS parameters arranged in scan block + * order for vertical edges and then horizontal edges; valid in the + * range [0,4] with the following restrictions: i) pBS[i]== 4 may + * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== + * 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst - pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds or pBS. 
+ * - pSrcDst is not 16-byte aligned. + * - either pThresholds or pBS is not aligned on a 4-byte boundary. + * - one or more entries in the table pAlpha[0..3] is outside the range + * [0,255]. + * - one or more entries in the table pBeta[0..3] is outside the range + * [0,18]. + * - one or more entries in the table pThresholds[0..31] is outside of + * the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - srcdstStep is not a multiple of 16. + * + */ +OMXResult omxVCM4P10_DeblockLuma_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_DeblockChroma_I (6.3.3.3.6) + * + * Description: + * Performs in-place deblocking filtering on all edges of the chroma + * macroblock (8x8). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 8-byte aligned. + * srcdstStep - step of the arrays; must be a multiple of 8. + * pAlpha - pointer to a 2x2 array of alpha thresholds, organized as + * follows: {external vertical edge, internal vertical edge, + * external horizontal edge, internal horizontal edge }. Per + * [ISO14496-10] alpha values must be in the range [0,255]. + * pBeta - pointer to a 2x2 array of Beta Thresholds, organized as follows: + * { external vertical edge, internal vertical edge, external + * horizontal edge, internal horizontal edge }. Per [ISO14496-10] + * beta values must be in the range [0,18]. + * pThresholds - array of size 8x2 of Thresholds (TC0) (values for the left + * or above edge of each 4x2 or 2x4 block, arranged in vertical + * block order and then in horizontal block order); must be aligned + * on a 4-byte boundary. Per [ISO14496-10] values must be in the + * range [0,25].
+ * pBS - array of size 16x2 of BS parameters (arranged in scan block order + * for vertical edges and then horizontal edges); valid in the + * range [0,4] with the following restrictions: i) pBS[i]== 4 may + * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== + * 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst - pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds, or pBS. + * - pSrcDst is not 8-byte aligned. + * - either pThresholds or pBS is not 4-byte aligned. + * - one or more entries in the table pAlpha[0..3] is outside the range + * [0,255]. + * - one or more entries in the table pBeta[0..3] is outside the range + * [0,18]. + * - one or more entries in the table pThresholds[0..15] is outside of + * the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - srcdstStep is not a multiple of 8. + * + */ +OMXResult omxVCM4P10_DeblockChroma_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (6.3.4.1.1) + * + * Description: + * Performs CAVLC decoding and inverse raster scan for a 2x2 block of + * ChromaDCLevel. The decoded coefficients in the packed position-coefficient + * buffer are stored in reverse zig-zag order, i.e., the first buffer element + * contains the last non-zero position-coefficient pair of the block. Within + * each position-coefficient pair, the position entry indicates the + * raster-scan position of the coefficient, while the coefficient entry + * contains the coefficient value.
+ * + * Input Arguments: + * + * ppBitStream - Double pointer to current byte in bit stream buffer + * pOffset - Pointer to current bit position in the byte pointed to by + * *ppBitStream; valid in the range [0,7]. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after each block is decoded + * pOffset - *pOffset is updated after each block is decoded + * pNumCoeff - Pointer to the number of nonzero coefficients in this block + * ppPosCoefBuf - Double pointer to destination residual + * coefficient-position pair buffer. Buffer position + * (*ppPosCoefBuf) is updated upon return, unless there are only + * zero coefficients in the currently decoded block. In this case + * the caller is expected to bypass the transform/dequantization of + * the empty blocks. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppBitStream or pOffset is NULL. + * - ppPosCoefBuf or pNumCoeff is NULL. + * OMX_Sts_Err - if one of the following is true: + * - an illegal code is encountered in the bitstream + * + */ +OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC ( + const OMX_U8 **ppBitStream, + OMX_S32*pOffset, + OMX_U8 *pNumCoeff, + OMX_U8 **ppPosCoefbuf +); + + + +/** + * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC (6.3.4.1.2) + * + * Description: + * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of + * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. Inverse + * field scan is not supported. The decoded coefficients in the packed + * position-coefficient buffer are stored in reverse zig-zag order, i.e., the + * first buffer element contains the last non-zero postion-coefficient pair of + * the block. Within each position-coefficient pair, the position entry + * indicates the raster-scan position of the coefficient, while the + * coefficient entry contains the coefficient value. 
+ * + * Input Arguments: + * + * ppBitStream - Double pointer to current byte in bit stream buffer + * pOffset - Pointer to current bit position in the byte pointed to by + * *ppBitStream; valid in the range [0,7]. + * sMaxNumCoeff - Maximum number of non-zero coefficients in current + * block + * sVLCSelect - VLC table selector, obtained from the number of non-zero + * coefficients contained in the above and left 4x4 blocks. It is + * equivalent to the variable nC described in H.264 standard table + * 9-5, except its value can't be less than zero. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after each block is decoded. + * Buffer position (*ppPosCoefBuf) is updated upon return, unless + * there are only zero coefficients in the currently decoded block. + * In this case the caller is expected to bypass the + * transform/dequantization of the empty blocks. + * pOffset - *pOffset is updated after each block is decoded + * pNumCoeff - Pointer to the number of nonzero coefficients in this block + * ppPosCoefBuf - Double pointer to destination residual + * coefficient-position pair buffer + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppBitStream or pOffset is NULL. + * - ppPosCoefBuf or pNumCoeff is NULL. + * - sMaxNumCoeff is not equal to either 15 or 16. + * - sVLCSelect is less than 0.
+ * + * OMX_Sts_Err - if one of the following is true: + * - an illegal code is encountered in the bitstream + * + */ +OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC ( + const OMX_U8 **ppBitStream, + OMX_S32 *pOffset, + OMX_U8 *pNumCoeff, + OMX_U8 **ppPosCoefbuf, + OMX_INT sVLCSelect, + OMX_INT sMaxNumCoeff +); + + + +/** + * Function: omxVCM4P10_TransformDequantLumaDCFromPair (6.3.4.2.1) + * + * Description: + * Reconstructs the 4x4 LumaDC block from the coefficient-position pair + * buffer, performs integer inverse transformation and dequantization for 4x4 LumaDC + * coefficients, and updates the pair buffer pointer to the next non-empty + * block. + * + * Input Arguments: + * + * ppSrc - Double pointer to residual coefficient-position pair buffer + * output by CAVLC decoding + * QP - Quantization parameter QpY + * + * Output Arguments: + * + * ppSrc - *ppSrc is updated to the start of next non empty block + * pDst - Pointer to the reconstructed 4x4 LumaDC coefficients buffer; must + * be aligned on an 8-byte boundary. + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppSrc or pDst is NULL. + * - pDst is not 8 byte aligned. + * - QP is not in the range of [0-51]. + * + */ +OMXResult omxVCM4P10_TransformDequantLumaDCFromPair ( + const OMX_U8 **ppSrc, + OMX_S16 *pDst, + OMX_INT QP +); + + + +/** + * Function: omxVCM4P10_TransformDequantChromaDCFromPair (6.3.4.2.2) + * + * Description: + * Reconstruct the 2x2 ChromaDC block from coefficient-position pair buffer, + * perform integer inverse transformation, and dequantization for 2x2 chroma + * DC coefficients, and update the pair buffer pointer to next non-empty + * block.
+ * + * Input Arguments: + * + * ppSrc - Double pointer to residual coefficient-position pair buffer + * output by CAVLC decoding + * QP - Quantization parameter QpC + * + * Output Arguments: + * + * ppSrc - *ppSrc is updated to the start of next non empty block + * pDst - Pointer to the reconstructed 2x2 ChromaDC coefficients buffer; + * must be aligned on a 4-byte boundary. + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppSrc or pDst is NULL. + * - pDst is not 4-byte aligned. + * - QP is not in the range of [0-51]. + * + */ +OMXResult omxVCM4P10_TransformDequantChromaDCFromPair ( + const OMX_U8 **ppSrc, + OMX_S16 *pDst, + OMX_INT QP +); + + + +/** + * Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd (6.3.4.2.3) + * + * Description: + * Reconstruct the 4x4 residual block from coefficient-position pair buffer, + * perform dequantization and integer inverse transformation for 4x4 block of + * residuals with previous intra prediction or motion compensation data, and + * update the pair buffer pointer to next non-empty block. If pDC == NULL, + * there are 16 non-zero AC coefficients at most in the packed buffer starting + * from 4x4 block position 0; if pDC != NULL, there are 15 non-zero AC + * coefficients at most in the packed buffer starting from 4x4 block position + * 1. + * + * Input Arguments: + * + * ppSrc - Double pointer to residual coefficient-position pair buffer + * output by CAVLC decoding + * pPred - Pointer to the predicted 4x4 block; must be aligned on a 4-byte + * boundary + * predStep - Predicted frame step size in bytes; must be a multiple of 4 + * dstStep - Destination frame step in bytes; must be a multiple of 4 + * pDC - Pointer to the DC coefficient of this block, NULL if it doesn't + * exist + * QP - Quantization parameter. It should be QpC in chroma 4x4 block + * decoding, otherwise it should be QpY.
+ * AC - Flag indicating if at least one non-zero AC coefficient exists + * + * Output Arguments: + * + * pDst - pointer to the reconstructed 4x4 block data; must be aligned on a + * 4-byte boundary + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - pPred or pDst is NULL. + * - pPred or pDst is not 4-byte aligned. + * - predStep or dstStep is not a multiple of 4. + * - AC !=0 and Qp is not in the range of [0-51] or ppSrc == NULL. + * - AC ==0 && pDC ==NULL. + * + */ +OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd ( + const OMX_U8 **ppSrc, + const OMX_U8 *pPred, + const OMX_S16 *pDC, + OMX_U8 *pDst, + OMX_INT predStep, + OMX_INT dstStep, + OMX_INT QP, + OMX_INT AC +); + + + +/** + * Function: omxVCM4P10_MEGetBufSize (6.3.5.1.1) + * + * Description: + * Computes the size, in bytes, of the vendor-specific specification + * structure for the omxVCM4P10 motion estimation functions BlockMatch_Integer + * and MotionEstimationMB. + * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P10MEMode + * pMEParams -motion estimation parameters + * + * Output Arguments: + * + * pSize - pointer to the number of bytes required for the motion + * estimation specification structure + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - pMEParams or pSize is NULL. + * - an invalid MEMode is specified. + * + */ +OMXResult omxVCM4P10_MEGetBufSize ( + OMXVCM4P10MEMode MEmode, + const OMXVCM4P10MEParams *pMEParams, + OMX_U32 *pSize +); + + + +/** + * Function: omxVCM4P10_MEInit (6.3.5.1.2) + * + * Description: + * Initializes the vendor-specific specification structure required for the + * omxVCM4P10 motion estimation functions: BlockMatch_Integer and + * MotionEstimationMB. 
Memory for the specification structure *pMESpec must be + * allocated prior to calling the function, and should be aligned on a 4-byte + * boundary. The number of bytes required for the specification structure can + * be determined using the function omxVCM4P10_MEGetBufSize. Following + * initialization by this function, the vendor-specific structure *pMESpec + * should contain an implementation-specific representation of all motion + * estimation parameters received via the structure pMEParams, for example + * searchRange16x16, searchRange8x8, etc. + * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P10MEMode + * pMEParams - motion estimation parameters + * pMESpec - pointer to the uninitialized ME specification structure + * + * Output Arguments: + * + * pMESpec - pointer to the initialized ME specification structure + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - pMEParams or pMESpec is NULL. + * - an invalid value was specified for the parameter MEmode + * - a negative or zero value was specified for one of the search ranges + * (e.g., pMEParams->searchRange8x8, pMEParams->searchRange16x16, etc.) + * - either in isolation or in combination, one or more of the enables or + * search ranges in the structure *pMEParams were configured such + * that the requested behavior fails to comply with [ISO14496-10]. + * + */ +OMXResult omxVCM4P10_MEInit ( + OMXVCM4P10MEMode MEmode, + const OMXVCM4P10MEParams *pMEParams, + void *pMESpec +); + + + +/** + * Function: omxVCM4P10_BlockMatch_Integer (6.3.5.2.1) + * + * Description: + * Performs integer block match. Returns best MV and associated cost. + * + * Input Arguments: + * + * pSrcOrgY - Pointer to the top-left corner of the current block: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required. + * pSrcRefY - Pointer to the top-left corner of the co-located block in the + * reference picture: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * nSrcOrgStep - Stride of the original picture plane, expressed in terms + * of integer pixels; must be a multiple of iBlockWidth. + * nSrcRefStep - Stride of the reference picture plane, expressed in terms + * of integer pixels + * pRefRect - pointer to the valid reference rectangle inside the reference + * picture plane + * pCurrPointPos - position of the current block in the current plane + * iBlockWidth - Width of the current block, expressed in terms of integer + * pixels; must be equal to either 4, 8, or 16. + * iBlockHeight - Height of the current block, expressed in terms of + * integer pixels; must be equal to either 4, 8, or 16. + * nLamda - Lamda factor; used to compute motion cost + * pMVPred - Predicted MV; used to compute motion cost, expressed in terms + * of 1/4-pel units + * pMVCandidate - Candidate MV; used to initialize the motion search, + * expressed in terms of integer pixels + * pMESpec - pointer to the ME specification structure + * + * Output Arguments: + * + * pBestMV - Best MV resulting from integer search, expressed in terms + * of 1/4-pel units + * pBestCost - Motion cost associated with the best MV; computed as + * SAD+Lamda*BitsUsedByMV + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - any of the following pointers are NULL: + * pSrcOrgY, pSrcRefY, pRefRect, pMVPred, pMVCandidate, or pMESpec. + * - Either iBlockWidth or iBlockHeight are values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_BlockMatch_Integer ( + const OMX_U8 *pSrcOrgY, + OMX_S32 nSrcOrgStep, + const OMX_U8 *pSrcRefY, + OMX_S32 nSrcRefStep, + const OMXRect *pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + OMX_U8 iBlockWidth, + OMX_U8 iBlockHeight, + OMX_U32 nLamda, + const OMXVCMotionVector *pMVPred, + const OMXVCMotionVector *pMVCandidate, + OMXVCMotionVector *pBestMV, + OMX_S32 *pBestCost, + void *pMESpec +); + + + +/** + * Function: omxVCM4P10_BlockMatch_Half (6.3.5.2.2) + * + * Description: + * Performs a half-pel block match using results from a prior integer search. + * Returns the best MV and associated cost. This function estimates the + * half-pixel motion vector by interpolating the integer resolution motion + * vector referenced by the input parameter pSrcDstBestMV, i.e., the initial + * integer MV is generated externally. The function + * omxVCM4P10_BlockMatch_Integer may be used for integer motion estimation. + * + * Input Arguments: + * + * pSrcOrgY - Pointer to the current position in original picture plane: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * pSrcRefY - Pointer to the top-left corner of the co-located block in the + * reference picture: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * nSrcOrgStep - Stride of the original picture plane in terms of full + * pixels; must be a multiple of iBlockWidth. + * nSrcRefStep - Stride of the reference picture plane in terms of full + * pixels + * iBlockWidth - Width of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * iBlockHeight - Height of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. 
+ * nLamda - Lamda factor, used to compute motion cost + * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to + * compute motion cost + * pSrcDstBestMV - The best MV resulting from a prior integer search, + * represented in terms of 1/4-pel units + * + * Output Arguments: + * + * pSrcDstBestMV - Best MV resulting from the half-pel search, expressed in + * terms of 1/4-pel units + * pBestCost - Motion cost associated with the best MV; computed as + * SAD+Lamda*BitsUsedByMV + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - any of the following pointers is NULL: pSrcOrgY, pSrcRefY, + * pSrcDstBestMV, pMVPred, pBestCost + * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_BlockMatch_Half ( + const OMX_U8 *pSrcOrgY, + OMX_S32 nSrcOrgStep, + const OMX_U8 *pSrcRefY, + OMX_S32 nSrcRefStep, + OMX_U8 iBlockWidth, + OMX_U8 iBlockHeight, + OMX_U32 nLamda, + const OMXVCMotionVector *pMVPred, + OMXVCMotionVector *pSrcDstBestMV, + OMX_S32 *pBestCost +); + + + +/** + * Function: omxVCM4P10_BlockMatch_Quarter (6.3.5.2.3) + * + * Description: + * Performs a quarter-pel block match using results from a prior half-pel + * search. Returns the best MV and associated cost. This function estimates + * the quarter-pixel motion vector by interpolating the half-pel resolution + * motion vector referenced by the input parameter pSrcDstBestMV, i.e., the + * initial half-pel MV is generated externally. The function + * omxVCM4P10_BlockMatch_Half may be used for half-pel motion estimation. + * + * Input Arguments: + * + * pSrcOrgY - Pointer to the current position in original picture plane: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. 
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the + * reference picture: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * nSrcOrgStep - Stride of the original picture plane in terms of full + * pixels; must be a multiple of iBlockWidth. + * nSrcRefStep - Stride of the reference picture plane in terms of full + * pixels + * iBlockWidth - Width of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * iBlockHeight - Height of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * nLamda - Lamda factor, used to compute motion cost + * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to + * compute motion cost + * pSrcDstBestMV - The best MV resulting from a prior half-pel search, + * represented in terms of 1/4 pel units + * + * Output Arguments: + * + * pSrcDstBestMV - Best MV resulting from the quarter-pel search, expressed + * in terms of 1/4-pel units + * pBestCost - Motion cost associated with the best MV; computed as + * SAD+Lamda*BitsUsedByMV + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - One or more of the following pointers is NULL: + * pSrcOrgY, pSrcRefY, pSrcDstBestMV, pMVPred, pBestCost + * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. 
+ * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_BlockMatch_Quarter ( + const OMX_U8 *pSrcOrgY, + OMX_S32 nSrcOrgStep, + const OMX_U8 *pSrcRefY, + OMX_S32 nSrcRefStep, + OMX_U8 iBlockWidth, + OMX_U8 iBlockHeight, + OMX_U32 nLamda, + const OMXVCMotionVector *pMVPred, + OMXVCMotionVector *pSrcDstBestMV, + OMX_S32 *pBestCost +); + + + +/** + * Function: omxVCM4P10_MotionEstimationMB (6.3.5.3.1) + * + * Description: + * Performs MB-level motion estimation and selects best motion estimation + * strategy from the set of modes supported in baseline profile [ISO14496-10]. + * + * Input Arguments: + * + * pSrcCurrBuf - Pointer to the current position in original picture plane; + * 16-byte alignment required + * pSrcRefBufList - Pointer to an array with 16 entries. Each entry points + * to the top-left corner of the co-located MB in a reference + * picture. The array is filled from low-to-high with valid + * reference frame pointers; the unused high entries should be set + * to NULL. Ordering of the reference frames should follow + * [ISO14496-10] subclause 8.2.4 Decoding Process for Reference + * Picture Lists. The entries must be 16-byte aligned. + * pSrcRecBuf - Pointer to the top-left corner of the co-located MB in the + * reconstructed picture; must be 16-byte aligned. + * SrcCurrStep - Width of the original picture plane in terms of full + * pixels; must be a multiple of 16. + * SrcRefStep - Width of the reference picture plane in terms of full + * pixels; must be a multiple of 16. + * SrcRecStep - Width of the reconstructed picture plane in terms of full + * pixels; must be a multiple of 16. + * pRefRect - Pointer to the valid reference rectangle; relative to the + * image origin. + * pCurrPointPos - Position of the current macroblock in the current plane. 
+ * Lambda - Lagrange factor for computing the cost function + * pMESpec - Pointer to the motion estimation specification structure; must + * have been allocated and initialized prior to calling this + * function. + * pMBInter - Array, of dimension four, containing pointers to information + * associated with four adjacent type INTER MBs (Left, Top, + * Top-Left, Top-Right). Any pointer in the array may be set equal + * to NULL if the corresponding MB doesn't exist or is not of type + * INTER. + * - pMBInter[0] - Pointer to left MB information + * - pMBInter[1] - Pointer to top MB information + * - pMBInter[2] - Pointer to top-left MB information + * - pMBInter[3] - Pointer to top-right MB information + * pMBIntra - Array, of dimension four, containing pointers to information + * associated with four adjacent type INTRA MBs (Left, Top, + * Top-Left, Top-Right). Any pointer in the array may be set equal + * to NULL if the corresponding MB doesn't exist or is not of type + * INTRA. + * - pMBIntra[0] - Pointer to left MB information + * - pMBIntra[1] - Pointer to top MB information + * - pMBIntra[2] - Pointer to top-left MB information + * - pMBIntra[3] - Pointer to top-right MB information + * pSrcDstMBCurr - Pointer to information structure for the current MB. + * The following entries should be set prior to calling the + * function: sliceID - the number of the slice to which the + * current MB belongs. + * + * Output Arguments: + * + * pDstCost - Pointer to the minimum motion cost for the current MB. + * pDstBlockSAD - Pointer to the array of SADs for each of the sixteen luma + * 4x4 blocks in each MB. The block SADs are in scan order for + * each MB. For implementations that cannot compute the SAD values + * individually, the maximum possible value (0xffff) is returned + * for each of the 16 block SAD entries. + * pSrcDstMBCurr - Pointer to updated information structure for the current + * MB after MB-level motion estimation has been completed.
The + * following fields are updated by the ME function. The following + * parameter set quantifies the MB-level ME search results: + * - MbType + * - subMBType[4] + * - pMV0[4][4] + * - pMVPred[4][4] + * - pRefL0Idx[4] + * - Intra16x16PredMode + * - pIntra4x4PredMode[4][4] + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - One or more of the following pointers is NULL: pSrcCurrBuf, + * pSrcRefBufList, pSrcRecBuf, pRefRect, pCurrPointPos, pMESpec, + * pMBInter, pMBIntra,pSrcDstMBCurr, pDstCost, pSrcRefBufList[0] + * - SrcRefStep, SrcRecStep are not multiples of 16 + * - iBlockWidth or iBlockHeight are values other than 4, 8, or 16. + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_MotionEstimationMB ( + const OMX_U8 *pSrcCurrBuf, + OMX_S32 SrcCurrStep, + const OMX_U8 *pSrcRefBufList[15], + OMX_S32 SrcRefStep, + const OMX_U8 *pSrcRecBuf, + OMX_S32 SrcRecStep, + const OMXRect *pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + OMX_U32 Lambda, + void *pMESpec, + const OMXVCM4P10MBInfoPtr *pMBInter, + const OMXVCM4P10MBInfoPtr *pMBIntra, + OMXVCM4P10MBInfoPtr pSrcDstMBCurr, + OMX_INT *pDstCost, + OMX_U16 *pDstBlockSAD +); + + + +/** + * Function: omxVCM4P10_SAD_4x (6.3.5.4.1) + * + * Description: + * This function calculates the SAD for 4x8 and 4x4 blocks. + * + * Input Arguments: + * + * pSrcOrg -Pointer to the original block; must be aligned on a 4-byte + * boundary. + * iStepOrg -Step of the original block buffer; must be a multiple of 4. + * pSrcRef -Pointer to the reference block + * iStepRef -Step of the reference block buffer + * iHeight -Height of the block; must be equal to either 4 or 8. + * + * Output Arguments: + * + * pDstSAD -Pointer of result SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. 
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - One or more of the following pointers is NULL: + * pSrcOrg, pSrcRef, or pDstSAD + * - iHeight is not equal to either 4 or 8. + * - iStepOrg is not a multiple of 4 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SAD_4x ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_S32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SADQuar_4x (6.3.5.4.2) + * + * Description: + * This function calculates the SAD between one block (pSrc) and the average + * of the other two (pSrcRef0 and pSrcRef1) for 4x8 or 4x4 blocks. Rounding + * is applied according to the convention (a+b+1)>>1. + * + * Input Arguments: + * + * pSrc - Pointer to the original block; must be aligned on a 4-byte + * boundary. + * pSrcRef0 - Pointer to reference block 0 + * pSrcRef1 - Pointer to reference block 1 + * iSrcStep - Step of the original block buffer; must be a multiple of 4. + * iRefStep0 - Step of reference block 0 + * iRefStep1 - Step of reference block 1 + * iHeight - Height of the block; must be equal to either 4 or 8. + * + * Output Arguments: + * + * pDstSAD - Pointer of result SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - iHeight is not equal to either 4 or 8. + * - One or more of the following pointers is NULL: pSrc, pSrcRef0, + * pSrcRef1, pDstSAD. 
+ * - iSrcStep is not a multiple of 4 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SADQuar_4x ( + const OMX_U8 *pSrc, + const OMX_U8 *pSrcRef0, + const OMX_U8 *pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SADQuar_8x (6.3.5.4.3) + * + * Description: + * This function calculates the SAD between one block (pSrc) and the average + * of the other two (pSrcRef0 and pSrcRef1) for 8x16, 8x8, or 8x4 blocks. + * Rounding is applied according to the convention (a+b+1)>>1. + * + * Input Arguments: + * + * pSrc - Pointer to the original block; must be aligned on an 8-byte + * boundary. + * pSrcRef0 - Pointer to reference block 0 + * pSrcRef1 - Pointer to reference block 1 + * iSrcStep - Step of the original block buffer; must be a multiple of 8. + * iRefStep0 - Step of reference block 0 + * iRefStep1 - Step of reference block 1 + * iHeight - Height of the block; must be equal either 4, 8, or 16. + * + * Output Arguments: + * + * pDstSAD - Pointer of result SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - iHeight is not equal to either 4, 8, or 16. + * - One or more of the following pointers is NULL: pSrc, pSrcRef0, + * pSrcRef1, pDstSAD. + * - iSrcStep is not a multiple of 8 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SADQuar_8x ( + const OMX_U8 *pSrc, + const OMX_U8 *pSrcRef0, + const OMX_U8 *pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SADQuar_16x (6.3.5.4.4) + * + * Description: + * This function calculates the SAD between one block (pSrc) and the average + * of the other two (pSrcRef0 and pSrcRef1) for 16x16 or 16x8 blocks. + * Rounding is applied according to the convention (a+b+1)>>1. 
+ * + * Input Arguments: + * + * pSrc - Pointer to the original block; must be aligned on a 16-byte + * boundary. + * pSrcRef0 - Pointer to reference block 0 + * pSrcRef1 - Pointer to reference block 1 + * iSrcStep - Step of the original block buffer; must be a multiple of 16 + * iRefStep0 - Step of reference block 0 + * iRefStep1 - Step of reference block 1 + * iHeight - Height of the block; must be equal to either 8 or 16 + * + * Output Arguments: + * + * pDstSAD -Pointer of result SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - iHeight is not equal to either 8 or 16. + * - One or more of the following pointers is NULL: pSrc, pSrcRef0, + * pSrcRef1, pDstSAD. + * - iSrcStep is not a multiple of 16 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SADQuar_16x ( + const OMX_U8 *pSrc, + const OMX_U8 *pSrcRef0, + const OMX_U8 *pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SATD_4x4 (6.3.5.4.5) + * + * Description: + * This function calculates the sum of absolute transform differences (SATD) + * for a 4x4 block by applying a Hadamard transform to the difference block + * and then calculating the sum of absolute coefficient values. 
+ * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 4-byte + * boundary + * iStepOrg - Step of the original block buffer; must be a multiple of 4 + * pSrcRef - Pointer to the reference block; must be aligned on a 4-byte + * boundary + * iStepRef - Step of the reference block buffer; must be a multiple of 4 + * + * Output Arguments: + * + * pDstSAD - pointer to the resulting SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pSrcRef, or pDstSAD + * - either pSrcOrg or pSrcRef is not aligned on a 4-byte boundary + * - iStepOrg <= 0 or iStepOrg is not a multiple of 4 + * - iStepRef <= 0 or iStepRef is not a multiple of 4 + * + */ +OMXResult omxVCM4P10_SATD_4x4 ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_U32 *pDstSAD +); + + + +/** + * Function: omxVCM4P10_InterpolateHalfHor_Luma (6.3.5.5.1) + * + * Description: + * This function performs interpolation for two horizontal 1/2-pel positions + * (-1/2,0) and (1/2, 0) - around a full-pel position. + * + * Input Arguments: + * + * pSrc - Pointer to the top-left corner of the block used to interpolate in + * the reconstruction frame plane. + * iSrcStep - Step of the source buffer. + * iDstStep - Step of the destination(interpolation) buffer; must be a + * multiple of iWidth. + * iWidth - Width of the current block; must be equal to either 4, 8, or 16 + * iHeight - Height of the current block; must be equal to 4, 8, or 16 + * + * Output Arguments: + * + * pDstLeft -Pointer to the interpolation buffer of the left -pel position + * (-1/2, 0) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required.
+ * pDstRight -Pointer to the interpolation buffer of the right -pel + * position (1/2, 0) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrc, pDstLeft, or pDstRight + * - iWidth or iHeight have values other than 4, 8, or 16 + * - iWidth==4 but pDstLeft and/or pDstRight is/are not aligned on a 4-byte boundary + * - iWidth==8 but pDstLeft and/or pDstRight is/are not aligned on a 8-byte boundary + * - iWidth==16 but pDstLeft and/or pDstRight is/are not aligned on a 16-byte boundary + * - any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_InterpolateHalfHor_Luma ( + const OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDstLeft, + OMX_U8 *pDstRight, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_InterpolateHalfVer_Luma (6.3.5.5.2) + * + * Description: + * This function performs interpolation for two vertical 1/2-pel positions - + * (0, -1/2) and (0, 1/2) - around a full-pel position. + * + * Input Arguments: + * + * pSrc - Pointer to top-left corner of block used to interpolate in the + * reconstructed frame plane + * iSrcStep - Step of the source buffer. + * iDstStep - Step of the destination (interpolation) buffer; must be a + * multiple of iWidth. + * iWidth - Width of the current block; must be equal to either 4, 8, or 16 + * iHeight - Height of the current block; must be equal to either 4, 8, or 16 + * + * Output Arguments: + * + * pDstUp -Pointer to the interpolation buffer of the -pel position above + * the current full-pel position (0, -1/2) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required. 
+ * pDstDown -Pointer to the interpolation buffer of the -pel position below + * the current full-pel position (0, 1/2) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrc, pDstUp, or pDstDown + * - iWidth or iHeight have values other than 4, 8, or 16 + * - iWidth==4 but pDstUp and/or pDstDown is/are not aligned on a 4-byte boundary + * - iWidth==8 but pDstUp and/or pDstDown is/are not aligned on a 8-byte boundary + * - iWidth==16 but pDstUp and/or pDstDown is/are not aligned on a 16-byte boundary + * + */ +OMXResult omxVCM4P10_InterpolateHalfVer_Luma ( + const OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDstUp, + OMX_U8 *pDstDown, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_Average_4x (6.3.5.5.3) + * + * Description: + * This function calculates the average of two 4x4, 4x8 blocks. The result + * is rounded according to (a+b+1)/2. + * + * Input Arguments: + * + * pPred0 - Pointer to the top-left corner of reference block 0 + * pPred1 - Pointer to the top-left corner of reference block 1 + * iPredStep0 - Step of reference block 0; must be a multiple of 4. + * iPredStep1 - Step of reference block 1; must be a multiple of 4. + * iDstStep - Step of the destination buffer; must be a multiple of 4. + * iHeight - Height of the blocks; must be either 4 or 8. + * + * Output Arguments: + * + * pDstPred - Pointer to the destination buffer. 4-byte alignment required. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pPred0, pPred1, or pDstPred + * - pDstPred is not aligned on a 4-byte boundary + * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 4 + * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 4 + * - iDstStep <= 0 or iDstStep is not a multiple of 4 + * - iHeight is not equal to either 4 or 8 + * + */ +OMXResult omxVCM4P10_Average_4x ( + const OMX_U8 *pPred0, + const OMX_U8 *pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8 *pDstPred, + OMX_U32 iDstStep, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_TransformQuant_ChromaDC (6.3.5.6.1) + * + * Description: + * This function performs 2x2 Hadamard transform of chroma DC coefficients + * and then quantizes the coefficients. + * + * Input Arguments: + * + * pSrcDst - Pointer to the 2x2 array of chroma DC coefficients. 8-byte + * alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * bIntra - Indicate whether this is an INTRA block. 1-INTRA, 0-INTER + * + * Output Arguments: + * + * pSrcDst - Pointer to transformed and quantized coefficients. 8-byte + * alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcDst + * - pSrcDst is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_TransformQuant_ChromaDC ( + OMX_S16 *pSrcDst, + OMX_U32 iQP, + OMX_U8 bIntra +); + + + +/** + * Function: omxVCM4P10_TransformQuant_LumaDC (6.3.5.6.2) + * + * Description: + * This function performs a 4x4 Hadamard transform of luma DC coefficients + * and then quantizes the coefficients. + * + * Input Arguments: + * + * pSrcDst - Pointer to the 4x4 array of luma DC coefficients. 
16-byte + * alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * + * Output Arguments: + * + * pSrcDst - Pointer to transformed and quantized coefficients. 16-byte + * alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: pSrcDst + * - pSrcDst is not aligned on an 16-byte boundary + * + */ +OMXResult omxVCM4P10_TransformQuant_LumaDC ( + OMX_S16 *pSrcDst, + OMX_U32 iQP +); + + + +/** + * Function: omxVCM4P10_InvTransformDequant_LumaDC (6.3.5.6.3) + * + * Description: + * This function performs inverse 4x4 Hadamard transform and then dequantizes + * the coefficients. + * + * Input Arguments: + * + * pSrc - Pointer to the 4x4 array of the 4x4 Hadamard-transformed and + * quantized coefficients. 16 byte alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * + * Output Arguments: + * + * pDst - Pointer to inverse-transformed and dequantized coefficients. + * 16-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: pSrc + * - pSrc or pDst is not aligned on a 16-byte boundary + * + */ +OMXResult omxVCM4P10_InvTransformDequant_LumaDC ( + const OMX_S16 *pSrc, + OMX_S16 *pDst, + OMX_U32 iQP +); + + + +/** + * Function: omxVCM4P10_InvTransformDequant_ChromaDC (6.3.5.6.4) + * + * Description: + * This function performs inverse 2x2 Hadamard transform and then dequantizes + * the coefficients. + * + * Input Arguments: + * + * pSrc - Pointer to the 2x2 array of the 2x2 Hadamard-transformed and + * quantized coefficients. 8 byte alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. 
+ * + * Output Arguments: + * + * pDst - Pointer to inverse-transformed and dequantized coefficients. + * 8-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: pSrc + * - pSrc or pDst is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_InvTransformDequant_ChromaDC ( + const OMX_S16 *pSrc, + OMX_S16 *pDst, + OMX_U32 iQP +); + + + +/** + * Function: omxVCM4P10_InvTransformResidualAndAdd (6.3.5.7.1) + * + * Description: + * This function performs an inverse 4x4 integer transformation to produce + * the difference signal and then adds the difference to the prediction to get + * the reconstructed signal. + * + * Input Arguments: + * + * pSrcPred - Pointer to prediction signal. 4-byte alignment required. + * pDequantCoeff - Pointer to the transformed coefficients. 8-byte + * alignment required. + * iSrcPredStep - Step of the prediction buffer; must be a multiple of 4. + * iDstReconStep - Step of the destination reconstruction buffer; must be a + * multiple of 4. + * bAC - Indicates whether there are AC coefficients in the coefficients + * matrix. + * + * Output Arguments: + * + * pDstRecon -Pointer to the destination reconstruction buffer. 4-byte + * alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcPred, pDequantCoeff, pDstRecon + * - pSrcPred is not aligned on a 4-byte boundary + * - iSrcPredStep or iDstReconStep is not a multiple of 4.
+ * - pDequantCoeff is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_InvTransformResidualAndAdd ( + const OMX_U8 *pSrcPred, + const OMX_S16 *pDequantCoeff, + OMX_U8 *pDstRecon, + OMX_U32 iSrcPredStep, + OMX_U32 iDstReconStep, + OMX_U8 bAC +); + + + +/** + * Function: omxVCM4P10_SubAndTransformQDQResidual (6.3.5.8.1) + * + * Description: + * This function subtracts the prediction signal from the original signal to + * produce the difference signal and then performs a 4x4 integer transform and + * quantization. The quantized transformed coefficients are stored as + * pDstQuantCoeff. This function can also output dequantized coefficients or + * unquantized DC coefficients optionally by setting the pointers + * pDstDeQuantCoeff, pDCCoeff. + * + * Input Arguments: + * + * pSrcOrg - Pointer to original signal. 4-byte alignment required. + * pSrcPred - Pointer to prediction signal. 4-byte alignment required. + * iSrcOrgStep - Step of the original signal buffer; must be a multiple of + * 4. + * iSrcPredStep - Step of the prediction signal buffer; must be a multiple + * of 4. + * pNumCoeff -Number of non-zero coefficients after quantization. If this + * parameter is not required, it is set to NULL. + * nThreshSAD - Zero-block early detection threshold. If this parameter is + * not required, it is set to 0. + * iQP - Quantization parameter; must be in the range [0,51]. + * bIntra - Indicates whether this is an INTRA block, either 1-INTRA or + * 0-INTER + * + * Output Arguments: + * + * pDstQuantCoeff - Pointer to the quantized transformed coefficients. + * 8-byte alignment required. + * pDstDeQuantCoeff - Pointer to the dequantized transformed coefficients + * if this parameter is not equal to NULL. 8-byte alignment + * required. + * pDCCoeff - Pointer to the unquantized DC coefficient if this parameter + * is not equal to NULL. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pSrcPred, pNumCoeff, pDstQuantCoeff, + * pDstDeQuantCoeff, pDCCoeff + * - pSrcOrg is not aligned on a 4-byte boundary + * - pSrcPred is not aligned on a 4-byte boundary + * - iSrcOrgStep is not a multiple of 4 + * - iSrcPredStep is not a multiple of 4 + * - pDstQuantCoeff or pDstDeQuantCoeff is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_SubAndTransformQDQResidual ( + const OMX_U8 *pSrcOrg, + const OMX_U8 *pSrcPred, + OMX_U32 iSrcOrgStep, + OMX_U32 iSrcPredStep, + OMX_S16 *pDstQuantCoeff, + OMX_S16 *pDstDeQuantCoeff, + OMX_S16 *pDCCoeff, + OMX_S8 *pNumCoeff, + OMX_U32 nThreshSAD, + OMX_U32 iQP, + OMX_U8 bIntra +); + + + +/** + * Function: omxVCM4P10_GetVLCInfo (6.3.5.9.1) + * + * Description: + * This function extracts run-length encoding (RLE) information from the + * coefficient matrix. The results are returned in an OMXVCM4P10VLCInfo + * structure. + * + * Input Arguments: + * + * pSrcCoeff - pointer to the transform coefficient matrix. 8-byte + * alignment required. + * pScanMatrix - pointer to the scan order definition matrix. For a luma + * block the scan matrix should follow [ISO14496-10] section 8.5.4, + * and should contain the values 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, + * 10, 7, 11, 14, 15. For a chroma block, the scan matrix should + * contain the values 0, 1, 2, 3. + * bAC - indicates presence of a DC coefficient; 0 = DC coefficient + * present, 1= DC coefficient absent. + * MaxNumCoef - specifies the number of coefficients contained in the + * transform coefficient matrix, pSrcCoeff. The value should be 16 + * for blocks of type LUMADC, LUMAAC, LUMALEVEL, and CHROMAAC. The + * value should be 4 for blocks of type CHROMADC. 
+ * + * Output Arguments: + * + * pDstVLCInfo - pointer to structure that stores information for + * run-length coding. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcCoeff, pScanMatrix, pDstVLCInfo + * - pSrcCoeff is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_GetVLCInfo ( + const OMX_S16 *pSrcCoeff, + const OMX_U8 *pScanMatrix, + OMX_U8 bAC, + OMX_U32 MaxNumCoef, + OMXVCM4P10VLCInfo*pDstVLCInfo +); + + + +#ifdef __cplusplus +} +#endif + +#endif /** end of #define _OMXVC_H_ */ + +/** EOF */ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_Average.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_Average.c new file mode 100644 index 0000000000000000000000000000000000000000..1e5107786a9d97f408de2cbc6a63745710dbf817 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_Average.c @@ -0,0 +1,78 @@ +/** + * + * File Name: armVCCOMM_Average.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This function will calculate Average of two blocks if size iWidth X iHeight + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: armVCCOMM_Average + * + * Description: + * This function calculates the average of two blocks and stores the result. 
+ * + * Remarks: + * + * [in] pPred0 Pointer to the top-left corner of reference block 0 + * [in] pPred1 Pointer to the top-left corner of reference block 1 + * [in] iPredStep0 Step of reference block 0 + * [in] iPredStep1 Step of reference block 1 + * [in] iDstStep Step of the destination buffer + * [in] iWidth Width of the blocks + * [in] iHeight Height of the blocks + * [out] pDstPred Pointer to the destination buffer + * + * Return Value: + * Standard OMXResult value. + * + */ + OMXResult armVCCOMM_Average ( + const OMX_U8* pPred0, + const OMX_U8* pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8* pDstPred, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +) +{ + OMX_U32 x, y; + + /* check for argument error */ + armRetArgErrIf(pPred0 == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pPred1 == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDstPred == NULL, OMX_Sts_BadArgErr) + + for (y = 0; y < iHeight; y++) + { + for (x = 0; x < iWidth; x++) + { + pDstPred [y * iDstStep + x] = + (OMX_U8)(((OMX_U32)pPred0 [y * iPredStep0 + x] + + pPred1 [y * iPredStep1 + x] + 1) >> 1); + } + } + + return OMX_Sts_NoErr; +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_SAD.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_SAD.c new file mode 100644 index 0000000000000000000000000000000000000000..d41ac9a050bcc96ae4b7c60525ef005213f2c583 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_SAD.c @@ -0,0 +1,75 @@ +/** + * + * File Name: armVCCOMM_SAD.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * Description: + * This function will calculate SAD for NxM blocks + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + +/** + * Function: armVCCOMM_SAD + * + * Description: + * This function calculate the SAD for NxM blocks. + * + * Remarks: + * + * [in] pSrcOrg Pointer to the original block + * [in] iStepOrg Step of the original block buffer + * [in] pSrcRef Pointer to the reference block + * [in] iStepRef Step of the reference block buffer + * [in] iHeight Height of the block + * [in] iWidth Width of the block + * [out] pDstSAD Pointer of result SAD + * + * Return Value: + * Standard OMXResult value. + * + */ +OMXResult armVCCOMM_SAD( + const OMX_U8* pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8* pSrcRef, + OMX_U32 iStepRef, + OMX_S32* pDstSAD, + OMX_U32 iHeight, + OMX_U32 iWidth +) +{ + OMX_INT x, y; + + /* check for argument error */ + armRetArgErrIf(pSrcOrg == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pSrcRef == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr) + + *pDstSAD = 0; + for (y = 0; y < iHeight; y++) + { + for (x = 0; x < iWidth; x++) + { + *pDstSAD += armAbs(pSrcOrg [(y * iStepOrg) + x] - + pSrcRef [(y * iStepRef) + x]); + } + } + + return OMX_Sts_NoErr; +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_16x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_16x.c new file mode 100644 index 0000000000000000000000000000000000000000..6d1447e19be909656b3c93aab56823eddf67ca0b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_16x.c @@ -0,0 +1,86 @@ +/** + * + * File Name: omxVCCOMM_Average_16x.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: 
Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This function will calculate Average of two 16x16 or 16x8 blocks + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCCOMM_Average_16x (6.1.3.1.2) + * + * Description: + * This function calculates the average of two 16x16 or 16x8 blocks. The + * result is rounded according to (a+b+1)/2. The block average function can + * be used in conjunction with half-pixel interpolation to obtain quarter + * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. + * + * Input Arguments: + * + * pPred0 - Pointer to the top-left corner of reference block 0 + * pPred1 - Pointer to the top-left corner of reference block 1 + * iPredStep0 - Step of reference block 0 + * iPredStep1 - Step of reference block 1 + * iDstStep - Step of the destination buffer + * iHeight - Height of the blocks + * + * Output Arguments: + * + * pDstPred - Pointer to the destination buffer. 16-byte aligned. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pPred0, pPred1, or + * pDstPred. + * - pDstPred is not aligned on a 16-byte boundary. + * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 16. + * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 16. + * - iDstStep <= 0 or iDstStep is not a multiple of 16. + * - iHeight is not 8 or 16. 
+ * + */ + OMXResult omxVCCOMM_Average_16x ( + const OMX_U8* pPred0, + const OMX_U8* pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8* pDstPred, + OMX_U32 iDstStep, + OMX_U32 iHeight +) +{ + /* check for argument error */ + armRetArgErrIf(pPred0 == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pPred1 == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDstPred == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf((iHeight != 8) && (iHeight != 16), OMX_Sts_BadArgErr) + armRetArgErrIf(armNot16ByteAligned(pDstPred), OMX_Sts_BadArgErr) + armRetArgErrIf((iPredStep0 == 0) || (iPredStep0 & 15), OMX_Sts_BadArgErr) + armRetArgErrIf((iPredStep1 == 0) || (iPredStep1 & 15), OMX_Sts_BadArgErr) + armRetArgErrIf((iDstStep == 0) || (iDstStep & 15), OMX_Sts_BadArgErr) + + return armVCCOMM_Average + (pPred0, pPred1, iPredStep0, iPredStep1, pDstPred, iDstStep, 16, iHeight); +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_8x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_8x.c new file mode 100644 index 0000000000000000000000000000000000000000..17b1326c68b039f7dc9a51c060abc47d0097b3f3 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_8x.c @@ -0,0 +1,87 @@ +/** + * + * File Name: omxVCCOMM_Average_8x.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * Description: + * This function will calculate Average of two 8x4 or 8x8 or 8x16 blocks + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCCOMM_Average_8x (6.1.3.1.1) + * + * Description: + * This function calculates the average of two 8x4, 8x8, or 8x16 blocks. The + * result is rounded according to (a+b+1)/2. The block average function can + * be used in conjunction with half-pixel interpolation to obtain quarter + * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. + * + * Input Arguments: + * + * pPred0 - Pointer to the top-left corner of reference block 0 + * pPred1 - Pointer to the top-left corner of reference block 1 + * iPredStep0 - Step of reference block 0 + * iPredStep1 - Step of reference block 1 + * iDstStep - Step of the destination buffer. + * iHeight - Height of the blocks + * + * Output Arguments: + * + * pDstPred - Pointer to the destination buffer. 8-byte aligned. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pPred0, pPred1, or + * pDstPred. + * - pDstPred is not aligned on an 8-byte boundary. + * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 8. + * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 8. + * - iDstStep <= 0 or iDstStep is not a multiple of 8. + * - iHeight is not 4, 8, or 16. 
+ * + */ + OMXResult omxVCCOMM_Average_8x ( + const OMX_U8* pPred0, + const OMX_U8* pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8* pDstPred, + OMX_U32 iDstStep, + OMX_U32 iHeight +) +{ + /* check for argument error */ + armRetArgErrIf(pPred0 == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pPred1 == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDstPred == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf((iPredStep0 == 0) || (iPredStep0 & 7), OMX_Sts_BadArgErr) + armRetArgErrIf((iPredStep1 == 0) || (iPredStep1 & 7), OMX_Sts_BadArgErr) + armRetArgErrIf((iDstStep == 0) || (iDstStep & 7), OMX_Sts_BadArgErr) + armRetArgErrIf((iHeight != 4) && (iHeight != 8) && (iHeight != 16), OMX_Sts_BadArgErr) + armRetArgErrIf(armNot8ByteAligned(pDstPred), OMX_Sts_BadArgErr) + + return armVCCOMM_Average + (pPred0, pPred1, iPredStep0, iPredStep1, pDstPred, iDstStep, 8, iHeight); +} + + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c new file mode 100644 index 0000000000000000000000000000000000000000..e559adf11e1c90c00d3189554e3c8a02004029e0 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c @@ -0,0 +1,88 @@ +/** + * + * File Name: omxVCCOMM_ComputeTextureErrorBlock.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Description: + * Contains module computing the error for a MB of size 8x8 + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" +#include "armCOMM.h" + +/** + * Function: omxVCCOMM_ComputeTextureErrorBlock (6.1.4.1.2) + * + * Description: + * Computes the texture error of the block. + * + * Input Arguments: + * + * pSrc - pointer to the source plane. This should be aligned on an 8-byte + * boundary. + * srcStep - step of the source plane + * pSrcRef - pointer to the reference buffer, an 8x8 block. This should be + * aligned on an 8-byte boundary. + * + * Output Arguments: + * + * pDst - pointer to the destination buffer, an 8x8 block. This should be + * aligned on an 8-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is NULL: + * pSrc, pSrcRef, pDst. + * - pSrc is not 8-byte aligned. + * - SrcStep <= 0 or srcStep is not a multiple of 8. + * - pSrcRef is not 8-byte aligned. 
+ * - pDst is not 8-byte aligned + * + */ + +OMXResult omxVCCOMM_ComputeTextureErrorBlock( + const OMX_U8 *pSrc, + OMX_INT srcStep, + const OMX_U8 *pSrcRef, + OMX_S16 * pDst +) +{ + + OMX_INT x, y, count; + + /* Argument error checks */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcRef == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs8ByteAligned(pSrcRef), OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs8ByteAligned(pDst), OMX_Sts_BadArgErr); + armRetArgErrIf((srcStep <= 0) || (srcStep & 7), OMX_Sts_BadArgErr); + + /* Calculate the error block */ + for (y = 0, count = 0; + y < 8; + y++, pSrc += srcStep) + { + for (x = 0; x < 8; x++, count++) + { + pDst[count] = pSrc[x] - pSrcRef[count]; + } + } + + return OMX_Sts_NoErr; + +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c new file mode 100644 index 0000000000000000000000000000000000000000..c4731aadb8064cc47ead62c216381f4bea6aaab3 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c @@ -0,0 +1,93 @@ +/** + * + * File Name: omxVCCOMM_ComputeTextureErrorBlock_SAD.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains module computing the error for a MB of size 8x8 + * + */ +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + + +/** + * Function: omxVCCOMM_ComputeTextureErrorBlock_SAD (6.1.4.1.1) + * + * Description: + * Computes texture error of the block; also returns SAD. 
+ * + * Input Arguments: + * + * pSrc - pointer to the source plane; must be aligned on an 8-byte + * boundary. + * srcStep - step of the source plane + * pSrcRef - pointer to the reference buffer, an 8x8 block; must be aligned + * on an 8-byte boundary. + * + * Output Arguments: + * + * pDst - pointer to the destination buffer, an 8x8 block; must be aligned + * on an 8-byte boundary. + * pDstSAD - pointer to the Sum of Absolute Differences (SAD) value + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following + * pointers is NULL: pSrc, pSrcRef, pDst and pDstSAD. + * - pSrc is not 8-byte aligned. + * - SrcStep <= 0 or srcStep is not a multiple of 8. + * - pSrcRef is not 8-byte aligned. + * - pDst is not 8-byte aligned. + * + */ + +OMXResult omxVCCOMM_ComputeTextureErrorBlock_SAD( + const OMX_U8 *pSrc, + OMX_INT srcStep, + const OMX_U8 *pSrcRef, + OMX_S16 * pDst, + OMX_INT *pDstSAD +) +{ + + OMX_INT x, y, count; + + /* Argument error checks */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcRef == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs8ByteAligned(pSrcRef), OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs8ByteAligned(pDst), OMX_Sts_BadArgErr); + armRetArgErrIf((srcStep <= 0) || (srcStep & 7), OMX_Sts_BadArgErr); + + /* Calculate the error block */ + for (y = 0, count = 0, *pDstSAD = 0; + y < 8; + y++, pSrc += srcStep) + { + for (x = 0; x < 8; x++, count++) + { + pDst[count] = pSrc[x] - pSrcRef[count]; + *pDstSAD += armAbs(pDst[count]); + } + } + + return OMX_Sts_NoErr; + +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy16x16.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy16x16.c new file mode 
100644 index 0000000000000000000000000000000000000000..48570242227b1a94f52d7afa3aaa7a812fe21d47 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy16x16.c @@ -0,0 +1,79 @@ +/** + * + * File Name: omxVCCOMM_Copy16x16.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * MPEG4 16x16 Copy module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + +/** + * Function: omxVCCOMM_Copy16x16 (6.1.3.3.2) + * + * Description: + * Copies the reference 16x16 macroblock to the current macroblock. + * + * Input Arguments: + * + * pSrc - pointer to the reference macroblock in the source frame; must be + * aligned on a 16-byte boundary. + * step - distance between the starts of consecutive lines in the reference + * frame, in bytes; must be a multiple of 16 and must be larger + * than or equal to 16. + * + * Output Arguments: + * + * pDst - pointer to the destination macroblock; must be aligned on a + * 16-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pSrc, pDst + * - one or more of the following pointers is not aligned on a 16-byte + * boundary: pSrc, pDst + * - step <16 or step is not a multiple of 16. 
+ * + */ + +OMXResult omxVCCOMM_Copy16x16( + const OMX_U8 *pSrc, + OMX_U8 *pDst, + OMX_INT step) + { + /* Definitions and Initializations*/ + + OMX_INT count,index, x, y; + + /* Argument error checks */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs16ByteAligned(pSrc), OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr); + armRetArgErrIf(((step < 16) || (step % 16)), OMX_Sts_BadArgErr); + + + /* Copying the ref 16x16 blk to the curr blk */ + for (y = 0, count = 0, index = 0; y < 16; y++, count = count + step - 16) + { + for (x = 0; x < 16; x++, count++, index++) + { + pDst[index] = pSrc[count]; + } + } + return OMX_Sts_NoErr; + } diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy8x8.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy8x8.c new file mode 100644 index 0000000000000000000000000000000000000000..a4f9dde674f57c79d2912630e63c02d18f06e584 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy8x8.c @@ -0,0 +1,79 @@ +/** + * + * File Name: omxVCCOMM_Copy8x8.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * MPEG4 8x8 Copy module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + +/** + * Function: omxVCCOMM_Copy8x8 (6.1.3.3.1) + * + * Description: + * Copies the reference 8x8 block to the current block. + * + * Input Arguments: + * + * pSrc - pointer to the reference block in the source frame; must be + * aligned on an 8-byte boundary. + * step - distance between the starts of consecutive lines in the reference + * frame, in bytes; must be a multiple of 8 and must be larger than + * or equal to 8. 
+ * + * Output Arguments: + * + * pDst - pointer to the destination block; must be aligned on an 8-byte + * boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pSrc, pDst + * - one or more of the following pointers is not aligned on an 8-byte + * boundary: pSrc, pDst + * - step <8 or step is not a multiple of 8. + * + */ + +OMXResult omxVCCOMM_Copy8x8( + const OMX_U8 *pSrc, + OMX_U8 *pDst, + OMX_INT step) + { + /* Definitions and Initializations*/ + + OMX_INT count,index, x, y; + + /* Argument error checks */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs8ByteAligned(pDst), OMX_Sts_BadArgErr); + armRetArgErrIf(((step < 8) || (step % 8)), OMX_Sts_BadArgErr); + + + /* Copying the ref 8x8 blk to the curr blk */ + for (y = 0, count = 0, index = 0; y < 8; y++, count = count + step - 8) + { + for (x = 0; x < 8; x++, count++, index++) + { + pDst[index] = pSrc[count]; + } + } + return OMX_Sts_NoErr; + } diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ExpandFrame_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ExpandFrame_I.c new file mode 100644 index 0000000000000000000000000000000000000000..9536df75bb0ce81055bb262e1114e6cd94b60648 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ExpandFrame_I.c @@ -0,0 +1,126 @@ +/** + * + * File Name: omxVCCOMM_ExpandFrame_I.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * Description: + * This function will Expand Frame boundary pixels into Plane + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + +/** + * Function: omxVCCOMM_ExpandFrame_I (6.1.3.2.1) + * + * Description: + * This function expands a reconstructed frame in-place. The unexpanded + * source frame should be stored in a plane buffer with sufficient space + * pre-allocated for edge expansion, and the input frame should be located in + * the plane buffer center. This function executes the pixel expansion by + * replicating source frame edge pixel intensities in the empty pixel + * locations (expansion region) between the source frame edge and the plane + * buffer edge. The width/height of the expansion regions on the + * horizontal/vertical edges is controlled by the parameter iExpandPels. + * + * Input Arguments: + * + * pSrcDstPlane - pointer to the top-left corner of the frame to be + * expanded; must be aligned on an 8-byte boundary. + * iFrameWidth - frame width; must be a multiple of 8. + * iFrameHeight -frame height; must be a multiple of 8. + * iExpandPels - number of pixels to be expanded in the horizontal and + * vertical directions; must be a multiple of 8. + * iPlaneStep - distance, in bytes, between the start of consecutive lines + * in the plane buffer; must be larger than or equal to + * (iFrameWidth + 2 * iExpandPels). + * + * Output Arguments: + * + * pSrcDstPlane -Pointer to the top-left corner of the frame (NOT the + * top-left corner of the plane); must be aligned on an 8-byte + * boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - pSrcDstPlane is NULL. + * - pSrcDstPlane is not aligned on an 8-byte boundary. + * - one of the following parameters is either equal to zero or is a + * non-multiple of 8: iFrameHeight, iFrameWidth, iPlaneStep, or + * iExpandPels. 
+ * - iPlaneStep < (iFrameWidth + 2 * iExpandPels).
+ *
+ */
+OMXResult omxVCCOMM_ExpandFrame_I(
+    OMX_U8* pSrcDstPlane,
+    OMX_U32 iFrameWidth,
+    OMX_U32 iFrameHeight,
+    OMX_U32 iExpandPels,
+    OMX_U32 iPlaneStep
+)
+{
+    OMX_U32 x, y;
+    OMX_U32 iTotalRows;
+    OMX_U8* pTop;
+    OMX_U8* pBottom;
+    OMX_U8* pLastRow;
+    OMX_U8* pRow;
+    OMX_U8* pLeft;
+    OMX_U8* pRight;
+
+    /* check for argument error */
+    armRetArgErrIf(pSrcDstPlane == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pSrcDstPlane), OMX_Sts_BadArgErr)
+    armRetArgErrIf(iFrameWidth == 0 || iFrameWidth & 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iFrameHeight== 0 || iFrameHeight & 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iExpandPels == 0 || iExpandPels & 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iPlaneStep == 0 || iPlaneStep & 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iPlaneStep < (iFrameWidth + 2 * iExpandPels),
+                   OMX_Sts_BadArgErr)
+
+    /* Top and Bottom: replicate the first and the last frame row */
+    pTop = pSrcDstPlane - (iExpandPels * iPlaneStep);
+    pBottom = pSrcDstPlane + (iFrameHeight * iPlaneStep);
+    pLastRow = pSrcDstPlane + ((iFrameHeight - 1) * iPlaneStep);
+
+    for (y = 0; y < iExpandPels; y++)
+    {
+        for (x = 0; x < iFrameWidth; x++)
+        {
+            pTop [y * iPlaneStep + x] = pSrcDstPlane [x];
+            pBottom [y * iPlaneStep + x] = pLastRow [x];
+        }
+    }
+
+    /*
+     * Left, Right and Corners.
+     *
+     * Every row from the first top-border row to the last bottom-border
+     * row is visited, so the corners are filled from the border rows
+     * written by the pass above. Rows are addressed by a non-negative
+     * offset from pTop: multiplying a negative signed row index by the
+     * unsigned iPlaneStep would wrap to a huge unsigned value, which is
+     * an out-of-range index whenever pointers are wider than 32 bits.
+     */
+    iTotalRows = iFrameHeight + (2 * iExpandPels);
+
+    for (y = 0; y < iTotalRows; y++)
+    {
+        pRow = pTop + (y * iPlaneStep);
+        pLeft = pRow - iExpandPels;
+        pRight = pRow + iFrameWidth;
+
+        for (x = 0; x < iExpandPels; x++)
+        {
+            pLeft [x] = pRow [0];
+            pRight [x] = pRow [iFrameWidth - 1];
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_LimitMVToRect.c
b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_LimitMVToRect.c new file mode 100644 index 0000000000000000000000000000000000000000..af045829ab87493c9ee5de3a638dc2f19578fe4a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_LimitMVToRect.c @@ -0,0 +1,81 @@ +/** + * + * File Name: omxVCCOMM_LimitMVToRect.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains module for limiting the MV + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + +/** + * Function: omxVCCOMM_LimitMVToRect (6.1.4.1.3) + * + * Description: + * Limits the motion vector associated with the current block/macroblock to + * prevent the motion compensated block/macroblock from moving outside a + * bounding rectangle as shown in Figure 6-1. + * + * Input Arguments: + * + * pSrcMV - pointer to the motion vector associated with the current block + * or macroblock + * pRectVOPRef - pointer to the bounding rectangle + * Xcoord, Ycoord - coordinates of the current block or macroblock + * size - size of the current block or macroblock; must be equal to 8 or + * 16. + * + * Output Arguments: + * + * pDstMV - pointer to the limited motion vector + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: + * pSrcMV, pDstMV, or pRectVOPRef. + * - size is not equal to either 8 or 16. + * - the width or height of the bounding rectangle is less than + * twice the block size. 
+ */ +OMXResult omxVCCOMM_LimitMVToRect( + const OMXVCMotionVector * pSrcMV, + OMXVCMotionVector *pDstMV, + const OMXRect * pRectVOPRef, + OMX_INT Xcoord, + OMX_INT Ycoord, + OMX_INT size +) +{ + /* Argument error checks */ + armRetArgErrIf(pSrcMV == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDstMV == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pRectVOPRef == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf((size != 8) && (size != 16), OMX_Sts_BadArgErr); + armRetArgErrIf((pRectVOPRef->width < (2* size)), OMX_Sts_BadArgErr); + armRetArgErrIf((pRectVOPRef->height < (2* size)), OMX_Sts_BadArgErr); + + pDstMV->dx = armMin (armMax (pSrcMV->dx, 2*pRectVOPRef->x - Xcoord), + (2*pRectVOPRef->x + pRectVOPRef->width - Xcoord - size)); + pDstMV->dy = armMin (armMax (pSrcMV->dy, 2*pRectVOPRef->y - Ycoord), + (2*pRectVOPRef->y + pRectVOPRef->height - Ycoord - size)); + + + return OMX_Sts_NoErr; +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_16x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_16x.c new file mode 100644 index 0000000000000000000000000000000000000000..0f0cedb806f762dbf972fb1ef69892d143c92c82 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_16x.c @@ -0,0 +1,80 @@ +/** + * + * File Name: omxVCCOMM_SAD_16x.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This function will calculate SAD for 16x16 and 16x8 blocks + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: omxVCCOMM_SAD_16x (6.1.4.1.4) + * + * Description: + * This function calculates the SAD for 16x16 and 16x8 blocks. + * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 16-byte + * boundary. 
+ * iStepOrg - Step of the original block buffer + * pSrcRef - Pointer to the reference block + * iStepRef - Step of the reference block buffer + * iHeight - Height of the block + * + * Output Arguments: + * + * pDstSAD - Pointer of result SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pDstSAD, or pSrcRef + * - pSrcOrg is not 16-byte aligned. + * - iStepOrg <= 0 or iStepOrg is not a multiple of 16 + * - iStepRef <= 0 or iStepRef is not a multiple of 16 + * - iHeight is not 8 or 16 + * + */ +OMXResult omxVCCOMM_SAD_16x( + const OMX_U8* pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8* pSrcRef, + OMX_U32 iStepRef, + OMX_S32* pDstSAD, + OMX_U32 iHeight +) +{ + /* check for argument error */ + armRetArgErrIf(pSrcOrg == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pSrcRef == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf((iHeight != 16) && (iHeight != 8), OMX_Sts_BadArgErr) + armRetArgErrIf(armNot16ByteAligned(pSrcOrg), OMX_Sts_BadArgErr) + armRetArgErrIf((iStepOrg == 0) || (iStepOrg & 15), OMX_Sts_BadArgErr) + armRetArgErrIf((iStepRef == 0) || (iStepRef & 15), OMX_Sts_BadArgErr) + + return armVCCOMM_SAD + (pSrcOrg, iStepOrg, pSrcRef, iStepRef, pDstSAD, iHeight, 16); +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_8x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_8x.c new file mode 100644 index 0000000000000000000000000000000000000000..1421d9940964b6e54c5b9cc9bc5026ce19145c72 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_8x.c @@ 
-0,0 +1,80 @@ +/** + * + * File Name: omxVCCOMM_SAD_8x.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This function will calculate SAD for 8x16, 8x8, 8x4 blocks + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: omxVCCOMM_SAD_8x (6.1.4.1.5) + * + * Description: + * This function calculates the SAD for 8x16, 8x8, 8x4 blocks. + * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 8-byte + * boundary. + * iStepOrg - Step of the original block buffer + * pSrcRef - Pointer to the reference block + * iStepRef - Step of the reference block buffer + * iHeight - Height of the block + * + * Output Arguments: + * + * pDstSAD -Pointer of result SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pDstSAD, or pSrcRef + * - pSrcOrg is not 8-byte aligned. 
+ * - iStepOrg <= 0 or iStepOrg is not a multiple of 8 + * - iStepRef <= 0 or iStepRef is not a multiple of 8 + * - iHeight is not 4, 8 or 16 + * + */ +OMXResult omxVCCOMM_SAD_8x( + const OMX_U8* pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8* pSrcRef, + OMX_U32 iStepRef, + OMX_S32* pDstSAD, + OMX_U32 iHeight +) +{ + /* check for argument error */ + armRetArgErrIf(pSrcOrg == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pSrcRef == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf((iHeight != 16) && (iHeight != 8) && (iHeight != 4), OMX_Sts_BadArgErr) + armRetArgErrIf(armNot8ByteAligned(pSrcOrg), OMX_Sts_BadArgErr) + armRetArgErrIf((iStepOrg == 0) || (iStepOrg & 7), OMX_Sts_BadArgErr) + armRetArgErrIf((iStepRef == 0) || (iStepRef & 7), OMX_Sts_BadArgErr) + + return armVCCOMM_SAD + (pSrcOrg, iStepOrg, pSrcRef, iStepRef, pDstSAD, iHeight, 8); +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/api/armVCM4P10_CAVLCTables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/api/armVCM4P10_CAVLCTables.h new file mode 100644 index 0000000000000000000000000000000000000000..8d18a8f4f750d654300872b00b7ce3c929403f4a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/api/armVCM4P10_CAVLCTables.h @@ -0,0 +1,34 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_CAVLCTables.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ *
+ *
+ *
+ * ----------------------------------------------------------------
+ * File: armVCM4P10_CAVLCTables.h
+ * ----------------------------------------------------------------
+ *
+ * Header file for ARM implementation of OpenMAX VCM4P10
+ *
+ */
+
+#ifndef ARMVCM4P10_CAVLCTABLES_H
+#define ARMVCM4P10_CAVLCTABLES_H
+
+/* CAVLC tables */
+
+/* Map a code to its TrailingOnes / TotalCoeff value (62 entries each). */
+extern const OMX_U8 armVCM4P10_CAVLCTrailingOnes[62];
+extern const OMX_U8 armVCM4P10_CAVLCTotalCoeff[62];
+/* Five pointers to the coefficient-token VLC tables. */
+extern const ARM_VLC32 *armVCM4P10_CAVLCCoeffTokenTables[5];
+/* VLC table for level_prefix; the final {0, 0} entry closes the table. */
+extern const ARM_VLC32 armVCM4P10_CAVLCLevelPrefix[17];
+/* Pointer arrays selecting among the total_zeros and run_before VLC tables. */
+extern const ARM_VLC32 *armVCM4P10_CAVLCTotalZeroTables[15];
+extern const ARM_VLC32 *armVCM4P10_CAVLCTotalZeros2x2Tables[3];
+extern const ARM_VLC32 *armVCM4P10_CAVLCRunBeforeTables[7];
+
+#endif
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CAVLCTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CAVLCTables.c
new file mode 100644
index 0000000000000000000000000000000000000000..f4e36ad6cf985ea8a27d391d076da2f12df45387
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CAVLCTables.c
@@ -0,0 +1,703 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_CAVLCTables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + * + * + * CAVLC tables for H.264 + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM_Bitstream.h" +#include "armVC.h" +#include "armVCM4P10_CAVLCTables.h" + +/* Tables mapping a code to TrailingOnes and TotalCoeff */ + +const OMX_U8 armVCM4P10_CAVLCTrailingOnes[62] = { + 0, + 0, 1, + 0, 1, 2, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3 +}; + +const OMX_U8 armVCM4P10_CAVLCTotalCoeff[62] = { + 0, + 1, 1, + 2, 2, 2, + 3, 3, 3, 3, + 4, 4, 4, 4, + 5, 5, 5, 5, + 6, 6, 6, 6, + 7, 7, 7, 7, + 8, 8, 8, 8, + 9, 9, 9, 9, + 10, 10, 10, 10, + 11, 11, 11, 11, + 12, 12, 12, 12, + 13, 13, 13, 13, + 14, 14, 14, 14, + 15, 15, 15, 15, + 16, 16, 16, 16 +}; + +static const ARM_VLC32 armVCM4P10_CAVLCCoeffToken0[63] = { + { 1, 0x0001 }, + { 6, 0x0005 }, + { 2, 0x0001 }, + { 8, 0x0007 }, + { 6, 0x0004 }, + { 3, 0x0001 }, + { 9, 0x0007 }, + { 8, 0x0006 }, + { 7, 0x0005 }, + { 5, 0x0003 }, + { 10, 0x0007 }, + { 9, 0x0006 }, + { 8, 0x0005 }, + { 6, 0x0003 }, + { 11, 0x0007 }, + { 10, 0x0006 }, + { 9, 0x0005 }, + { 7, 0x0004 }, + { 13, 0x000f }, + { 11, 0x0006 }, + { 10, 0x0005 }, + { 8, 0x0004 }, + { 13, 0x000b }, + { 13, 0x000e }, + { 11, 0x0005 }, + { 9, 0x0004 }, + { 13, 0x0008 }, + { 13, 0x000a }, + { 13, 0x000d }, + { 10, 0x0004 }, + { 14, 0x000f }, + { 14, 0x000e }, + { 13, 0x0009 }, + { 11, 0x0004 }, + { 14, 0x000b }, + { 14, 0x000a }, + { 14, 0x000d }, + { 13, 0x000c }, + { 15, 0x000f }, + { 15, 0x000e }, + { 14, 0x0009 }, + { 14, 0x000c }, + { 15, 0x000b }, + { 15, 0x000a }, + { 15, 0x000d }, + { 14, 0x0008 }, + { 16, 0x000f }, + { 15, 0x0001 }, + { 15, 0x0009 }, + { 15, 0x000c }, + { 16, 0x000b }, + { 16, 0x000e }, + { 16, 0x000d }, + { 15, 0x0008 }, + { 16, 0x0007 }, + { 16, 0x000a }, + { 16, 0x0009 }, + { 16, 0x000c }, + { 16, 0x0004 }, + { 16, 0x0006 }, + { 16, 0x0005 }, + { 16, 
0x0008 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCCoeffToken1[63] = { + { 2, 0x0003 }, + { 6, 0x000b }, + { 2, 0x0002 }, + { 6, 0x0007 }, + { 5, 0x0007 }, + { 3, 0x0003 }, + { 7, 0x0007 }, + { 6, 0x000a }, + { 6, 0x0009 }, + { 4, 0x0005 }, + { 8, 0x0007 }, + { 6, 0x0006 }, + { 6, 0x0005 }, + { 4, 0x0004 }, + { 8, 0x0004 }, + { 7, 0x0006 }, + { 7, 0x0005 }, + { 5, 0x0006 }, + { 9, 0x0007 }, + { 8, 0x0006 }, + { 8, 0x0005 }, + { 6, 0x0008 }, + { 11, 0x000f }, + { 9, 0x0006 }, + { 9, 0x0005 }, + { 6, 0x0004 }, + { 11, 0x000b }, + { 11, 0x000e }, + { 11, 0x000d }, + { 7, 0x0004 }, + { 12, 0x000f }, + { 11, 0x000a }, + { 11, 0x0009 }, + { 9, 0x0004 }, + { 12, 0x000b }, + { 12, 0x000e }, + { 12, 0x000d }, + { 11, 0x000c }, + { 12, 0x0008 }, + { 12, 0x000a }, + { 12, 0x0009 }, + { 11, 0x0008 }, + { 13, 0x000f }, + { 13, 0x000e }, + { 13, 0x000d }, + { 12, 0x000c }, + { 13, 0x000b }, + { 13, 0x000a }, + { 13, 0x0009 }, + { 13, 0x000c }, + { 13, 0x0007 }, + { 14, 0x000b }, + { 13, 0x0006 }, + { 13, 0x0008 }, + { 14, 0x0009 }, + { 14, 0x0008 }, + { 14, 0x000a }, + { 13, 0x0001 }, + { 14, 0x0007 }, + { 14, 0x0006 }, + { 14, 0x0005 }, + { 14, 0x0004 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCCoeffToken2[63] = { + { 4, 0x000f }, + { 6, 0x000f }, + { 4, 0x000e }, + { 6, 0x000b }, + { 5, 0x000f }, + { 4, 0x000d }, + { 6, 0x0008 }, + { 5, 0x000c }, + { 5, 0x000e }, + { 4, 0x000c }, + { 7, 0x000f }, + { 5, 0x000a }, + { 5, 0x000b }, + { 4, 0x000b }, + { 7, 0x000b }, + { 5, 0x0008 }, + { 5, 0x0009 }, + { 4, 0x000a }, + { 7, 0x0009 }, + { 6, 0x000e }, + { 6, 0x000d }, + { 4, 0x0009 }, + { 7, 0x0008 }, + { 6, 0x000a }, + { 6, 0x0009 }, + { 4, 0x0008 }, + { 8, 0x000f }, + { 7, 0x000e }, + { 7, 0x000d }, + { 5, 0x000d }, + { 8, 0x000b }, + { 8, 0x000e }, + { 7, 0x000a }, + { 6, 0x000c }, + { 9, 0x000f }, + { 8, 0x000a }, + { 8, 0x000d }, + { 7, 0x000c }, + { 9, 0x000b }, + { 9, 0x000e }, + { 8, 0x0009 }, + { 8, 0x000c }, + { 9, 0x0008 }, + { 
9, 0x000a }, + { 9, 0x000d }, + { 8, 0x0008 }, + { 10, 0x000d }, + { 9, 0x0007 }, + { 9, 0x0009 }, + { 9, 0x000c }, + { 10, 0x0009 }, + { 10, 0x000c }, + { 10, 0x000b }, + { 10, 0x000a }, + { 10, 0x0005 }, + { 10, 0x0008 }, + { 10, 0x0007 }, + { 10, 0x0006 }, + { 10, 0x0001 }, + { 10, 0x0004 }, + { 10, 0x0003 }, + { 10, 0x0002 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCCoeffToken3[63] = { + { 6, 0x0003 }, + { 6, 0x0000 }, + { 6, 0x0001 }, + { 6, 0x0004 }, + { 6, 0x0005 }, + { 6, 0x0006 }, + { 6, 0x0008 }, + { 6, 0x0009 }, + { 6, 0x000a }, + { 6, 0x000b }, + { 6, 0x000c }, + { 6, 0x000d }, + { 6, 0x000e }, + { 6, 0x000f }, + { 6, 0x0010 }, + { 6, 0x0011 }, + { 6, 0x0012 }, + { 6, 0x0013 }, + { 6, 0x0014 }, + { 6, 0x0015 }, + { 6, 0x0016 }, + { 6, 0x0017 }, + { 6, 0x0018 }, + { 6, 0x0019 }, + { 6, 0x001a }, + { 6, 0x001b }, + { 6, 0x001c }, + { 6, 0x001d }, + { 6, 0x001e }, + { 6, 0x001f }, + { 6, 0x0020 }, + { 6, 0x0021 }, + { 6, 0x0022 }, + { 6, 0x0023 }, + { 6, 0x0024 }, + { 6, 0x0025 }, + { 6, 0x0026 }, + { 6, 0x0027 }, + { 6, 0x0028 }, + { 6, 0x0029 }, + { 6, 0x002a }, + { 6, 0x002b }, + { 6, 0x002c }, + { 6, 0x002d }, + { 6, 0x002e }, + { 6, 0x002f }, + { 6, 0x0030 }, + { 6, 0x0031 }, + { 6, 0x0032 }, + { 6, 0x0033 }, + { 6, 0x0034 }, + { 6, 0x0035 }, + { 6, 0x0036 }, + { 6, 0x0037 }, + { 6, 0x0038 }, + { 6, 0x0039 }, + { 6, 0x003a }, + { 6, 0x003b }, + { 6, 0x003c }, + { 6, 0x003d }, + { 6, 0x003e }, + { 6, 0x003f }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCCoeffToken4[15] = { + { 2, 0x0001 }, + { 6, 0x0007 }, + { 1, 0x0001 }, + { 6, 0x0004 }, + { 6, 0x0006 }, + { 3, 0x0001 }, + { 6, 0x0003 }, + { 7, 0x0003 }, + { 7, 0x0002 }, + { 6, 0x0005 }, + { 6, 0x0002 }, + { 8, 0x0003 }, + { 8, 0x0002 }, + { 7, 0x0000 }, + { 0, 0x0000 } +}; + + +const ARM_VLC32 *armVCM4P10_CAVLCCoeffTokenTables[5] = { + armVCM4P10_CAVLCCoeffToken0, + armVCM4P10_CAVLCCoeffToken1, + armVCM4P10_CAVLCCoeffToken2, + armVCM4P10_CAVLCCoeffToken3, + 
armVCM4P10_CAVLCCoeffToken4 +}; + +/* Table for level_prefix */ + +const ARM_VLC32 armVCM4P10_CAVLCLevelPrefix[17] = { + { 1, 1}, + { 2, 1}, + { 3, 1}, + { 4, 1}, + { 5, 1}, + { 6, 1}, + { 7, 1}, + { 8, 1}, + { 9, 1}, + { 10, 1}, + { 11, 1}, + { 12, 1}, + { 13, 1}, + { 14, 1}, + { 15, 1}, + { 16, 1}, + { 0, 0} +}; + +/* Tables for total_zeros */ + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros1[17] = { + { 1, 0x0001 }, + { 3, 0x0003 }, + { 3, 0x0002 }, + { 4, 0x0003 }, + { 4, 0x0002 }, + { 5, 0x0003 }, + { 5, 0x0002 }, + { 6, 0x0003 }, + { 6, 0x0002 }, + { 7, 0x0003 }, + { 7, 0x0002 }, + { 8, 0x0003 }, + { 8, 0x0002 }, + { 9, 0x0003 }, + { 9, 0x0002 }, + { 9, 0x0001 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros2[16] = { + { 3, 0x0007 }, + { 3, 0x0006 }, + { 3, 0x0005 }, + { 3, 0x0004 }, + { 3, 0x0003 }, + { 4, 0x0005 }, + { 4, 0x0004 }, + { 4, 0x0003 }, + { 4, 0x0002 }, + { 5, 0x0003 }, + { 5, 0x0002 }, + { 6, 0x0003 }, + { 6, 0x0002 }, + { 6, 0x0001 }, + { 6, 0x0000 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros3[15] = { + { 4, 0x0005 }, + { 3, 0x0007 }, + { 3, 0x0006 }, + { 3, 0x0005 }, + { 4, 0x0004 }, + { 4, 0x0003 }, + { 3, 0x0004 }, + { 3, 0x0003 }, + { 4, 0x0002 }, + { 5, 0x0003 }, + { 5, 0x0002 }, + { 6, 0x0001 }, + { 5, 0x0001 }, + { 6, 0x0000 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros4[14] = { + { 5, 0x0003 }, + { 3, 0x0007 }, + { 4, 0x0005 }, + { 4, 0x0004 }, + { 3, 0x0006 }, + { 3, 0x0005 }, + { 3, 0x0004 }, + { 4, 0x0003 }, + { 3, 0x0003 }, + { 4, 0x0002 }, + { 5, 0x0002 }, + { 5, 0x0001 }, + { 5, 0x0000 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros5[13] = { + { 4, 0x0005 }, + { 4, 0x0004 }, + { 4, 0x0003 }, + { 3, 0x0007 }, + { 3, 0x0006 }, + { 3, 0x0005 }, + { 3, 0x0004 }, + { 3, 0x0003 }, + { 4, 0x0002 }, + { 5, 0x0001 }, + { 4, 0x0001 }, + { 5, 0x0000 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 
armVCM4P10_CAVLCTotalZeros6[12] = { + { 6, 0x0001 }, + { 5, 0x0001 }, + { 3, 0x0007 }, + { 3, 0x0006 }, + { 3, 0x0005 }, + { 3, 0x0004 }, + { 3, 0x0003 }, + { 3, 0x0002 }, + { 4, 0x0001 }, + { 3, 0x0001 }, + { 6, 0x0000 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros7[11] = { + { 6, 0x0001 }, + { 5, 0x0001 }, + { 3, 0x0005 }, + { 3, 0x0004 }, + { 3, 0x0003 }, + { 2, 0x0003 }, + { 3, 0x0002 }, + { 4, 0x0001 }, + { 3, 0x0001 }, + { 6, 0x0000 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros8[10] = { + { 6, 0x0001 }, + { 4, 0x0001 }, + { 5, 0x0001 }, + { 3, 0x0003 }, + { 2, 0x0003 }, + { 2, 0x0002 }, + { 3, 0x0002 }, + { 3, 0x0001 }, + { 6, 0x0000 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros9[9] = { + { 6, 0x0001 }, + { 6, 0x0000 }, + { 4, 0x0001 }, + { 2, 0x0003 }, + { 2, 0x0002 }, + { 3, 0x0001 }, + { 2, 0x0001 }, + { 5, 0x0001 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros10[8] = { + { 5, 0x0001 }, + { 5, 0x0000 }, + { 3, 0x0001 }, + { 2, 0x0003 }, + { 2, 0x0002 }, + { 2, 0x0001 }, + { 4, 0x0001 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros11[7] = { + { 4, 0x0000 }, + { 4, 0x0001 }, + { 3, 0x0001 }, + { 3, 0x0002 }, + { 1, 0x0001 }, + { 3, 0x0003 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros12[6] = { + { 4, 0x0000 }, + { 4, 0x0001 }, + { 2, 0x0001 }, + { 1, 0x0001 }, + { 3, 0x0001 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros13[5] = { + { 3, 0x0000 }, + { 3, 0x0001 }, + { 1, 0x0001 }, + { 2, 0x0001 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros14[4] = { + { 2, 0x0000 }, + { 2, 0x0001 }, + { 1, 0x0001 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros15[3] = { + { 1, 0x0000 }, + { 1, 0x0001 }, + { 0, 0x0000 } +}; + +const ARM_VLC32 *armVCM4P10_CAVLCTotalZeroTables[15] = { + armVCM4P10_CAVLCTotalZeros1, + 
armVCM4P10_CAVLCTotalZeros2, + armVCM4P10_CAVLCTotalZeros3, + armVCM4P10_CAVLCTotalZeros4, + armVCM4P10_CAVLCTotalZeros5, + armVCM4P10_CAVLCTotalZeros6, + armVCM4P10_CAVLCTotalZeros7, + armVCM4P10_CAVLCTotalZeros8, + armVCM4P10_CAVLCTotalZeros9, + armVCM4P10_CAVLCTotalZeros10, + armVCM4P10_CAVLCTotalZeros11, + armVCM4P10_CAVLCTotalZeros12, + armVCM4P10_CAVLCTotalZeros13, + armVCM4P10_CAVLCTotalZeros14, + armVCM4P10_CAVLCTotalZeros15 +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros2x2_1[5] = { + { 1, 1 }, + { 2, 1 }, + { 3, 1 }, + { 3, 0 }, + { 0, 0 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros2x2_2[4] = { + { 1, 1 }, + { 2, 1 }, + { 2, 0 }, + { 0, 0 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros2x2_3[3] = { + { 1, 1 }, + { 1, 0 }, + { 0, 0 } +}; + +const ARM_VLC32 *armVCM4P10_CAVLCTotalZeros2x2Tables[3] = { + armVCM4P10_CAVLCTotalZeros2x2_1, + armVCM4P10_CAVLCTotalZeros2x2_2, + armVCM4P10_CAVLCTotalZeros2x2_3 +}; + + +/* Tables for run_before */ + +static const ARM_VLC32 armVCM4P10_CAVLCRunBefore1[3] = { + { 1, 0x0001 }, + { 1, 0x0000 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCRunBefore2[4] = { + { 1, 0x0001 }, + { 2, 0x0001 }, + { 2, 0x0000 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCRunBefore3[5] = { + { 2, 0x0003 }, + { 2, 0x0002 }, + { 2, 0x0001 }, + { 2, 0x0000 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCRunBefore4[6] = { + { 2, 0x0003 }, + { 2, 0x0002 }, + { 2, 0x0001 }, + { 3, 0x0001 }, + { 3, 0x0000 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCRunBefore5[7] = { + { 2, 0x0003 }, + { 2, 0x0002 }, + { 3, 0x0003 }, + { 3, 0x0002 }, + { 3, 0x0001 }, + { 3, 0x0000 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCRunBefore6[8] = { + { 2, 0x0003 }, + { 3, 0x0000 }, + { 3, 0x0001 }, + { 3, 0x0003 }, + { 3, 0x0002 }, + { 3, 0x0005 }, + { 3, 0x0004 }, + { 0, 0x0000 } +}; + +static const ARM_VLC32 armVCM4P10_CAVLCRunBefore7[16] = { + { 3, 
0x0007 }, + { 3, 0x0006 }, + { 3, 0x0005 }, + { 3, 0x0004 }, + { 3, 0x0003 }, + { 3, 0x0002 }, + { 3, 0x0001 }, + { 4, 0x0001 }, + { 5, 0x0001 }, + { 6, 0x0001 }, + { 7, 0x0001 }, + { 8, 0x0001 }, + { 9, 0x0001 }, + { 10, 0x0001 }, + { 11, 0x0001 }, + { 0, 0x0000 } +}; + +const ARM_VLC32 *armVCM4P10_CAVLCRunBeforeTables[7] = { + armVCM4P10_CAVLCRunBefore1, + armVCM4P10_CAVLCRunBefore2, + armVCM4P10_CAVLCRunBefore3, + armVCM4P10_CAVLCRunBefore4, + armVCM4P10_CAVLCRunBefore5, + armVCM4P10_CAVLCRunBefore6, + armVCM4P10_CAVLCRunBefore7 +}; diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c new file mode 100644 index 0000000000000000000000000000000000000000..e4bedc26b9dc5dd54dae726c4e8e26d9800f425a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c @@ -0,0 +1,133 @@ +/** + * + * File Name: armVCM4P10_CompareMotionCostToMV.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Description: + * Contains module for comparing motion vectors and SAD's to decide + * the best MV and SAD + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: armVCM4P10_ExpGolBitsUsed + * + * Description: + * Performs calculating Exp-Golomb code length for a given values + * + * Remarks: + * + * Parameters: + * [in] val Signed number for which Exp-Golomb code length has + * to be calculated + * + * Return Value: + * Returns the length of the Exp-Golomb code for val + */ + +static OMX_U16 armVCM4P10_ExpGolBitsUsed (OMX_S16 val) +{ + OMX_U16 sizeCodeNum, codeNum; + + /* Mapping val to codeNum */ + codeNum = armAbs (val); + if (val > 0) + { + codeNum = (2 * codeNum) - 1; + } + else + { + codeNum = 2 * codeNum; + } + + /* Size of the exp-golomb code */ + sizeCodeNum = (2 * armLogSize (codeNum + 1)) - 1; + + return sizeCodeNum; +} + + +/** + * Function: armVCM4P10_CompareMotionCostToMV + * + * Description: + * Performs comparision of motion vectors and Motion cost to decide the + * best MV and best MC + * + * Remarks: + * + * Parameters: + * [in] mvX x coordinate of the candidate motion vector in 1/4 pel units + * [in] mvY y coordinate of the candidate motion vector in 1/4 pel units + * [in] diffMV differential MV + * [in] candSAD Candidate SAD + * [in] bestMV Best MV, contains best MV till the previous interation. + * [in] nLamda Lamda factor; used to compute motion cost + * [in] *pBestCost Contains the current best motion cost. + * [out] *pBestCost pBestCost Motion cost will be associated with the best MV + * after judgement; + * computed as SAD+Lamda*BitsUsedByMV, if the candCost is less + * than the best cost passed then the *pBestCost will be equal to candCost + * [out] bestMV Finally will have the best MV after the judgement. 
+ * + * Return Value: + * OMX_INT -- 1 to indicate that the current motion cost is the best + * 0 to indicate that it is NOT the best motion cost + */ + +OMX_INT armVCM4P10_CompareMotionCostToMV ( + OMX_S16 mvX, + OMX_S16 mvY, + OMXVCMotionVector diffMV, + OMX_INT candSAD, + OMXVCMotionVector *bestMV, + OMX_U32 nLamda, + OMX_S32 *pBestCost +) +{ + OMX_S32 candCost; + OMX_U16 sizeCodeNum; + + sizeCodeNum = armVCM4P10_ExpGolBitsUsed (diffMV.dx); + sizeCodeNum += armVCM4P10_ExpGolBitsUsed (diffMV.dy); + + /* Motion cost = SAD + lamda * ((bitsused(diffMVx) + (bitsused(diffMVy))*/ + candCost = candSAD + (nLamda * sizeCodeNum); + + /* Calculate candCost */ + if (candCost < *pBestCost) + { + *pBestCost = candCost; + bestMV->dx = mvX; + bestMV->dy = mvY; + return 1; + } + if (candCost > *pBestCost) + { + return 0; + } + /* shorter motion vector */ + if ( (mvX * mvX + mvY * mvY) < ((bestMV->dx * bestMV->dx) + (bestMV->dy * bestMV->dy)) ) + { + *pBestCost = candCost; + bestMV->dx = mvX; + bestMV->dy = mvY; + return 1; + } + + return 0; +} + +/*End of File*/ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DeBlockPixel.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DeBlockPixel.c new file mode 100644 index 0000000000000000000000000000000000000000..f4fb1d9fae48a2a09fd597b0e478488561b166a0 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DeBlockPixel.c @@ -0,0 +1,151 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_DeBlockPixel.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * H.264 luma deblock module + * + */ + +#ifdef DEBUG_ARMVCM4P10_DEBLOCKPIXEL +#undef DEBUG_ON +#define DEBUG_ON +#endif /* DEBUG_ARMVCM4P10_DEBLOCKPIXEL */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/* + * Description + * Deblock one boundary pixel + * + * Parameters: + * [in] pQ0 Pointer to pixel q0 + * [in] Step Step between pixels q0 and q1 + * [in] tC0 Edge threshold value + * [in] alpha alpha threshold value + * [in] beta beta threshold value + * [in] bS deblocking strength + * [in] ChromaFlag True for chroma blocks + * [out] pQ0 Deblocked pixels + * + */ + +void armVCM4P10_DeBlockPixel( + OMX_U8 *pQ0, /* pointer to the pixel q0 */ + int Step, /* step between pixels q0 and q1 */ + int tC0, /* edge threshold value */ + int alpha, /* alpha */ + int beta, /* beta */ + int bS, /* deblocking strength */ + int ChromaFlag +) +{ + int p3, p2, p1, p0, q0, q1, q2, q3; + int ap, aq, delta; + + if (bS==0) + { + return; + } + + p3 = pQ0[-4*Step]; + p2 = pQ0[-3*Step]; + p1 = pQ0[-2*Step]; + p0 = pQ0[-1*Step]; + q0 = pQ0[ 0*Step]; + q1 = pQ0[ 1*Step]; + q2 = pQ0[ 2*Step]; + q3 = pQ0[ 3*Step]; + + if (armAbs(p0-q0)>=alpha || armAbs(p1-p0)>=beta || armAbs(q1-q0)>=beta) + { + DEBUG_PRINTF_10("DeBlockPixel: %02x %02x %02x %02x | %02x %02x %02x %02x alpha=%d beta=%d\n", + p3, p2, p1, p0, q0, q1, q2, q3, alpha, beta); + return; + } + + ap = armAbs(p2 - p0); + aq = armAbs(q2 - q0); + + if (bS < 4) + { + int tC = tC0; + + if (ChromaFlag) + { + tC++; + } + else + { + if (ap < beta) + { + tC++; + } + if (aq < beta) + { + tC++; + } + } + + delta = (((q0-p0)<<2) + (p1-q1) + 4) >> 3; + delta = armClip(-tC, tC, delta); + + pQ0[-1*Step] = (OMX_U8)armClip(0, 255, p0 + delta); + pQ0[ 0*Step] = (OMX_U8)armClip(0, 255, q0 - delta); + + if (ChromaFlag==0 && ap>1) - (p1<<1))>>1; + delta = armClip(-tC0, tC0, delta); + pQ0[-2*Step] = (OMX_U8)(p1 + delta); + } + + if (ChromaFlag==0 && aq>1) - (q1<<1))>>1; + delta = 
armClip(-tC0, tC0, delta); + pQ0[ 1*Step] = (OMX_U8)(q1 + delta); + } + } + else /* bS==4 */ + { + if (ChromaFlag==0 && ap>2)+2)) + { + pQ0[-1*Step] = (OMX_U8)((p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3); + pQ0[-2*Step] = (OMX_U8)((p2 + p1 + p0 + q0 + 2)>>2); + pQ0[-3*Step] = (OMX_U8)((2*p3 + 3*p2 + p1 + p0 + q0 + 4)>>3); + } + else + { + pQ0[-1*Step] = (OMX_U8)((2*p1 + p0 + q1 + 2)>>2); + } + + if (ChromaFlag==0 && aq>2)+2)) + { + pQ0[ 0*Step] = (OMX_U8)((q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4)>>3); + pQ0[ 1*Step] = (OMX_U8)((q2 + q1 + p0 + q0 + 2)>>2); + pQ0[ 2*Step] = (OMX_U8)((2*q3 + 3*q2 + q1 + q0 + p0 + 4)>>3); + } + else + { + pQ0[ 0*Step] = (OMX_U8)((2*q1 + q0 + p1 + 2)>>2); + } + } + + DEBUG_PRINTF_13("DeBlockPixel: %02x %02x %02x %02x | %02x %02x %02x %02x bS=%d -> %02x %02x %02x %02x\n", + p3, p2, p1, p0, q0, q1, q2, q3, bS, + pQ0[-2*Step], pQ0[-1*Step],pQ0[0*Step],pQ0[1*Step]); + +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c new file mode 100644 index 0000000000000000000000000000000000000000..7616add02dfd6ffafd28835823b3d9ed024ec294 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c @@ -0,0 +1,267 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_DecodeCoeffsToPair.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * H.264 decode coefficients module + * + */ + +#ifdef DEBUG_ARMVCM4P10_DECODECOEFFSTOPAIR +#undef DEBUG_ON +#define DEBUG_ON +#endif + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armCOMM_Bitstream.h" +#include "armVCM4P10_CAVLCTables.h" + +/* 4x4 DeZigZag table: entry k is the raster index (row * 4 + column) of the k-th coefficient in zig-zag scan order. armVCM4P10_DecodeCoeffsToPair looks the coefficient position up here when sMaxNumCoeff > 4; valid indices are 0..15. */ + +static const OMX_U8 armVCM4P10_ZigZag[16] = +{ + 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 +}; + +/* + * Description: + * This function perform the work required by the OpenMAX + * DecodeCoeffsToPair function and DecodeChromaDCCoeffsToPair. + * Since most of the code is common we share it here. + * + * Parameters: + * [in] ppBitStream Double pointer to current byte in bit stream buffer + * [in] pOffset Pointer to current bit position in the byte pointed + * to by *ppBitStream + * [in] sMaxNumCoeff Maximum number of non-zero coefficients in current + * block (4,15 or 16) + * [in] nTable Table number (0 to 4) according to the five columns + * of Table 9-5 in the H.264 spec + * [out] ppBitStream *ppBitStream is updated after each block is decoded + * [out] pOffset *pOffset is updated after each block is decoded + * [out] pNumCoeff Pointer to the number of nonzero coefficients in + * this block + * [out] ppPosCoefbuf Double pointer to destination residual + * coefficient-position pair buffer + * Return Value: + * Standard omxError result. See enumeration for possible result codes. 
+ + */ + +OMXResult armVCM4P10_DecodeCoeffsToPair( + const OMX_U8** ppBitStream, + OMX_S32* pOffset, + OMX_U8* pNumCoeff, + OMX_U8 **ppPosCoefbuf, + OMX_INT nTable, + OMX_INT sMaxNumCoeff + ) +{ + int CoeffToken, TotalCoeff, TrailingOnes; + int Level, LevelCode, LevelPrefix, LevelSuffix, LevelSuffixSize; + int SuffixLength, Run, ZerosLeft,CoeffNum; + int i, Flags; + OMX_U8 *pPosCoefbuf = *ppPosCoefbuf; + OMX_S16 pLevel[16]; + OMX_U8 pRun[16]; + + CoeffToken = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCCoeffTokenTables[nTable]); + armRetDataErrIf(CoeffToken == ARM_NO_CODEBOOK_INDEX, OMX_Sts_Err); + + TrailingOnes = armVCM4P10_CAVLCTrailingOnes[CoeffToken]; + TotalCoeff = armVCM4P10_CAVLCTotalCoeff[CoeffToken]; + *pNumCoeff = (OMX_U8)TotalCoeff; + + DEBUG_PRINTF_2("TotalCoeff = %d, TrailingOnes = %d\n", TotalCoeff, TrailingOnes); + + if (TotalCoeff == 0) + { + /* Nothing to do */ + return OMX_Sts_NoErr; + } + + /* Decode trailing ones */ + for (i=TotalCoeff-1; i>=TotalCoeff-TrailingOnes; i--) + { + if (armGetBits(ppBitStream, pOffset, 1)) + { + Level = -1; + } + else + { + Level = +1; + } + pLevel[i] = (OMX_S16)Level; + + DEBUG_PRINTF_2("Level[%d] = %d\n", i, pLevel[i]); + } + + /* Decode (non zero) level values */ + SuffixLength = 0; + if (TotalCoeff>10 && TrailingOnes<3) + { + SuffixLength=1; + } + for ( ; i>=0; i--) + { + LevelPrefix = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCLevelPrefix); + armRetDataErrIf(LevelPrefix == ARM_NO_CODEBOOK_INDEX, OMX_Sts_Err); + + LevelSuffixSize = SuffixLength; + if (LevelPrefix==14 && SuffixLength==0) + { + LevelSuffixSize = 4; + } + if (LevelPrefix==15) + { + LevelSuffixSize = 12; + } + + LevelSuffix = 0; + if (LevelSuffixSize > 0) + { + LevelSuffix = armGetBits(ppBitStream, pOffset, LevelSuffixSize); + } + + LevelCode = (LevelPrefix << SuffixLength) + LevelSuffix; + + + if (LevelPrefix==15 && SuffixLength==0) + { + LevelCode += 15; + } + + /* LevelCode = 2*(magnitude-1) + sign */ + + if 
(i==TotalCoeff-1-TrailingOnes && TrailingOnes<3) + { + /* Level magnitude can't be 1 */ + LevelCode += 2; + } + if (LevelCode & 1) + { + /* 2a+1 maps to -a-1 */ + Level = (-LevelCode-1)>>1; + } + else + { + /* 2a+0 maps to +a+1 */ + Level = (LevelCode+2)>>1; + } + pLevel[i] = (OMX_S16)Level; + + DEBUG_PRINTF_2("Level[%d] = %d\n", i, pLevel[i]); + + if (SuffixLength==0) + { + SuffixLength=1; + } + if ( ((LevelCode>>1)+1)>(3<<(SuffixLength-1)) && SuffixLength<6 ) + { + SuffixLength++; + } + } + + /* Decode run values */ + ZerosLeft = 0; + if (TotalCoeff < sMaxNumCoeff) + { + /* Decode TotalZeros VLC */ + if (sMaxNumCoeff==4) + { + ZerosLeft = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCTotalZeros2x2Tables[TotalCoeff-1]); + armRetDataErrIf(ZerosLeft ==ARM_NO_CODEBOOK_INDEX , OMX_Sts_Err); + } + else + { + ZerosLeft = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCTotalZeroTables[TotalCoeff-1]); + armRetDataErrIf(ZerosLeft ==ARM_NO_CODEBOOK_INDEX , OMX_Sts_Err); + } + } + + DEBUG_PRINTF_1("TotalZeros = %d\n", ZerosLeft); + + CoeffNum=ZerosLeft+TotalCoeff-1; + + for (i=TotalCoeff-1; i>0; i--) + { + Run = 0; + if (ZerosLeft > 0) + { + int Table = ZerosLeft; + if (Table > 6) + { + Table = 7; + } + Run = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCRunBeforeTables[Table-1]); + armRetDataErrIf(Run == ARM_NO_CODEBOOK_INDEX, OMX_Sts_Err); + } + pRun[i] = (OMX_U8)Run; + + DEBUG_PRINTF_2("Run[%d] = %d\n", i, pRun[i]); + + ZerosLeft -= Run; + } + pRun[0] = (OMX_U8)ZerosLeft; + + DEBUG_PRINTF_1("Run[0] = %d\n", pRun[i]); + + + /* Fill in coefficients */ + + if (sMaxNumCoeff==15) + { + CoeffNum++; /* Skip the DC position */ + } + + /*for (i=0;i=0; i--) + { + /*CoeffNum += pRun[i]+1;*/ + Level = pLevel[i]; + + DEBUG_PRINTF_2("Coef[%d] = %d\n", CoeffNum, Level); + + Flags = CoeffNum; + CoeffNum -= (pRun[i]+1); + if (sMaxNumCoeff>4) + { + /* Perform 4x4 DeZigZag */ + Flags = armVCM4P10_ZigZag[Flags]; + } + if (i==0) + { + /* End of block flag */ + Flags += 
0x20; + } + if (Level<-128 || Level>127) + { + /* Overflow flag */ + Flags += 0x10; + } + + *pPosCoefbuf++ = (OMX_U8)(Flags); + *pPosCoefbuf++ = (OMX_U8)(Level & 0xFF); + if (Flags & 0x10) + { + *pPosCoefbuf++ = (OMX_U8)(Level>>8); + } + } + + *ppPosCoefbuf = pPosCoefbuf; + + return OMX_Sts_NoErr; +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DequantTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DequantTables.c new file mode 100644 index 0000000000000000000000000000000000000000..d9c2541e5eaaef5a4658459b4c6cb1f5a980454c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DequantTables.c @@ -0,0 +1,45 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_DequantTables.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * H.264 inverse quantize tables + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" +#include "armVC.h" + + +/* One entry per position of a 4x4 block, each in 0..2. NOTE(review): presumably the column of armVCM4P10_VMatrix to use for that coefficient position - confirm against the dequantisation code that reads it. */ +const OMX_U8 armVCM4P10_PosToVCol4x4[16] = +{ + 0, 2, 0, 2, + 2, 1, 2, 1, + 0, 2, 0, 2, + 2, 1, 2, 1 +}; + +/* Same mapping for a 2x2 block (identical to the top-left 2x2 corner of the 4x4 table). NOTE(review): presumably used for chroma DC - confirm against callers. */ +const OMX_U8 armVCM4P10_PosToVCol2x2[4] = +{ + 0, 2, + 2, 1 +}; + +/* 6 rows x 3 columns of scaling factors. NOTE(review): the values match the H.264 dequantisation scale table (one row per QP % 6, one column per coefficient-position class) - confirm against the code that indexes it. */ +const OMX_U8 armVCM4P10_VMatrix[6][3] = +{ + { 10, 16, 13 }, + { 11, 18, 14 }, + { 13, 20, 16 }, + { 14, 23, 18 }, + { 16, 25, 20 }, + { 18, 29, 23 } +}; diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..93d54c3db124d7cc4a1e46356556497c680686d7 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c @@ -0,0 +1,78 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_FwdTransformResidual4x4.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * H.264 transform module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/* + * Description: + * Forward Transform Residual 4x4 Coefficients + * + * Parameters: + * [in] pSrc Source 4x4 block + * [out] pDst Destination 4x4 block + * + */ +void armVCM4P10_FwdTransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc) +{ + int i; + + /* Transform rows */ + for (i=0; i<16; i+=4) + { + int d0 = pSrc[i+0]; + int d1 = pSrc[i+1]; + int d2 = pSrc[i+2]; + int d3 = pSrc[i+3]; + int e0 = d0 + d3; + int e1 = d0 - d3; + int e2 = d1 + d2; + int e3 = d1 - d2; + int f0 = e0 + e2; + int f1 = (e1 << 1) + e3; + int f2 = e0 - e2; + int f3 = e1 - (e3 << 1); + pDst[i+0] = (OMX_S16)f0; + pDst[i+1] = (OMX_S16)f1; + pDst[i+2] = (OMX_S16)f2; + pDst[i+3] = (OMX_S16)f3; + } + + /* Transform columns */ + for (i=0; i<4; i++) + { + int f0 = pDst[i+0]; + int f1 = pDst[i+4]; + int f2 = pDst[i+8]; + int f3 = pDst[i+12]; + int g0 = f0 + f3; + int g1 = f0 - f3; + int g2 = f1 + f2; + int g3 = f1 - f2; + int h0 = g0 + g2; + int h1 = (g1 << 1) + g3; + int h2 = g0 - g2; + int h3 = g1 - (g3 << 1); + pDst[i+0] = (OMX_S16) h0; + pDst[i+4] = (OMX_S16) h1; + pDst[i+8] = (OMX_S16) h2; + pDst[i+12] = (OMX_S16) h3; + } +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c new file mode 100644 index 0000000000000000000000000000000000000000..8732f4fb62b1da93e987253b6bdd6c05d4d7f5af --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c @@ -0,0 +1,106 @@ +/** + * + * File Name: armVCM4P10_InterpolateHalfDiag_Luma.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * Description: + * This functions will help to calculate Half Pel luma interpolation + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + + +/** + * Function: armVCM4P10_InterpolateHalfDiag_Luma + * + * Description: + * This function performs interpolation for (1/2, 1/2) positions + * around a full-pel position. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to interpolate + * in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer of the (1/2,1/2)-pel + * + * Return Value: + * Standard OMXResult value. + * + */ + +OMXResult armVCM4P10_InterpolateHalfDiag_Luma( + const OMX_U8* pSrc, + OMX_U32 iSrcStep, + OMX_U8* pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +) +{ + OMX_S32 HalfCoeff, pos; + OMX_S16 Buf [21 * 16]; /* 21 rows by 16 pixels per row */ + OMX_U32 y, x; + + /* check for argument error */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr) + + /* + * Intermediate values will be 1/2 pel at Horizontal direction + * Starting at (0.5, -2) at top extending to (0.5, height + 3) at bottom + * Buf contains a 2D array of size (iWidth)X(iHeight + 5) + */ + for (y = 0; y < iHeight + 5; y++) + { + for (x = 0; x < iWidth; x++) + { + pos = (y-2) * iSrcStep + x; + HalfCoeff = + pSrc [pos - 2] - + 5 * pSrc [pos - 1] + + 20 * pSrc [pos] + + 20 * pSrc [pos + 1] - + 5 * pSrc [pos + 2] + + pSrc [pos + 3]; + Buf [y * iWidth + x] = (OMX_S16)HalfCoeff; + } /* x */ + } /* y */ + + /* Vertical interpolate */ + for (y = 0; y < iHeight; y++) + { + for (x = 0; x < iWidth; x++) + { + pos = y * iWidth + x; + HalfCoeff = + Buf [pos] - + 5 * Buf [pos + 1 * iWidth] + + 20 * Buf [pos + 2 * iWidth] + 
+ 20 * Buf [pos + 3 * iWidth] - + 5 * Buf [pos + 4 * iWidth] + + Buf [pos + 5 * iWidth]; + + HalfCoeff = (HalfCoeff + 512) >> 10; + HalfCoeff = armClip(0, 255, HalfCoeff); + + pDst [y * iDstStep + x] = (OMX_U8) HalfCoeff; + } + } + + return OMX_Sts_NoErr; +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c new file mode 100644 index 0000000000000000000000000000000000000000..89c00798aa072fc95d9b0a857205018a08d5b011 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c @@ -0,0 +1,82 @@ +/** + * + * File Name: armVCM4P10_InterpolateHalfHor_Luma.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This functions will help to calculate Half Pel luma interpolation + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: armVCM4P10_InterpolateHalfHor_Luma + * + * Description: + * This function performs interpolation for horizontal 1/2-pel positions + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to interpolate + * in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer of the 1/2-pel + * + * Return Value: + * Standard OMXResult value. 
+ * + */ +
OMXResult armVCM4P10_InterpolateHalfHor_Luma(
        const OMX_U8*   pSrc,
        OMX_U32         iSrcStep,
        OMX_U8*         pDst,
        OMX_U32         iDstStep,
        OMX_U32         iWidth,
        OMX_U32         iHeight
)
{
    OMX_INT row, col;

    /* Reject missing buffers */
    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)

    for (row = 0; row < iHeight; row++)
    {
        for (col = 0; col < iWidth; col++)
        {
            /* tap[0] and tap[1] are the two full-pel samples on either
             * side of the half-pel position being produced. */
            const OMX_S32 centre = row * iSrcStep + col;
            const OMX_U8 *tap    = pSrc + centre;
            /* 6-tap filter (1, -5, 20, 20, -5, 1), symmetric taps paired */
            OMX_S32 acc = (tap [-2] + tap [3])
                        - 5 * (tap [-1] + tap [2])
                        + 20 * (tap [0] + tap [1]);

            /* Round, divide by the filter gain of 32, clamp to 8 bits */
            acc = (acc + 16) >> 5;
            pDst [row * iDstStep + col] = armClip(0, 255, acc);
        }
    }

    return OMX_Sts_NoErr;
}
+ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c new file mode 100644 index 0000000000000000000000000000000000000000..f7ecfc5e9684a9d3600a41896e130b2505b5d153 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c @@ -0,0 +1,84 @@ +/** + * + * File Name: armVCM4P10_InterpolateHalfVer_Luma.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This functions will help to calculate Half Pel luma interpolation + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: armVCM4P10_InterpolateHalfVer_Luma + * + * Description: + * This function performs interpolation for vertical 1/2-pel positions + * around a full-pel position. 
+ * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to interpolate + * in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer of the 1/2-pel + * + * Return Value: + * Standard OMXResult value. + * + */ + +OMXResult armVCM4P10_InterpolateHalfVer_Luma( + const OMX_U8* pSrc, + OMX_U32 iSrcStep, + OMX_U8* pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +) +{ + OMX_S32 HalfCoeff, pos; + OMX_INT y, x; + + /* check for argument error */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr) + + + for (y = 0; y < iHeight; y++) + { + for (x = 0; x < iWidth; x++) + { + pos = y * iSrcStep + x; + HalfCoeff = + pSrc [pos - 2 * iSrcStep] - + 5 * pSrc [pos - 1 * iSrcStep] + + 20 * pSrc [pos] + + 20 * pSrc [pos + 1 * iSrcStep] - + 5 * pSrc [pos + 2 * iSrcStep] + + pSrc [pos + 3 * iSrcStep]; + + HalfCoeff = (HalfCoeff + 16) >> 5; + HalfCoeff = armClip(0, 255, HalfCoeff); + + pDst [y * iDstStep + x] = (OMX_U8) HalfCoeff; + } + } + + return OMX_Sts_NoErr; +} + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c new file mode 100644 index 0000000000000000000000000000000000000000..1507d2319b4958c4a1cf2ce8a1cbc32dec6a0e81 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c @@ -0,0 +1,109 @@ +/** + * + * File Name: armVCM4P10_Interpolate_Chroma.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * Description: + * This function will calculate interpolation for chroma components + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armCOMM.h" + +/** + * Function: armVCM4P10_Interpolate_Chroma + * + * Description: + * This function performs interpolation for chroma components. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to + * interpolate in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [in] dx Fractional part of horizontal motion vector + * component in 1/8 pixel unit (0~7) + * [in] dy Fractional part of vertical motion vector + * component in 1/8 pixel unit (0~7) + * [out] pDst Pointer to the interpolation buffer + * + * Return Value: + * Standard OMXResult value. + * + */ + OMXResult armVCM4P10_Interpolate_Chroma( + OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight, + OMX_U32 dx, + OMX_U32 dy +) +{ + OMX_U32 EightMinusdx = 8 - dx; + OMX_U32 EightMinusdy = 8 - dy; + OMX_U32 ACoeff, BCoeff, CCoeff, DCoeff; + OMX_U32 x, y; + + /* check for argument error */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(dx > 7, OMX_Sts_BadArgErr) + armRetArgErrIf(dy > 7, OMX_Sts_BadArgErr) + armRetArgErrIf(iSrcStep == 0, OMX_Sts_BadArgErr) + armRetArgErrIf(iDstStep == 0, OMX_Sts_BadArgErr) + armRetArgErrIf(iWidth == 0, OMX_Sts_BadArgErr) + armRetArgErrIf(iHeight == 0, OMX_Sts_BadArgErr) + + /* if fractionl mv is not (0, 0) */ + if (dx != 0 || dy != 0) + { + ACoeff = EightMinusdx * EightMinusdy; + BCoeff = dx * EightMinusdy; + CCoeff = EightMinusdx * dy; + DCoeff = dx * dy; + + for (y = 0; y < iHeight; y++) + { + for (x = 0; x < iWidth; x++) + { + pDst [y * iDstStep + x] = ( + ACoeff * pSrc [y * iSrcStep + x] + + 
BCoeff * pSrc [y * iSrcStep + x + 1] + + CCoeff * pSrc [(y + 1) * iSrcStep + x] + + DCoeff * pSrc [(y + 1) * iSrcStep + x + 1] + + 32) >> 6; + } + } + } + else + { + for (y = 0; y < iHeight; y++) + { + for (x = 0; x < iWidth; x++) + { + pDst [y * iDstStep + x] = pSrc [y * iSrcStep + x]; + } + } + } + + return OMX_Sts_NoErr; +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Luma.c new file mode 100644 index 0000000000000000000000000000000000000000..89978dd5c3e85a43b5efaadc116ced31230a6b72 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Luma.c @@ -0,0 +1,195 @@ +/** + * + * File Name: armVCM4P10_Interpolate_Luma.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This function will calculate interpolation for luma components + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: armM4P10_Copy + * + * Description: + * This function performs copy a block of data from source to destination + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer + * + * Return Value: + * Standard OMXResult value. 
+ * + */ +static OMXResult armM4P10_Copy( + const OMX_U8* pSrc, + OMX_U32 iSrcStep, + OMX_U8* pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +) +{ + OMX_U32 x, y; + + for (y = 0; y < iHeight; y++) + { + for (x = 0; x < iWidth; x++) + { + pDst [y * iDstStep + x] = pSrc [y * iSrcStep + x]; + } + } + + return OMX_Sts_NoErr; +} + +/** + * Function: armVCM4P10_Interpolate_Luma + * + * Description: + * This function performs interpolation for luma components. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to + * interpolate in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [in] dx Fractional part of horizontal motion vector + * component in 1/4 pixel unit (0~3) + * [in] dy Fractional part of vertical motion vector + * component in 1/4 pixel unit (0~3) + * [out] pDst Pointer to the interpolation buffer + * + * Return Value: + * Standard OMXResult value. 
+ * + */ + + OMXResult armVCM4P10_Interpolate_Luma( + const OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight, + OMX_U32 dx, + OMX_U32 dy +) +{ + OMX_U8 pBuf1 [16*16]; + const OMX_U8 *pSrcHalfHor = pSrc; + const OMX_U8 *pSrcHalfVer = pSrc; + + /* check for argument error */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(dx > 3, OMX_Sts_BadArgErr) + armRetArgErrIf(dy > 3, OMX_Sts_BadArgErr) + + /* Work out positions for half pixel interpolation */ + if (dx == 3) + { + pSrcHalfVer += 1; + } + if (dy == 3) + { + pSrcHalfHor += iSrcStep; + } + + /* Switch on type of pixel + * Pixels are named 'a' to 's' as in the H.264 standard + */ + if (dx == 0 && dy == 0) + { + /* G */ + armM4P10_Copy(pSrc, iSrcStep, pDst, iDstStep, iWidth, iHeight); + } + else if (dy == 0) + { + /* a, b, c */ + armVCM4P10_InterpolateHalfHor_Luma + (pSrcHalfHor, iSrcStep, pDst, iDstStep, iWidth, iHeight); + + if (dx == 1 || dx == 3) + { + armVCCOMM_Average + (pDst, pSrcHalfVer, iDstStep, iSrcStep, pDst, iDstStep, iWidth, iHeight); + } + } + else if (dx == 0) + { + /* d, h, n */ + armVCM4P10_InterpolateHalfVer_Luma + (pSrcHalfVer, iSrcStep, pDst, iDstStep, iWidth, iHeight); + + if (dy == 1 || dy == 3) + { + armVCCOMM_Average + (pDst, pSrcHalfHor, iDstStep, iSrcStep, pDst, iDstStep, iWidth, iHeight); + } + } + else if (dx == 2 || dy == 2) + { + /* j */ + armVCM4P10_InterpolateHalfDiag_Luma + (pSrc, iSrcStep, pDst, iDstStep, iWidth, iHeight); + + if (dx == 1 || dx == 3) + { + /* i, k */ + armVCM4P10_InterpolateHalfVer_Luma + (pSrcHalfVer, iSrcStep, pBuf1, iWidth, iWidth, iHeight); + + armVCCOMM_Average + (pDst, pBuf1, iDstStep, iWidth, pDst, iDstStep, iWidth, iHeight); + } + if (dy == 1 || dy == 3) + { + /* f,q */ + armVCM4P10_InterpolateHalfHor_Luma + (pSrcHalfHor, iSrcStep, pBuf1, iWidth, iWidth, iHeight); + + armVCCOMM_Average + (pDst, pBuf1, iDstStep, iWidth, pDst, iDstStep, 
iWidth, iHeight); + } + } + else /* dx=1,3 and dy=1,3 */ + { + /* e, g, p, r */ + armVCM4P10_InterpolateHalfHor_Luma + (pSrcHalfHor, iSrcStep, pBuf1, iWidth, iWidth, iHeight); + + armVCM4P10_InterpolateHalfVer_Luma + (pSrcHalfVer, iSrcStep, pDst, iDstStep, iWidth, iHeight); + + armVCCOMM_Average + (pBuf1, pDst, iWidth, iDstStep, pDst, iDstStep, iWidth, iHeight); + } + + return OMX_Sts_NoErr; +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..b713073d16a1f2a07f0651c059f4041814849e18 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c @@ -0,0 +1,88 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_PredictIntraDC4x4.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * H.264 4x4 intra prediction module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/* + * Description: + * Perform DC style intra prediction, averaging upper and left block + * + * Parameters: + * [in] pSrcLeft Pointer to the buffer of 16 left coefficients: + * p[x, y] (x = -1, y = 0..3) + * [in] pSrcAbove Pointer to the buffer of 16 above coefficients: + * p[x,y] (x = 0..3, y = -1) + * [in] leftStep Step of left coefficient buffer + * [in] dstStep Step of the destination buffer + * [in] availability Neighboring 16x16 MB availability flag + * [out] pDst Pointer to the destination buffer + * + * Return Value: + * None + */ + +void armVCM4P10_PredictIntraDC4x4( + const OMX_U8* pSrcLeft, + const OMX_U8 *pSrcAbove, + OMX_U8* pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMX_S32 availability +) +{ + int x, y, Sum=0, Count = 0; + + if (availability & OMX_VC_LEFT) + { + for (y=0; y<4; y++) + { + Sum += pSrcLeft[y*leftStep]; + } + Count++; + } + if (availability & OMX_VC_UPPER) + { + for (x=0; x<4; x++) + { + Sum += pSrcAbove[x]; + } + Count++; + } + if (Count==0) + { + Sum = 128; + } + else if (Count==1) + { + Sum = (Sum + 2) >> 2; + } + else /* Count = 2 */ + { + Sum = (Sum + 4) >> 3; + } + for (y=0; y<4; y++) + { + for (x=0; x<4; x++) + { + pDst[y*dstStep+x] = (OMX_U8)Sum; + } + } +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_QuantTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_QuantTables.c new file mode 100644 index 0000000000000000000000000000000000000000..f0b5bb0c9023d4eba46a144701bcfe27889fc8a6 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_QuantTables.c @@ -0,0 +1,31 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_QuantTables.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * 
/*
 * Constant 6 x 3 table of 32-bit multipliers used by the H.264
 * quantisation helpers.
 * NOTE(review): presumably the row is selected by QP % 6 and the column by
 * the coefficient-position class within the 4x4 block; confirm against
 * the users of this table.
 */
const OMX_U32 armVCM4P10_MFMatrix[6][3] =
{
    {13107, 5243, 8066},
    {11916, 4660, 7490},
    {10082, 4194, 6554},
    { 9362, 3647, 5825},
    { 8192, 3355, 5243},
    { 7282, 2893, 4559}
};
+ * + */ +OMXResult armVCM4P10_SADQuar( + const OMX_U8* pSrc, + const OMX_U8* pSrcRef0, + const OMX_U8* pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32* pDstSAD, + OMX_U32 iHeight, + OMX_U32 iWidth +) +{ + OMX_INT x, y; + OMX_S32 SAD = 0; + + /* check for argument error */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pSrcRef0 == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pSrcRef1 == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr) + + for (y = 0; y < iHeight; y++) + { + for (x = 0; x < iWidth; x++) + { + SAD += armAbs(pSrc [y * iSrcStep + x] - (( + pSrcRef0 [y * iRefStep0 + x] + + pSrcRef1 [y * iRefStep1 + x] + 1) >> 1)); + } + } + + *pDstSAD = SAD; + + return OMX_Sts_NoErr; +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_TransformResidual4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_TransformResidual4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f9f756a6f27bd58709ad5f70b0ac1b1cadbd8f0b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_TransformResidual4x4.c @@ -0,0 +1,80 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_TransformResidual4x4.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * H.264 transform module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/* + * Description: + * Transform Residual 4x4 Coefficients + * + * Parameters: + * [in] pSrc Source 4x4 block + * [out] pDst Destination 4x4 block + * + */ + +void armVCM4P10_TransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc) +{ + int i; + + /* Transform rows */ + for (i=0; i<16; i+=4) + { + int d0 = pSrc[i+0]; + int d1 = pSrc[i+1]; + int d2 = pSrc[i+2]; + int d3 = pSrc[i+3]; + int e0 = d0 + d2; + int e1 = d0 - d2; + int e2 = (d1>>1) - d3; + int e3 = d1 + (d3>>1); + int f0 = e0 + e3; + int f1 = e1 + e2; + int f2 = e1 - e2; + int f3 = e0 - e3; + pDst[i+0] = (OMX_S16)f0; + pDst[i+1] = (OMX_S16)f1; + pDst[i+2] = (OMX_S16)f2; + pDst[i+3] = (OMX_S16)f3; + } + + /* Transform columns */ + for (i=0; i<4; i++) + { + int f0 = pDst[i+0]; + int f1 = pDst[i+4]; + int f2 = pDst[i+8]; + int f3 = pDst[i+12]; + int g0 = f0 + f2; + int g1 = f0 - f2; + int g2 = (f1>>1) - f3; + int g3 = f1 + (f3>>1); + int h0 = g0 + g3; + int h1 = g1 + g2; + int h2 = g1 - g2; + int h3 = g0 - g3; + pDst[i+0] = (OMX_S16)((h0+32)>>6); + pDst[i+4] = (OMX_S16)((h1+32)>>6); + pDst[i+8] = (OMX_S16)((h2+32)>>6); + pDst[i+12] = (OMX_S16)((h3+32)>>6); + } +} + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..dda49f67d404a75dc923e769b631c18853b9cd7b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c @@ -0,0 +1,78 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_UnpackBlock2x2.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. 
All Rights Reserved. + * + * + * + * H.264 inverse quantize and transform helper module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" + +/* + * Description + * Unpack a 2x2 block of coefficient-residual pair values + * + * Parameters: + * [in] ppSrc Double pointer to residual coefficient-position pair + * buffer output by CALVC decoding + * [out] ppSrc *ppSrc is updated to the start of next non empty block + * [out] pDst Pointer to unpacked 4x4 block + */ + +void armVCM4P10_UnpackBlock2x2( + const OMX_U8 **ppSrc, + OMX_S16* pDst +) +{ + const OMX_U8 *pSrc = *ppSrc; + int i; + int Flag, Value; + + for (i=0; i<4; i++) + { + pDst[i] = 0; + } + + do + { + Flag = *pSrc++; + if (Flag & 0x10) + { + /* 16 bit */ + Value = *pSrc++; + Value = Value | ((*pSrc++)<<8); + if (Value & 0x8000) + { + Value -= 0x10000; + } + } + else + { + /* 8 bit */ + Value = *pSrc++; + if (Value & 0x80) + { + Value -= 0x100; + } + } + i = Flag & 15; + pDst[i] = (OMX_S16)Value; + } + while ((Flag & 0x20)==0); + + *ppSrc = pSrc; +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..3c0dcbd1baafc1f848e78b87939176779faf6943 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c @@ -0,0 +1,78 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_UnpackBlock4x4.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * H.264 inverse quantize and transform helper module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" + +/* + * Description + * Unpack a 4x4 block of coefficient-residual pair values + * + * Parameters: + * [in] ppSrc Double pointer to residual coefficient-position pair + * buffer output by CALVC decoding + * [out] ppSrc *ppSrc is updated to the start of next non empty block + * [out] pDst Pointer to unpacked 4x4 block + */ + +void armVCM4P10_UnpackBlock4x4( + const OMX_U8 **ppSrc, + OMX_S16* pDst +) +{ + const OMX_U8 *pSrc = *ppSrc; + int i; + int Flag, Value; + + for (i=0; i<16; i++) + { + pDst[i] = 0; + } + + do + { + Flag = *pSrc++; + if (Flag & 0x10) + { + /* 16 bit */ + Value = *pSrc++; + Value = Value | ((*pSrc++)<<8); + if (Value & 0x8000) + { + Value -= 0x10000; + } + } + else + { + /* 8 bit */ + Value = *pSrc++; + if (Value & 0x80) + { + Value -= 0x100; + } + } + i = Flag & 15; + pDst[i] = (OMX_S16)Value; + } + while ((Flag & 0x20)==0); + + *ppSrc = pSrc; +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_Average_4x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_Average_4x.c new file mode 100644 index 0000000000000000000000000000000000000000..ac0d5235970642e0f413476feda8e7698e78ace5 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_Average_4x.c @@ -0,0 +1,84 @@ +/** + * + * File Name: omxVCM4P10_Average_4x.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * Description: + * This function will calculate Average of two 4x4 or 4x8 blocks + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_Average_4x (6.3.5.5.3) + * + * Description: + * This function calculates the average of two 4x4, 4x8 blocks. The result + * is rounded according to (a+b+1)/2. + * + * Input Arguments: + * + * pPred0 - Pointer to the top-left corner of reference block 0 + * pPred1 - Pointer to the top-left corner of reference block 1 + * iPredStep0 - Step of reference block 0; must be a multiple of 4. + * iPredStep1 - Step of reference block 1; must be a multiple of 4. + * iDstStep - Step of the destination buffer; must be a multiple of 4. + * iHeight - Height of the blocks; must be either 4 or 8. + * + * Output Arguments: + * + * pDstPred - Pointer to the destination buffer. 4-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pPred0, pPred1, or pDstPred + * - pDstPred is not aligned on a 4-byte boundary + * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 4 + * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 4 + * - iDstStep <= 0 or iDstStep is not a multiple of 4 + * - iHeight is not equal to either 4 or 8 + * + */ + OMXResult omxVCM4P10_Average_4x ( + const OMX_U8* pPred0, + const OMX_U8* pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8* pDstPred, + OMX_U32 iDstStep, + OMX_U32 iHeight +) +{ + /* check for argument error */ + armRetArgErrIf(pPred0 == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pPred1 == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDstPred == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf((iHeight != 4) && (iHeight != 8), OMX_Sts_BadArgErr) + armRetArgErrIf((iPredStep0 == 0) || (iPredStep0 & 3), OMX_Sts_BadArgErr) + 
armRetArgErrIf((iPredStep1 == 0) || (iPredStep1 & 3), OMX_Sts_BadArgErr) + armRetArgErrIf((iDstStep == 0) || (iDstStep & 3), OMX_Sts_BadArgErr) + armRetArgErrIf(armNot4ByteAligned(pDstPred), OMX_Sts_BadArgErr) + + return armVCCOMM_Average + (pPred0, pPred1, iPredStep0, iPredStep1, pDstPred, iDstStep, 4, iHeight); +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c new file mode 100644 index 0000000000000000000000000000000000000000..c490e104e4129763bfd6c3618950fe3d3554df7f --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c @@ -0,0 +1,191 @@ +/** + * + * File Name: omxVCM4P10_BlockMatch_Half.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for half pel Block matching, + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + + +/** + * Function: omxVCM4P10_BlockMatch_Half (6.3.5.2.2) + * + * Description: + * Performs a half-pel block match using results from a prior integer search. + * Returns the best MV and associated cost. This function estimates the + * half-pixel motion vector by interpolating the integer resolution motion + * vector referenced by the input parameter pSrcDstBestMV, i.e., the initial + * integer MV is generated externally. The function + * omxVCM4P10_BlockMatch_Integer may be used for integer motion estimation. + * + * Input Arguments: + * + * pSrcOrgY - Pointer to the current position in original picture plane. 
If + * iBlockWidth==4, 4-byte alignment required. If iBlockWidth==8, + * 8-byte alignment required. If iBlockWidth==16, 16-byte alignment + * required. + * pSrcRefY - Pointer to the top-left corner of the co-located block in the + * reference picture If iBlockWidth==4, 4-byte alignment + * required. If iBlockWidth==8, 8-byte alignment required. If + * iBlockWidth==16, 16-byte alignment required. + * nSrcOrgStep - Stride of the original picture plane in terms of full + * pixels; must be a multiple of iBlockWidth. + * nSrcRefStep - Stride of the reference picture plane in terms of full + * pixels + * iBlockWidth - Width of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * iBlockHeight - Height of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * nLamda - Lamda factor, used to compute motion cost + * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to + * compute motion cost + * pSrcDstBestMV - The best MV resulting from a prior integer search, + * represented in terms of 1/4-pel units + * + * Output Arguments: + * + * pSrcDstBestMV - Best MV resulting from the half-pel search, expressed in + * terms of 1/4-pel units + * pBestCost - Motion cost associated with the best MV; computed as + * SAD+Lamda*BitsUsedByMV + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - any of the following pointers is NULL: pSrcOrgY, pSrcRefY, + * pSrcDstBestMV, pMVPred, pBestCost + * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. 
+ * - Any alignment restrictions are violated + * + */ + +OMXResult omxVCM4P10_BlockMatch_Half( + const OMX_U8* pSrcOrgY, + OMX_S32 nSrcOrgStep, + const OMX_U8* pSrcRefY, + OMX_S32 nSrcRefStep, + OMX_U8 iBlockWidth, + OMX_U8 iBlockHeight, + OMX_U32 nLamda, + const OMXVCMotionVector* pMVPred, + OMXVCMotionVector* pSrcDstBestMV, + OMX_S32* pBestCost +) +{ + /* Definitions and Initializations*/ + OMX_INT candSAD; + OMX_INT fromX, toX, fromY, toY; + /* Offset to the reference at the begining of the bounding box */ + const OMX_U8 *pTempSrcRefY, *pTempSrcOrgY; + OMX_S16 x, y; + OMXVCMotionVector diffMV, candMV, integerMV; + OMX_U8 interpolY[256]; + + /* Argument error checks */ + armRetArgErrIf((iBlockWidth == 4) && (!armIs4ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr); + armRetArgErrIf((iBlockWidth == 8) && (!armIs8ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr); + armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr); + armRetArgErrIf((iBlockWidth == 4) && (!armIs4ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr); + armRetArgErrIf((iBlockWidth == 8) && (!armIs8ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr); + armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr); + armRetArgErrIf((nSrcOrgStep % iBlockWidth), OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcOrgY == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcRefY == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pMVPred == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcDstBestMV == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBestCost == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(((iBlockWidth!=4)&&(iBlockWidth!=8)&&(iBlockWidth!=16)) , OMX_Sts_BadArgErr); + armRetArgErrIf(((iBlockHeight!=4)&&(iBlockHeight!=8)&&(iBlockHeight!=16)) , OMX_Sts_BadArgErr); + + + /* Check for valid region */ + fromX = 1; + toX = 1; + fromY = 1; + toY = 1; + + /* Initialize to max value as a start point */ + *pBestCost = 0x7fffffff; + + integerMV.dx = pSrcDstBestMV->dx; + integerMV.dy = 
pSrcDstBestMV->dy; + + /* Looping on y- axis */ + for (y = -fromY; y <= toY; y++) + { + /* Looping on x- axis */ + for (x = -fromX; x <= toX; x++) + { + /* Positioning the pointer */ + pTempSrcRefY = pSrcRefY + (nSrcRefStep * (integerMV.dy/4)) + (integerMV.dx/4); + if (x < 0) + { + pTempSrcRefY = pTempSrcRefY + x; + } + if (y < 0) + { + pTempSrcRefY = pTempSrcRefY + (y * nSrcRefStep); + } + pTempSrcOrgY = pSrcOrgY; + + /* Prepare cand MV */ + candMV.dx = integerMV.dx + x * 2; + candMV.dy = integerMV.dy + y * 2; + + /* Interpolate half pel for the current position*/ + armVCM4P10_Interpolate_Luma( + pTempSrcRefY, + nSrcRefStep, + interpolY, + iBlockWidth, + iBlockWidth, + iBlockHeight, + armAbs(x) * 2, + armAbs(y) * 2); + + /* Calculate the SAD */ + armVCCOMM_SAD( + pTempSrcOrgY, + nSrcOrgStep, + interpolY, + iBlockWidth, + &candSAD, + iBlockHeight, + iBlockWidth); + + diffMV.dx = candMV.dx - pMVPred->dx; + diffMV.dy = candMV.dy - pMVPred->dy; + + /* Result calculations */ + armVCM4P10_CompareMotionCostToMV ( + candMV.dx, + candMV.dy, + diffMV, + candSAD, + pSrcDstBestMV, + nLamda, + pBestCost); + + } /* End of x- axis */ + } /* End of y-axis */ + + return OMX_Sts_NoErr; + +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c new file mode 100644 index 0000000000000000000000000000000000000000..f7764e1485bda92a50006ebefe5dbb9b8c4d5046 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c @@ -0,0 +1,196 @@ +/** + * + * File Name: omxVCM4P10_BlockMatch_Integer.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Description: + * Contains modules for Block matching, a full search algorithm + * is implemented + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: omxVCM4P10_BlockMatch_Integer (6.3.5.2.1) + * + * Description: + * Performs integer block match. Returns best MV and associated cost. + * + * Input Arguments: + * + * pSrcOrgY - Pointer to the top-left corner of the current block. If + * iBlockWidth==4, 4-byte alignment required. If iBlockWidth==8, + * 8-byte alignment required. If iBlockWidth==16, 16-byte alignment + * required. + * pSrcRefY - Pointer to the top-left corner of the co-located block in the + * reference picture. If iBlockWidth==4, 4-byte alignment + * required. If iBlockWidth==8, 8-byte alignment required. If + * iBlockWidth==16, 16-byte alignment required. + * nSrcOrgStep - Stride of the original picture plane, expressed in terms + * of integer pixels; must be a multiple of iBlockWidth. + * nSrcRefStep - Stride of the reference picture plane, expressed in terms + * of integer pixels + * pRefRect - pointer to the valid reference rectangle inside the reference + * picture plane + * nCurrPointPos - position of the current block in the current plane + * iBlockWidth - Width of the current block, expressed in terms of integer + * pixels; must be equal to either 4, 8, or 16. + * iBlockHeight - Height of the current block, expressed in terms of + * integer pixels; must be equal to either 4, 8, or 16. 
+ * nLamda - Lamda factor; used to compute motion cost + * pMVPred - Predicted MV; used to compute motion cost, expressed in terms + * of 1/4-pel units + * pMVCandidate - Candidate MV; used to initialize the motion search, + * expressed in terms of integer pixels + * pMESpec - pointer to the ME specification structure + * + * Output Arguments: + * + * pDstBestMV - Best MV resulting from integer search, expressed in terms + * of 1/4-pel units + * pBestCost - Motion cost associated with the best MV; computed as + * SAD+Lamda*BitsUsedByMV + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - any of the following poitners are NULL: + * pSrcOrgY, pSrcRefY, pRefRect, pMVPred, pMVCandidate, or pMESpec. + * - Either iBlockWidth or iBlockHeight are values other than 4, 8, or 16. + * - Any alignment restrictions are violated + * + */ + + OMXResult omxVCM4P10_BlockMatch_Integer ( + const OMX_U8 *pSrcOrgY, + OMX_S32 nSrcOrgStep, + const OMX_U8 *pSrcRefY, + OMX_S32 nSrcRefStep, + const OMXRect *pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + OMX_U8 iBlockWidth, + OMX_U8 iBlockHeight, + OMX_U32 nLamda, + const OMXVCMotionVector *pMVPred, + const OMXVCMotionVector *pMVCandidate, + OMXVCMotionVector *pBestMV, + OMX_S32 *pBestCost, + void *pMESpec +) +{ + /* Definitions and Initializations*/ + OMX_INT candSAD; + OMX_INT fromX, toX, fromY, toY; + /* Offset to the reference at the begining of the bounding box */ + const OMX_U8 *pTempSrcRefY, *pTempSrcOrgY; + OMX_S16 x, y; + OMXVCMotionVector diffMV; + OMX_S32 nSearchRange; + ARMVCM4P10_MESpec *armMESpec = (ARMVCM4P10_MESpec *) pMESpec; + + /* Argument error checks */ + armRetArgErrIf((iBlockWidth == 4) && (!armIs4ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr); + armRetArgErrIf((iBlockWidth == 8) && (!armIs8ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr); + armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcOrgY)), 
OMX_Sts_BadArgErr); + armRetArgErrIf((iBlockWidth == 4) && (!armIs4ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr); + armRetArgErrIf((iBlockWidth == 8) && (!armIs8ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr); + armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcOrgY == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcRefY == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pMVPred == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pMVCandidate == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBestMV == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBestCost == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(((iBlockWidth!=4)&&(iBlockWidth!=8)&&(iBlockWidth!=16)) , OMX_Sts_BadArgErr); + armRetArgErrIf(((iBlockHeight!=4)&&(iBlockHeight!=8)&&(iBlockHeight!=16)) , OMX_Sts_BadArgErr); + armIgnore (pMESpec); + + if(iBlockWidth == 4) + { + nSearchRange = armMESpec->MEParams.searchRange4x4; + } + else if(iBlockWidth == 8) + { + nSearchRange = armMESpec->MEParams.searchRange8x8; + } + else + { + nSearchRange = armMESpec->MEParams.searchRange16x16; + } + /* Check for valid region */ + fromX = nSearchRange; + toX = nSearchRange; + fromY = nSearchRange; + toY = nSearchRange; + + if ((pCurrPointPos->x - nSearchRange) < pRefRect->x) + { + fromX = pCurrPointPos->x - pRefRect->x; + } + + if ((pCurrPointPos->x + iBlockWidth + nSearchRange) > (pRefRect->x + pRefRect->width)) + { + toX = pRefRect->width - (pCurrPointPos->x - pRefRect->x) - iBlockWidth; + } + + if ((pCurrPointPos->y - nSearchRange) < pRefRect->y) + { + fromY = pCurrPointPos->y - pRefRect->y; + } + + if ((pCurrPointPos->y + iBlockWidth + nSearchRange) > (pRefRect->y + pRefRect->height)) + { + toY = pRefRect->width - (pCurrPointPos->y - pRefRect->y) - iBlockWidth; + } + + pBestMV->dx = -fromX * 4; + pBestMV->dy = -fromY * 4; + /* Initialize to max value as a start point */ + *pBestCost = 0x7fffffff; + + /* Looping on y- axis */ + for (y = -fromY; y <= toY; y++) + { + /* Looping on x- axis 
*/ + for (x = -fromX; x <= toX; x++) + { + /* Positioning the pointer */ + pTempSrcRefY = pSrcRefY + (nSrcRefStep * y) + x; + pTempSrcOrgY = pSrcOrgY; + + /* Calculate the SAD */ + armVCCOMM_SAD( + pTempSrcOrgY, + nSrcOrgStep, + pTempSrcRefY, + nSrcRefStep, + &candSAD, + iBlockHeight, + iBlockWidth); + + diffMV.dx = (x * 4) - pMVPred->dx; + diffMV.dy = (y * 4) - pMVPred->dy; + + /* Result calculations */ + armVCM4P10_CompareMotionCostToMV ((x * 4), (y * 4), diffMV, candSAD, pBestMV, nLamda, pBestCost); + + } /* End of x- axis */ + } /* End of y-axis */ + + return OMX_Sts_NoErr; + +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c new file mode 100644 index 0000000000000000000000000000000000000000..513ee25167f99d44ba11363b661347dfbeef364d --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c @@ -0,0 +1,199 @@ +/** + * + * File Name: omxVCM4P10_BlockMatch_Quarter.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for quater pel Block matching, + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + + +/** + * Function: omxVCM4P10_BlockMatch_Quarter (6.3.5.2.3) + * + * Description: + * Performs a quarter-pel block match using results from a prior half-pel + * search. Returns the best MV and associated cost. This function estimates + * the quarter-pixel motion vector by interpolating the half-pel resolution + * motion vector referenced by the input parameter pSrcDstBestMV, i.e., the + * initial half-pel MV is generated externally. 
The function + * omxVCM4P10_BlockMatch_Half may be used for half-pel motion estimation. + * + * Input Arguments: + * + * pSrcOrgY - Pointer to the current position in original picture plane. If + * iBlockWidth==4, 4-byte alignment required. If iBlockWidth==8, + * 8-byte alignment required. If iBlockWidth==16, 16-byte alignment + * required. + * pSrcRefY - Pointer to the top-left corner of the co-located block in the + * reference picture. If iBlockWidth==4, 4-byte alignment + * required. If iBlockWidth==8, 8-byte alignment required. If + * iBlockWidth==16, 16-byte alignment required. + * nSrcOrgStep - Stride of the original picture plane in terms of full + * pixels; must be a multiple of iBlockWidth. + * nSrcRefStep - Stride of the reference picture plane in terms of full + * pixels + * iBlockWidth - Width of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * iBlockHeight - Height of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * nLamda - Lamda factor, used to compute motion cost + * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to + * compute motion cost + * pSrcDstBestMV - The best MV resulting from a prior half-pel search, + * represented in terms of 1/4-pel units + * + * Output Arguments: + * + * pSrcDstBestMV - Best MV resulting from the quarter-pel search, expressed + * in terms of 1/4-pel units + * pBestCost - Motion cost associated with the best MV; computed as + * SAD+Lamda*BitsUsedByMV + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - One or more of the following pointers is NULL: + * pSrcOrgY, pSrcRefY, pSrcDstBestMV, pMVPred, pBestCost + * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. 
+ * - Any alignment restrictions are violated + * + */ + +OMXResult omxVCM4P10_BlockMatch_Quarter( + const OMX_U8* pSrcOrgY, + OMX_S32 nSrcOrgStep, + const OMX_U8* pSrcRefY, + OMX_S32 nSrcRefStep, + OMX_U8 iBlockWidth, + OMX_U8 iBlockHeight, + OMX_U32 nLamda, + const OMXVCMotionVector* pMVPred, + OMXVCMotionVector* pSrcDstBestMV, + OMX_S32* pBestCost +) +{ + /* Definitions and Initializations*/ + OMX_INT candSAD; + OMX_INT fromX, toX, fromY, toY; + /* Offset to the reference at the begining of the bounding box */ + const OMX_U8 *pTempSrcRefY, *pTempSrcOrgY; + OMX_S16 x, y; + OMXVCMotionVector diffMV, candMV, initialMV; + OMX_U8 interpolY[256]; + OMX_S32 pelPosX, pelPosY; + + /* Argument error checks */ + armRetArgErrIf((iBlockWidth == 4) && (!armIs4ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr); + armRetArgErrIf((iBlockWidth == 8) && (!armIs8ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr); + armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr); + armRetArgErrIf((iBlockWidth == 4) && (!armIs4ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr); + armRetArgErrIf((iBlockWidth == 8) && (!armIs8ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr); + armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr); + armRetArgErrIf((nSrcOrgStep % iBlockWidth), OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcOrgY == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcRefY == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pMVPred == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcDstBestMV == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBestCost == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(((iBlockWidth!=4)&&(iBlockWidth!=8)&&(iBlockWidth!=16)) , OMX_Sts_BadArgErr); + armRetArgErrIf(((iBlockHeight!=4)&&(iBlockHeight!=8)&&(iBlockHeight!=16)) , OMX_Sts_BadArgErr); + + + /* Check for valid region */ + fromX = 1; + toX = 1; + fromY = 1; + toY = 1; + + /* Initialize to max value as a start point */ + *pBestCost = 0x7fffffff; + + initialMV.dx = pSrcDstBestMV->dx; 
+ initialMV.dy = pSrcDstBestMV->dy; + + /* Looping on y- axis */ + for (y = -fromY; y <= toY; y++) + { + /* Looping on x- axis */ + for (x = -fromX; x <= toX; x++) + { + /* Positioning the pointer */ + pTempSrcRefY = pSrcRefY + (nSrcRefStep * (initialMV.dy/4)) + (initialMV.dx/4); + + /* Calculating the fract pel position */ + pelPosX = (initialMV.dx % 4) + x; + if (pelPosX < 0) + { + pTempSrcRefY = pTempSrcRefY - 1; + pelPosX += 4; + } + pelPosY = (initialMV.dy % 4) + y; + if (pelPosY < 0) + { + pTempSrcRefY = pTempSrcRefY - (1 * nSrcRefStep); + pelPosY += 4; + } + + pTempSrcOrgY = pSrcOrgY; + + /* Prepare cand MV */ + candMV.dx = initialMV.dx + x; + candMV.dy = initialMV.dy + y; + + /* Interpolate Quater pel for the current position*/ + armVCM4P10_Interpolate_Luma( + pTempSrcRefY, + nSrcRefStep, + interpolY, + iBlockWidth, + iBlockWidth, + iBlockHeight, + pelPosX, + pelPosY); + + /* Calculate the SAD */ + armVCCOMM_SAD( + pTempSrcOrgY, + nSrcOrgStep, + interpolY, + iBlockWidth, + &candSAD, + iBlockHeight, + iBlockWidth); + + diffMV.dx = candMV.dx - pMVPred->dx; + diffMV.dy = candMV.dy - pMVPred->dy; + + /* Result calculations */ + armVCM4P10_CompareMotionCostToMV ( + candMV.dx, + candMV.dy, + diffMV, + candSAD, + pSrcDstBestMV, + nLamda, + pBestCost); + + } /* End of x- axis */ + } /* End of y-axis */ + + return OMX_Sts_NoErr; + +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c new file mode 100644 index 0000000000000000000000000000000000000000..a07b1bb0706e6e16155a6e4d250b5d69600b30a5 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c @@ -0,0 +1,107 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DeblockChroma_I.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * 
Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * H.264 intra chroma deblock + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_DeblockChroma_I (6.3.3.3.6) + * + * Description: + * Performs in-place deblocking filtering on all edges of the chroma + * macroblock (16x16). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 8-byte aligned. + * srcdstStep - step of the arrays; must be a multiple of 8. + * pAlpha - pointer to a 2x2 array of alpha thresholds, organized as + * follows: {external vertical edge, internal vertical edge, + * external horizontal edge, internal horizontal edge }. Per + * [ISO14496-10] alpha values must be in the range [0,255]. + * pBeta - pointer to a 2x2 array of Beta Thresholds, organized as follows: + * { external vertical edge, internal vertical edge, external + * horizontal edge, internal horizontal edge }. Per [ISO14496-10] + * beta values must be in the range [0,18]. + * pThresholds - array of size 8x2 of Thresholds (TC0) (values for the left + * or above edge of each 4x2 or 2x4 block, arranged in vertical + * block order and then in horizontal block order); must be aligned + * on a 4-byte boundary. Per [ISO14496-10] values must be in the + * range [0,25]. + * pBS - array of size 16x2 of BS parameters (arranged in scan block order + * for vertical edges and then horizontal edges); valid in the + * range [0,4] with the following restrictions: i) pBS[i]== 4 may + * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== + * 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst - pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds, or pBS. 
pSrcDst is not 8-byte aligned. + * either pThresholds or pBS is not 4-byte aligned. + * - one or more entries in the table pAlpha[0..3] is outside the range + * [0,255]. + * - one or more entries in the table pBeta[0..3] is outside the range + * [0,18]. + * - one or more entries in the table pThresholds[0..15]is outside of + * the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - srcdstStep is not a multiple of 8. + * + */ +OMXResult omxVCM4P10_DeblockChroma_I( + OMX_U8* pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8* pAlpha, + const OMX_U8* pBeta, + const OMX_U8* pThresholds, + const OMX_U8 *pBS +) +{ + OMXResult errorCode; + + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr); + armRetArgErrIf(srcdstStep & 7, OMX_Sts_BadArgErr); + armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr); + armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr); + + errorCode = omxVCM4P10_FilterDeblockingChroma_VerEdge_I( + pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS); + + armRetArgErrIf(errorCode != OMX_Sts_NoErr, errorCode) + + errorCode = omxVCM4P10_FilterDeblockingChroma_HorEdge_I( + pSrcDst, srcdstStep, pAlpha+2, pBeta+2, pThresholds+8, pBS+16); + + return errorCode; +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c new file mode 100644 index 0000000000000000000000000000000000000000..1f3a64648f1dd3456265b4e23c6317d4ca03db40 --- /dev/null +++ 
b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c @@ -0,0 +1,109 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DeblockLuma_I.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * H.264 luma deblock + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + + +/** + * Function: omxVCM4P10_DeblockLuma_I (6.3.3.3.5) + * + * Description: + * This function performs in-place deblock filtering the horizontal and + * vertical edges of a luma macroblock (16x16). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 16-byte aligned. + * srcdstStep - image width; must be a multiple of 16. + * pAlpha - pointer to a 2x2 table of alpha thresholds, organized as + * follows: {external vertical edge, internal vertical edge, + * external horizontal edge, internal horizontal edge }. Per + * [ISO14496-10] alpha values must be in the range [0,255]. + * pBeta - pointer to a 2x2 table of beta thresholds, organized as follows: + * {external vertical edge, internal vertical edge, external + * horizontal edge, internal horizontal edge }. Per [ISO14496-10] + * beta values must be in the range [0,18]. + * pThresholds - pointer to a 16x2 table of threshold (TC0), organized as + * follows: {values for the left or above edge of each 4x4 block, + * arranged in vertical block order and then in horizontal block + * order}; must be aligned on a 4-byte boundary. Per [ISO14496-10] + * values must be in the range [0,25]. 
+ * pBS - pointer to a 16x2 table of BS parameters arranged in scan block + * order for vertical edges and then horizontal edges; valid in the + * range [0,4] with the following restrictions: i) pBS[i]== 4 may + * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== + * 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst - pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds or pBS. pSrcDst is not 16-byte aligned. + * either pThresholds or pBS is not aligned on a 4-byte boundary. + * - one or more entries in the table pAlpha[0..3] is outside the range + * [0,255]. + * - one or more entries in the table pBeta[0..3] is outside the range + * [0,18]. + * - one or more entries in the table pThresholds[0..31]is outside of + * the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - srcdstStep is not a multiple of 16. 
+ * + */ + +OMXResult omxVCM4P10_DeblockLuma_I( + OMX_U8* pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8* pAlpha, + const OMX_U8* pBeta, + const OMX_U8* pThresholds, + const OMX_U8 *pBS +) +{ + OMXResult errorCode; + + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot16ByteAligned(pSrcDst), OMX_Sts_BadArgErr); + armRetArgErrIf(srcdstStep & 15, OMX_Sts_BadArgErr); + armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr); + armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr); + + errorCode = omxVCM4P10_FilterDeblockingLuma_VerEdge_I( + pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS); + + armRetArgErrIf(errorCode != OMX_Sts_NoErr, errorCode) + + errorCode = omxVCM4P10_FilterDeblockingLuma_HorEdge_I( + pSrcDst, srcdstStep, pAlpha+2, pBeta+2, pThresholds+16, pBS+16); + + return errorCode; +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c new file mode 100644 index 0000000000000000000000000000000000000000..830ddc7a6123e4b33fde7f02678f3f61cd87da08 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c @@ -0,0 +1,86 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * H.264 decode coefficients module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (6.3.4.1.1) + * + * Description: + * Performs CAVLC decoding and inverse raster scan for a 2x2 block of + * ChromaDCLevel. The decoded coefficients in the packed position-coefficient + * buffer are stored in reverse zig-zag order, i.e., the first buffer element + * contains the last non-zero postion-coefficient pair of the block. Within + * each position-coefficient pair, the position entry indicates the + * raster-scan position of the coefficient, while the coefficient entry + * contains the coefficient value. + * + * Input Arguments: + * + * ppBitStream - Double pointer to current byte in bit stream buffer + * pOffset - Pointer to current bit position in the byte pointed to by + * *ppBitStream; valid in the range [0,7]. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after each block is decoded + * pOffset - *pOffset is updated after each block is decoded + * pNumCoeff - Pointer to the number of nonzero coefficients in this block + * ppPosCoefBuf - Double pointer to destination residual + * coefficient-position pair buffer. Buffer position + * (*ppPosCoefBuf) is updated upon return, unless there are only + * zero coefficients in the currently decoded block. In this case + * the caller is expected to bypass the transform/dequantization of + * the empty blocks. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppBitStream or pOffset is NULL. + * - ppPosCoefBuf or pNumCoeff is NULL. 
+ * OMX_Sts_Err - if one of the following is true: + * - an illegal code is encountered in the bitstream + * + */ + +OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC ( + const OMX_U8** ppBitStream, + OMX_S32* pOffset, + OMX_U8* pNumCoeff, + OMX_U8** ppPosCoefbuf + ) + +{ + armRetArgErrIf(ppBitStream==NULL , OMX_Sts_BadArgErr); + armRetArgErrIf(*ppBitStream==NULL , OMX_Sts_BadArgErr); + armRetArgErrIf(pOffset==NULL , OMX_Sts_BadArgErr); + armRetArgErrIf(*pOffset<0 , OMX_Sts_BadArgErr); + armRetArgErrIf(*pOffset>7 , OMX_Sts_BadArgErr); + armRetArgErrIf(pNumCoeff==NULL , OMX_Sts_BadArgErr); + armRetArgErrIf(ppPosCoefbuf==NULL , OMX_Sts_BadArgErr); + armRetArgErrIf(*ppPosCoefbuf==NULL , OMX_Sts_BadArgErr); + + return armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff, + ppPosCoefbuf, 4, 4); + +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c new file mode 100644 index 0000000000000000000000000000000000000000..7e83d1e311286a5bb4a203f52e79942f57d4d939 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c @@ -0,0 +1,117 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DecodeCoeffsToPairCAVLC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * H.264 decode coefficients module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC (6.3.4.1.2) + * + * Description: + * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of + * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. 
Inverse + * field scan is not supported. The decoded coefficients in the packed + * position-coefficient buffer are stored in reverse zig-zag order, i.e., the + * first buffer element contains the last non-zero position-coefficient pair of + * the block. Within each position-coefficient pair, the position entry + * indicates the raster-scan position of the coefficient, while the + * coefficient entry contains the coefficient value. + * + * Input Arguments: + * + * ppBitStream - Double pointer to current byte in bit stream buffer + * pOffset - Pointer to current bit position in the byte pointed to by + * *ppBitStream; valid in the range [0,7]. + * sMaxNumCoeff - Maximum number of non-zero coefficients in current + * block + * sVLCSelect - VLC table selector, obtained from the number of non-zero + * coefficients contained in the above and left 4x4 blocks. It is + * equivalent to the variable nC described in H.264 standard Table + * 9-5, except its value can't be less than zero. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after each block is decoded. + * Buffer position (*ppPosCoefBuf) is updated upon return, unless + * there are only zero coefficients in the currently decoded block. + * In this case the caller is expected to bypass the + * transform/dequantization of the empty blocks. + * pOffset - *pOffset is updated after each block is decoded + * pNumCoeff - Pointer to the number of nonzero coefficients in this block + * ppPosCoefBuf - Double pointer to destination residual + * coefficient-position pair buffer + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppBitStream or pOffset is NULL. + * - ppPosCoefBuf or pNumCoeff is NULL. + * - sMaxNumCoeff is not equal to either 15 or 16. + * - sVLCSelect is less than 0. 
+ * + * OMX_Sts_Err - if one of the following is true: + * - an illegal code is encountered in the bitstream + * + */ + +OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC( + const OMX_U8** ppBitStream, + OMX_S32* pOffset, + OMX_U8* pNumCoeff, + OMX_U8**ppPosCoefbuf, + OMX_INT sVLCSelect, + OMX_INT sMaxNumCoeff + ) +{ + int nTable; + + armRetArgErrIf(ppBitStream==NULL , OMX_Sts_BadArgErr); + armRetArgErrIf(*ppBitStream==NULL , OMX_Sts_BadArgErr); + armRetArgErrIf(pOffset==NULL , OMX_Sts_BadArgErr); + armRetArgErrIf(*pOffset<0 , OMX_Sts_BadArgErr); + armRetArgErrIf(*pOffset>7 , OMX_Sts_BadArgErr); + armRetArgErrIf(pNumCoeff==NULL , OMX_Sts_BadArgErr); + armRetArgErrIf(ppPosCoefbuf==NULL , OMX_Sts_BadArgErr); + armRetArgErrIf(*ppPosCoefbuf==NULL , OMX_Sts_BadArgErr); + armRetArgErrIf(sVLCSelect<0 , OMX_Sts_BadArgErr); + armRetArgErrIf(sMaxNumCoeff<15 , OMX_Sts_BadArgErr); + armRetArgErrIf(sMaxNumCoeff>16 , OMX_Sts_BadArgErr); + + /* Find VLC table number */ + if (sVLCSelect<2) + { + nTable = 0; + } + else if (sVLCSelect<4) + { + nTable = 1; + } + else if (sVLCSelect<8) + { + nTable = 2; + } + else /* sVLCSelect >= 8 */ + { + nTable = 3; + } + + return armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff, + ppPosCoefbuf, nTable, sMaxNumCoeff); +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c new file mode 100644 index 0000000000000000000000000000000000000000..ed5a158dffa6e7a91817b4d806e1710dc549920c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c @@ -0,0 +1,145 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DequantTransformResidualFromPairAndAdd.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, 
February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * H.264 inverse quantize and transform module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/* + * Description: + * Dequantize Luma AC block + */ + +static void DequantLumaAC4x4( + OMX_S16* pSrcDst, + OMX_INT QP +) +{ + const OMX_U8 *pVRow = &armVCM4P10_VMatrix[QP%6][0]; + int Shift = QP / 6; + int i; + OMX_S32 Value; + + for (i=0; i<16; i++) + { + + Value = (pSrcDst[i] * pVRow[armVCM4P10_PosToVCol4x4[i]]) << Shift; + pSrcDst[i] = (OMX_S16)Value; + } +} + +/** + * Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd (6.3.4.2.3) + * + * Description: + * Reconstruct the 4x4 residual block from coefficient-position pair buffer, + * perform dequantization and integer inverse transformation for 4x4 block of + * residuals with previous intra prediction or motion compensation data, and + * update the pair buffer pointer to next non-empty block. If pDC == NULL, + * there re 16 non-zero AC coefficients at most in the packed buffer starting + * from 4x4 block position 0; If pDC != NULL, there re 15 non-zero AC + * coefficients at most in the packet buffer starting from 4x4 block position + * 1. + * + * Input Arguments: + * + * ppSrc - Double pointer to residual coefficient-position pair buffer + * output by CALVC decoding + * pPred - Pointer to the predicted 4x4 block; must be aligned on a 4-byte + * boundary + * predStep - Predicted frame step size in bytes; must be a multiple of 4 + * dstStep - Destination frame step in bytes; must be a multiple of 4 + * pDC - Pointer to the DC coefficient of this block, NULL if it doesn't + * exist + * QP - QP Quantization parameter. It should be QpC in chroma 4x4 block + * decoding, otherwise it should be QpY. 
+ * AC - Flag indicating if at least one non-zero AC coefficient exists + * + * Output Arguments: + * + * pDst - pointer to the reconstructed 4x4 block data; must be aligned on a + * 4-byte boundary + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - pPred or pDst is NULL. + * - pPred or pDst is not 4-byte aligned. + * - predStep or dstStep is not a multiple of 4. + * - AC !=0 and Qp is not in the range of [0-51] or ppSrc == NULL. + * - AC ==0 && pDC ==NULL. + * + */ + +OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd( + const OMX_U8 **ppSrc, + const OMX_U8 *pPred, + const OMX_S16 *pDC, + OMX_U8 *pDst, + OMX_INT predStep, + OMX_INT dstStep, + OMX_INT QP, + OMX_INT AC +) +{ + OMX_S16 pBuffer[16+4]; + OMX_S16 *pDelta; + int i,x,y; + + armRetArgErrIf(pPred == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pPred),OMX_Sts_BadArgErr); + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pDst), OMX_Sts_BadArgErr); + armRetArgErrIf(predStep & 3, OMX_Sts_BadArgErr); + armRetArgErrIf(dstStep & 3, OMX_Sts_BadArgErr); + armRetArgErrIf(AC!=0 && (QP<0), OMX_Sts_BadArgErr); + armRetArgErrIf(AC!=0 && (QP>51), OMX_Sts_BadArgErr); + armRetArgErrIf(AC!=0 && ppSrc==NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(AC!=0 && *ppSrc==NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(AC==0 && pDC==NULL, OMX_Sts_BadArgErr); + + pDelta = armAlignTo8Bytes(pBuffer); + + for (i=0; i<16; i++) + { + pDelta[i] = 0; + } + if (AC) + { + armVCM4P10_UnpackBlock4x4(ppSrc, pDelta); + DequantLumaAC4x4(pDelta, QP); + } + if (pDC) + { + pDelta[0] = pDC[0]; + } + armVCM4P10_TransformResidual4x4(pDelta,pDelta); + + for (y=0; y<4; y++) + { + for (x=0; x<4; x++) + { + pDst[y*dstStep+x] = (OMX_U8)armClip(0,255,pPred[y*predStep+x] + pDelta[4*y+x]); + } + } + + return OMX_Sts_NoErr; +} + +/* End of file */ diff --git 
a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c new file mode 100644 index 0000000000000000000000000000000000000000..75edee215452ac0f26f210a7915786527f7bade8 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c @@ -0,0 +1,130 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * H.264 chroma deblock module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_FilterDeblockingChroma_HorEdge_I (6.3.3.3.4) + * + * Description: + * Performs in-place deblock filtering on the horizontal edges of the chroma + * macroblock (8x8). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 8-byte aligned. + * srcdstStep - array step; must be a multiple of 8. + * pAlpha - array of size 2 containing alpha thresholds; the first element + * contains the threshold for the external horizontal edge, and the + * second element contains the threshold for internal horizontal + * edge. Per [ISO14496-10] alpha values must be in the range + * [0,255]. + * pBeta - array of size 2 containing beta thresholds; the first element + * contains the threshold for the external horizontal edge, and the + * second element contains the threshold for the internal + * horizontal edge. Per [ISO14496-10] beta values must be in the + * range [0,18]. 
+ * pThresholds - array of size 8 containing thresholds, TC0, for the top + * horizontal edge of each 2x4 chroma block, arranged in horizontal + * block order; must be aligned on a 4-byte boundary. Per + * [ISO14496-10] values must be in the range [0,25]. + * pBS - array of size 16 containing BS parameters for each 2x2 chroma + * block, arranged in horizontal block order; valid in the range + * [0,4] with the following restrictions: i) pBS[i]== 4 may occur + * only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 4. + * Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr, if one of the following cases occurs: + * - any of the following pointers is NULL: + * pSrcDst, pAlpha, pBeta, pThresholds, or pBS. + * - pSrcDst is not 8-byte aligned. + * - srcdstStep is not a multiple of 8. + * - pThresholds is not 4-byte aligned. + * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * - pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * - One or more entries in the table pThresholds[0..7] is outside + * of the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - pBS is not 4-byte aligned. 
+ *
+ * Implementation notes:
+ *   - Only pBS[0..3] (external edge) and pBS[8..11] (internal edge) are read
+ *     and validated here; the other pBS entries are not accessed.
+ *   - pBS entries are validated pixel by pixel while filtering, so a bad
+ *     entry that is found late leaves pSrcDst partially filtered.
+ *
+ */
+
+OMXResult omxVCM4P10_FilterDeblockingChroma_HorEdge_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+ )
+{
+    int I, X, Y, Internal=0;
+
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(srcdstStep & 7, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr);
+
+    /* Range checks listed among the documented OMX_Sts_BadArgErr cases */
+    armRetArgErrIf(pBeta[0] > 18, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta[1] > 18, OMX_Sts_BadArgErr);
+    for (I=0; I<8; I++)
+    {
+        armRetArgErrIf(pThresholds[I] > 25, OMX_Sts_BadArgErr);
+    }
+
+    /* Y==0 uses pAlpha[0]/pBeta[0] (external edge); Y==4 uses index 1
+     * (internal edge) */
+    for (Y=0; Y<8; Y+=4, Internal=1)
+    {
+        for (X=0; X<8; X++)
+        {
+            /* Index of the BS entry covering pixel column X on this edge */
+            I = (X>>1)+4*(Y>>1);
+
+            armRetArgErrIf(pBS[I] > 4, OMX_Sts_BadArgErr);
+
+            /* BS==4 is only allowed on the external edge (entries 0..3) */
+            armRetArgErrIf( (I > 3) && (pBS[I] == 4),
+                            OMX_Sts_BadArgErr);
+
+            /* Documented rule: pBS[i]==4 requires pBS[i^3]==4 for 0<=i<=3 */
+            armRetArgErrIf( (I < 4) &&
+                          ( (pBS[I] == 4) && (pBS[I^3] != 4) ),
+                            OMX_Sts_BadArgErr);
+
+            /* Filter horizontal edge with q0 at (X,Y) */
+            armVCM4P10_DeBlockPixel(
+                pSrcDst + Y*srcdstStep + X,
+                srcdstStep,
+                pThresholds[(X>>1)+4*(Y>>2)],
+                pAlpha[Internal],
+                pBeta[Internal],
+                pBS[I],
+                1);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c
new file mode 100644
index 0000000000000000000000000000000000000000..10b25925d87f407e9427bd09e26ccf8ad17b4f99
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c
@@ -0,0 +1,131 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ *
Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * H.264 deblocking module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_FilterDeblockingChroma_VerEdge_I (6.3.3.3.3) + * + * Description: + * Performs in-place deblock filtering on four vertical edges of the chroma + * macroblock (8x8). + * + * Input Arguments: + * + * pSrcDst - Pointer to the input macroblock; must be 8-byte aligned. + * srcdstStep - Step of the arrays; must be a multiple of 8. + * pAlpha - Array of size 2 of alpha thresholds (the first item is alpha + * threshold for external vertical edge, and the second item is for + * internal vertical edge); per [ISO14496-10] alpha values must be + * in the range [0,255]. + * pBeta - Array of size 2 of beta thresholds (the first item is the beta + * threshold for the external vertical edge, and the second item is + * for the internal vertical edge); per [ISO14496-10] beta values + * must be in the range [0,18]. + * pThresholds - Array of size 8 containing thresholds, TC0, for the left + * vertical edge of each 4x2 chroma block, arranged in vertical + * block order; must be aligned on a 4-byte boundary. Per + * [ISO14496-10] values must be in the range [0,25]. + * pBS - Array of size 16 of BS parameters (values for each 2x2 chroma + * block, arranged in vertical block order). This parameter is the + * same as the pBSparameter passed into FilterDeblockLuma_VerEdge; + * valid in the range [0,4] with the following restrictions: i) + * pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and + * only if pBS[i^3]== 4. Must be 4 byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. 
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ *    - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ *      pBeta, pThresholds, or pBS.
+ *    - pSrcDst is not 8-byte aligned.
+ *    - srcdstStep is not a multiple of 8.
+ *    - pThresholds is not 4-byte aligned.
+ *    - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ *    - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ *    - One or more entries in the table pThresholds[0..7] is outside
+ *      of the range [0,25].
+ *    - pBS is out of range, i.e., one of the following conditions is true:
+ *      pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ *      (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ *    - pBS is not 4-byte aligned.
+ *
+ */
+
+OMXResult omxVCM4P10_FilterDeblockingChroma_VerEdge_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+ )
+{
+    int edge, row;
+
+    /* Pointer, alignment, stride and beta range checks */
+    armRetArgErrIf((pSrcDst == NULL) || armNot8ByteAligned(pSrcDst),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((srcdstStep & 7) != 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf((pAlpha == NULL) || (pBeta == NULL), OMX_Sts_BadArgErr);
+    armRetArgErrIf((pThresholds == NULL) || armNot4ByteAligned(pThresholds),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((pBS == NULL) || armNot4ByteAligned(pBS),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((pBeta[0] > 18) || (pBeta[1] > 18), OMX_Sts_BadArgErr);
+
+    /* edge 0 is the external edge (column 0), edge 1 the internal edge
+     * (column 4); the edge number also selects the pAlpha/pBeta entry */
+    for (edge = 0; edge < 2; edge++)
+    {
+        const int col = edge << 2;
+
+        for (row = 0; row < 8; row++)
+        {
+            /* BS entry used for this row of the current edge */
+            const int bsIdx = (row >> 1) + 4 * (col >> 1);
+            const OMX_U8 bs = pBS[bsIdx];
+
+            armRetArgErrIf(bs > 4, OMX_Sts_BadArgErr);
+            armRetArgErrIf((bs == 4) && (bsIdx > 3), OMX_Sts_BadArgErr);
+            armRetArgErrIf((bs == 4) && (pBS[bsIdx ^ 3] != 4),
+                           OMX_Sts_BadArgErr);
+            /* row runs over 0..7, so every threshold entry gets checked */
+            armRetArgErrIf(pThresholds[row] > 25, OMX_Sts_BadArgErr);
+
+            /* Filter vertical edge with q0 at (col,row) */
+            armVCM4P10_DeBlockPixel(
+                pSrcDst + row*srcdstStep + col,
+                1,
+                pThresholds[(row >> 1) + 4 * (col >> 2)],
+                pAlpha[edge],
+                pBeta[edge],
+                bs,
+                1);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c
new file mode 100644
index 0000000000000000000000000000000000000000..30a37da1f81ea21e8bc80d5737b67de1803f4160
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c
@@ -0,0 +1,125 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 luma deblock module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingLuma_HorEdge_I (6.3.3.3.2)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four horizontal edges of the luma
+ * macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep - step of the arrays; must be a multiple of 16.
+ * pAlpha - array of size 2 of alpha thresholds (the first item is the alpha
+ *          threshold for the external horizontal edge, and the second item is
+ *          for the internal horizontal edge); per [ISO14496-10] alpha
+ *          values must be in the range [0,255].
+ * pBeta - array of size 2 of beta thresholds (the first item is the beta
+ *          threshold for the external horizontal edge, and the second item
+ *          is for the internal horizontal edge). Per [ISO14496-10] beta
+ *          values must be in the range [0,18].
+ * pThresholds - array of size 16 containing thresholds, TC0, for the top + * horizontal edge of each 4x4 block, arranged in horizontal block + * order; must be aligned on a 4-byte boundary. Per [ISO14496 10] + * values must be in the range [0,25]. + * pBS - array of size 16 of BS parameters (arranged in horizontal block + * order); valid in the range [0,4] with the following + * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) + * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr, if one of the following cases occurs: + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds, or pBS. + * - either pThresholds or pBS is not aligned on a 4-byte boundary. + * - pSrcDst is not 16-byte aligned. + * - srcdstStep is not a multiple of 16. + * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * - pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * - One or more entries in the table pThresholds[0..15] is + * outside of the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. 
+ *
+ * Implementation notes:
+ *   - All 16 pBS and pThresholds entries are used: entries 0..3 belong to
+ *     the external (top) edge, entries 4..15 to the three internal edges.
+ *   - pBS entries are validated pixel by pixel while filtering, so a bad
+ *     entry that is found late leaves pSrcDst partially filtered.
+ *
+ */
+
+OMXResult omxVCM4P10_FilterDeblockingLuma_HorEdge_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+ )
+{
+    int I, X, Y, Internal=0;
+
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    /* The luma macroblock must be 16-byte aligned with a stride that is a
+     * multiple of 16, as documented and as checked by the VerEdge variant */
+    armRetArgErrIf(armNot16ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(srcdstStep & 15, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr);
+
+    /* Range checks listed among the documented OMX_Sts_BadArgErr cases */
+    armRetArgErrIf(pBeta[0] > 18, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta[1] > 18, OMX_Sts_BadArgErr);
+    for (I=0; I<16; I++)
+    {
+        armRetArgErrIf(pThresholds[I] > 25, OMX_Sts_BadArgErr);
+    }
+
+    /* Y==0 uses pAlpha[0]/pBeta[0] (external edge); later edges use index 1 */
+    for (Y=0; Y<16; Y+=4, Internal=1)
+    {
+        for (X=0; X<16; X++)
+        {
+            /* Index of the 4x4 block whose top edge contains (X,Y) */
+            I = (X>>2)+4*(Y>>2);
+
+            armRetArgErrIf(pBS[I] > 4, OMX_Sts_BadArgErr);
+
+            /* BS==4 is only allowed on the external edge (entries 0..3) */
+            armRetArgErrIf( (I > 3) && (pBS[I] == 4),
+                            OMX_Sts_BadArgErr);
+
+            /* Documented rule: pBS[i]==4 requires pBS[i^3]==4 for 0<=i<=3 */
+            armRetArgErrIf( (I < 4) &&
+                          ( (pBS[I] == 4) && (pBS[I^3] != 4) ),
+                            OMX_Sts_BadArgErr);
+
+            /* Filter horizontal edge with q0 at (X,Y) */
+            armVCM4P10_DeBlockPixel(
+                pSrcDst + Y*srcdstStep + X,
+                srcdstStep,
+                pThresholds[I],
+                pAlpha[Internal],
+                pBeta[Internal],
+                pBS[I],
+                0);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c
new file mode 100644
index 0000000000000000000000000000000000000000..8733427e366a96b5d7e3ecfe24b4ec1979124f66
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c
@@ -0,0 +1,128 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7,
2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * H.264 luma deblock module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_FilterDeblockingLuma_VerEdge_I (6.3.3.3.1) + * + * Description: + * Performs in-place deblock filtering on four vertical edges of the luma + * macroblock (16x16). + * + * Input Arguments: + * + * pSrcDst - Pointer to the input macroblock; must be 16-byte aligned. + * srcdstStep -Step of the arrays; must be a multiple of 16. + * pAlpha -Array of size 2 of alpha thresholds (the first item is the alpha + * threshold for the external vertical edge, and the second item is + * for the internal vertical edge); per [ISO14496-10] alpha values + * must be in the range [0,255]. + * pBeta -Array of size 2 of beta thresholds (the first item is the beta + * threshold for the external vertical edge, and the second item is + * for the internal vertical edge); per [ISO14496-10] beta values + * must be in the range [0,18]. + * pThresholds -Array of size 16 of Thresholds (TC0) (values for the left + * edge of each 4x4 block, arranged in vertical block order); must + * be aligned on a 4-byte boundary.. Per [ISO14496-10] values must + * be in the range [0,25]. + * pBS -Array of size 16 of BS parameters (arranged in vertical block + * order); valid in the range [0,4] with the following + * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) + * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * Either of the pointers in pSrcDst, pAlpha, pBeta, pThresholds, or pBS + * is NULL. + * Either pThresholds or pBS is not aligned on a 4-byte boundary. 
+ * pSrcDst is not 16-byte aligned.
+ * srcdstStep is not a multiple of 16.
+ * pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * One or more entries in the table pThresholds[0..15] is outside of the
+ * range [0,25].
+ * pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 &&
+ * pBS[i^3]!=4) for 0<=i<=3.
+ *
+ */
+
+OMXResult omxVCM4P10_FilterDeblockingLuma_VerEdge_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+ )
+{
+    int edge, row;
+
+    /* Pointer, alignment, stride and beta range checks */
+    armRetArgErrIf((pSrcDst == NULL) || armNot16ByteAligned(pSrcDst),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((srcdstStep & 15) != 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf((pAlpha == NULL) || (pBeta == NULL), OMX_Sts_BadArgErr);
+    armRetArgErrIf((pThresholds == NULL) || armNot4ByteAligned(pThresholds),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((pBS == NULL) || armNot4ByteAligned(pBS),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((pBeta[0] > 18) || (pBeta[1] > 18), OMX_Sts_BadArgErr);
+
+    /* Four vertical edges at columns 0, 4, 8 and 12 */
+    for (edge = 0; edge < 4; edge++)
+    {
+        const int col = edge << 2;
+        /* pAlpha/pBeta entry: 0 for the external edge, 1 for internal ones */
+        const int internal = (edge != 0);
+
+        for (row = 0; row < 16; row++)
+        {
+            /* Block index (vertical block order) used for filtering */
+            const int blk = (row >> 2) + 4 * edge;
+
+            /* The validation below indexes by row rather than by blk: as
+             * row runs over 0..15 it range-checks every entry of pBS and
+             * pThresholds once per edge. */
+            armRetArgErrIf(pBS[row] > 4, OMX_Sts_BadArgErr);
+            armRetArgErrIf((row > 3) && (pBS[row] == 4), OMX_Sts_BadArgErr);
+            armRetArgErrIf((pBS[row] == 4) && (pBS[row ^ 3] != 4),
+                           OMX_Sts_BadArgErr);
+            armRetArgErrIf(pThresholds[row] > 25, OMX_Sts_BadArgErr);
+
+            /* Filter vertical edge with q0 at (col,row) */
+            armVCM4P10_DeBlockPixel(
+                pSrcDst + row*srcdstStep + col,
+                1,
+                pThresholds[blk],
+                pAlpha[internal],
+                pBeta[internal],
+                pBS[blk],
+                0);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_GetVLCInfo.c
b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_GetVLCInfo.c new file mode 100644 index 0000000000000000000000000000000000000000..81c59d6eab2b9585a069da40a050ae0a06a38029 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_GetVLCInfo.c @@ -0,0 +1,192 @@ +/** + * + * File Name: omxVCM4P10_GetVLCInfo.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * + * This function extracts run-length encoding (RLE) information + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_GetVLCInfo (6.3.5.9.1) + * + * Description: + * This function extracts run-length encoding (RLE) information from the + * coefficient matrix. The results are returned in an OMXVCM4P10VLCInfo + * structure. + * + * Input Arguments: + * + * pSrcCoeff - pointer to the transform coefficient matrix. 8-byte + * alignment required. + * pScanMatrix - pointer to the scan order definition matrix. For a luma + * block the scan matrix should follow [ISO14496-10] section 8.5.4, + * and should contain the values 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, + * 10, 7, 11, 14, 15. For a chroma block, the scan matrix should + * contain the values 0, 1, 2, 3. + * bAC - indicates presence of a DC coefficient; 0 = DC coefficient + * present, 1= DC coefficient absent. + * MaxNumCoef - specifies the number of coefficients contained in the + * transform coefficient matrix, pSrcCoeff. The value should be 16 + * for blocks of type LUMADC, LUMAAC, LUMALEVEL, and CHROMAAC. The + * value should be 4 for blocks of type CHROMADC. + * + * Output Arguments: + * + * pDstVLCInfo - pointer to structure that stores information for + * run-length coding. 
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ *              conditions are true:
+ *    -    at least one of the following pointers is NULL:
+ *            pSrcCoeff, pScanMatrix, pDstVLCInfo
+ *    -    pSrcCoeff is not aligned on an 8-byte boundary
+ *    -    bAC is greater than 1
+ *    -    MaxNumCoef is greater than 16
+ *
+ * Note: when every scanned coefficient is zero (or MaxNumCoef is 0) the
+ * function returns OMX_Sts_NoErr with pDstVLCInfo left fully zeroed.
+ *
+ */
+OMXResult omxVCM4P10_GetVLCInfo (
+    const OMX_S16* pSrcCoeff,
+    const OMX_U8* pScanMatrix,
+    OMX_U8 bAC,
+    OMX_U32 MaxNumCoef,
+    OMXVCM4P10VLCInfo* pDstVLCInfo
+)
+{
+    OMX_INT i, MinIndex;
+    OMX_S32 Value;
+    /* Mask is the sign bit assigned to the next trailing one (4, 2, 1); it
+     * becomes 0 once no further trailing ones can be recorded */
+    OMX_U32 Mask = 4, RunBefore;
+    OMX_S16 *pLevel;
+    OMX_U8 *pRun;
+    OMX_S16 Buf [16];
+
+    /* check for argument error */
+    armRetArgErrIf(pSrcCoeff == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pSrcCoeff), OMX_Sts_BadArgErr)
+    armRetArgErrIf(pScanMatrix == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstVLCInfo == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(bAC > 1, OMX_Sts_BadArgErr)
+    armRetArgErrIf(MaxNumCoef > 16, OMX_Sts_BadArgErr)
+
+    /* Initialize RLE Info structure */
+    pDstVLCInfo->uTrailing_Ones = 0;
+    pDstVLCInfo->uTrailing_One_Signs = 0;
+    pDstVLCInfo->uNumCoeffs = 0;
+    pDstVLCInfo->uTotalZeros = 0;
+
+    for (i = 0; i < 16; i++)
+    {
+        pDstVLCInfo->iLevels [i] = 0;
+        pDstVLCInfo->uRuns [i] = 0;
+    }
+
+    /* With bAC == 0 and MaxNumCoef == 15 the first scan position is skipped */
+    MinIndex = (bAC == 0 && MaxNumCoef == 15) ? 1 : 0;
+    for (i = MinIndex; i < (MaxNumCoef + MinIndex); i++)
+    {
+        /* Scan */
+        Buf [i - MinIndex] = pSrcCoeff [pScanMatrix [i]];
+    }
+
+    /* skip zeros at the end; the index is tested before Buf is read so that
+     * an all-zero (or empty) block never reads Buf[-1] */
+    i = (OMX_INT)MaxNumCoef - 1;
+    while (i >= 0 && !Buf [i])
+    {
+        i--;
+    }
+
+    if (i < 0)
+    {
+        return OMX_Sts_NoErr;
+    }
+
+    /* Fill RLE Info structure */
+    pLevel = pDstVLCInfo->iLevels;
+    pRun = pDstVLCInfo->uRuns;
+    RunBefore = 0;
+
+    /* Handle first non zero separate */
+    pDstVLCInfo->uNumCoeffs++;
+    Value = Buf [i];
+    if (Value == 1 || Value == -1)
+    {
+        pDstVLCInfo->uTrailing_Ones++;
+
+        pDstVLCInfo->uTrailing_One_Signs |=
+            Value == -1 ? Mask : 0;
+        Mask >>= 1;
+    }
+    else
+    {
+        /* Not a trailing one: store the level with its magnitude reduced
+         * by one and stop looking for trailing ones */
+        Value -= (Value > 0 ? 1 : -1);
+        *pLevel++ = Value;
+        Mask = 0;
+    }
+
+    /* Remaining non zero */
+    while (--i >= 0)
+    {
+        Value = Buf [i];
+        if (Value)
+        {
+            pDstVLCInfo->uNumCoeffs++;
+
+            /* Mask becomes zero after entering */
+            if (Mask &&
+                (Value == 1 ||
+                 Value == -1))
+            {
+                pDstVLCInfo->uTrailing_Ones++;
+
+                pDstVLCInfo->uTrailing_One_Signs |=
+                    Value == -1 ? Mask : 0;
+                Mask >>= 1;
+                *pRun++ = RunBefore;
+                RunBefore = 0;
+            }
+            else
+            {
+                /* If 3 trailing ones are not completed */
+                if (Mask)
+                {
+                    Mask = 0;
+                    Value -= (Value > 0 ? 1 : -1);
+                }
+                *pLevel++ = Value;
+                *pRun++ = RunBefore;
+                RunBefore = 0;
+            }
+        }
+        else
+        {
+            pDstVLCInfo->uTotalZeros++;
+            RunBefore++;
+        }
+    }
+
+    /* Update last run */
+    if (RunBefore)
+    {
+        *pRun++ = RunBefore;
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
new file mode 100644
index 0000000000000000000000000000000000000000..8824de2f8c86768a3f10e23dd2708119e06b7404
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
@@ -0,0 +1,99 @@
+/**
+ *
+ * File Name: omxVCM4P10_InterpolateChroma.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate 1/8 Pixel interpolation for Chroma Block
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P10_InterpolateChroma (6.3.3.2.2)
+ *
+ * Description:
+ * Performs 1/8-pixel interpolation for inter chroma MB.
+ * + * Input Arguments: + * + * pSrc -Pointer to the source reference frame buffer + * srcStep -Reference frame step in bytes + * dstStep -Destination frame step in bytes; must be a multiple of + * roi.width. + * dx -Fractional part of horizontal motion vector component in 1/8 pixel + * unit; valid in the range [0,7] + * dy -Fractional part of vertical motion vector component in 1/8 pixel + * unit; valid in the range [0,7] + * roi -Dimension of the interpolation region; the parameters roi.width and + * roi.height must be equal to either 2, 4, or 8. + * + * Output Arguments: + * + * pDst -Pointer to the destination frame buffer if roi.width==2, 2-byte + * alignment required if roi.width==4, 4-byte alignment required + * if roi.width==8, 8-byte alignment required + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pSrc or pDst is NULL. + * srcStep or dstStep < 8. + * dx or dy is out of range [0-7]. + * roi.width or roi.height is out of range {2,4,8}. + * roi.width is equal to 2, but pDst is not 2-byte aligned. + * roi.width is equal to 4, but pDst is not 4-byte aligned. + * roi.width is equal to 8, but pDst is not 8 byte aligned. + * srcStep or dstStep is not a multiple of 8. 
+ *
+ * After validating its arguments this function forwards them to
+ * armVCM4P10_Interpolate_Chroma and returns that function's result.
+ *
+ */
+
+OMXResult omxVCM4P10_InterpolateChroma (
+    const OMX_U8* pSrc,
+    OMX_S32 srcStep,
+    OMX_U8* pDst,
+    OMX_S32 dstStep,
+    OMX_S32 dx,
+    OMX_S32 dy,
+    OMXSize roi
+ )
+{
+    /* Buffers must be present */
+    armRetArgErrIf((pSrc == NULL) || (pDst == NULL), OMX_Sts_BadArgErr);
+
+    /* Both steps must be at least 8 and a multiple of 8 */
+    armRetArgErrIf((srcStep < 8) || (dstStep < 8), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((srcStep | dstStep) & 7) != 0, OMX_Sts_BadArgErr);
+
+    /* Fractional offsets are limited to [0,7] */
+    armRetArgErrIf((dx < 0) || (dx > 7) || (dy < 0) || (dy > 7),
+                   OMX_Sts_BadArgErr);
+
+    /* Region dimensions must each be 2, 4 or 8 */
+    armRetArgErrIf((roi.width != 2) && (roi.width != 4) && (roi.width != 8),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((roi.height != 2) && (roi.height != 4) && (roi.height != 8),
+                   OMX_Sts_BadArgErr);
+
+    /* pDst alignment must match the region width */
+    armRetArgErrIf(((roi.width == 2) && armNot2ByteAligned(pDst)) ||
+                   ((roi.width == 4) && armNot4ByteAligned(pDst)) ||
+                   ((roi.width == 8) && armNot8ByteAligned(pDst)),
+                   OMX_Sts_BadArgErr);
+
+    return armVCM4P10_Interpolate_Chroma(
+        (OMX_U8*)pSrc, srcStep, pDst, dstStep,
+        roi.width, roi.height, dx, dy);
+}
+
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c
new file mode 100644
index 0000000000000000000000000000000000000000..ef0befac2d71676b4f095cb1bc1512ee51f066c0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c
@@ -0,0 +1,124 @@
+/**
+ *
+ * File Name: omxVCM4P10_InterpolateHalfHor_Luma.c
+ * OpenMAX DL: v1.0.2
+ * Revision:
9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This function will calculate Half horizontal luma interpolation + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_InterpolateHalfHor_Luma (6.3.5.5.1) + * + * Description: + * This function performs interpolation for two horizontal 1/2-pel positions + * (-1/2,0) and (1/2, 0) - around a full-pel position. + * + * Input Arguments: + * + * pSrc - Pointer to the top-left corner of the block used to interpolate in + * the reconstruction frame plane. + * iSrcStep - Step of the source buffer. + * iDstStep - Step of the destination(interpolation) buffer; must be a + * multiple of iWidth. + * iWidth - Width of the current block; must be equal to either 4, 8, or 16 + * iHeight - Height of the current block; must be equal to 4, 8, or 16 + * + * Output Arguments: + * + * pDstLeft -Pointer to the interpolation buffer of the left -pel position + * (-1/2, 0) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required. + * pDstRight -Pointer to the interpolation buffer of the right -pel + * position (1/2, 0) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required. 
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ *              conditions are true:
+ *    -    at least one of the following pointers is NULL:
+ *            pSrc, pDstLeft, or pDstRight
+ *    -    iWidth or iHeight have values other than 4, 8, or 16
+ *    -    iWidth==4 but pDstLeft and/or pDstRight is/are not aligned on a 4-byte boundary
+ *    -    iWidth==8 but pDstLeft and/or pDstRight is/are not aligned on a 8-byte boundary
+ *    -    iWidth==16 but pDstLeft and/or pDstRight is/are not aligned on a 16-byte boundary
+ *    -    any alignment restrictions are violated
+ *
+ *    Any error returned by the underlying armVCM4P10_InterpolateHalfHor_Luma
+ *    call is passed through; if the left interpolation fails the right one
+ *    is not attempted.
+ *
+ */
+
+OMXResult omxVCM4P10_InterpolateHalfHor_Luma(
+    const OMX_U8* pSrc,
+    OMX_U32 iSrcStep,
+    OMX_U8* pDstLeft,
+    OMX_U8* pDstRight,
+    OMX_U32 iDstStep,
+    OMX_U32 iWidth,
+    OMX_U32 iHeight
+)
+{
+    OMXResult RetValue;
+
+    /* check for argument error */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstLeft == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstRight == NULL, OMX_Sts_BadArgErr)
+    /* Either destination being misaligned is an error ("and/or") */
+    armRetArgErrIf((iWidth == 4) &&
+                   (armNot4ByteAligned(pDstLeft) ||
+                    armNot4ByteAligned(pDstRight)), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 8) &&
+                   (armNot8ByteAligned(pDstLeft) ||
+                    armNot8ByteAligned(pDstRight)), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 16) &&
+                   (armNot16ByteAligned(pDstLeft) ||
+                    armNot16ByteAligned(pDstRight)), OMX_Sts_BadArgErr)
+
+    armRetArgErrIf((iHeight != 16) && (iHeight != 8)&& (iHeight != 4), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth != 16) && (iWidth != 8)&& (iWidth != 4), OMX_Sts_BadArgErr)
+
+    /* Left position (-1/2, 0): interpolate starting one pixel to the left */
+    RetValue = armVCM4P10_InterpolateHalfHor_Luma (
+        pSrc - 1,
+        iSrcStep,
+        pDstLeft,
+        iDstStep,
+        iWidth,
+        iHeight);
+
+    if (RetValue != OMX_Sts_NoErr)
+    {
+        return RetValue;
+    }
+
+    /* Right position (1/2, 0) */
+    RetValue = armVCM4P10_InterpolateHalfHor_Luma (
+        pSrc,
+        iSrcStep,
+        pDstRight,
+        iDstStep,
+        iWidth,
+        iHeight);
+
+    return RetValue;
+}
+
+/*****************************************************************************
+ * END OF FILE
+
*****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c new file mode 100644 index 0000000000000000000000000000000000000000..3560ff84cf7a4601d478a96af1ff72094ab369a1 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c @@ -0,0 +1,123 @@ +/** + * + * File Name: omxVCM4P10_InterpolateHalfVer_Luma.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This function will calculate half-pel vertical luma interpolation + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + + +/** + * Function: omxVCM4P10_InterpolateHalfVer_Luma (6.3.5.5.2) + * + * Description: + * This function performs interpolation for two vertical 1/2-pel positions - + * (0, -1/2) and (0, 1/2) - around a full-pel position. + * + * Input Arguments: + * + * pSrc - Pointer to top-left corner of block used to interpolate in the + * reconstructed frame plane + * iSrcStep - Step of the source buffer. + * iDstStep - Step of the destination (interpolation) buffer; must be a + * multiple of iWidth. + * iWidth - Width of the current block; must be equal to either 4, 8, or 16 + * iHeight - Height of the current block; must be equal to either 4, 8, or 16 + * + * Output Arguments: + * + * pDstUp -Pointer to the interpolation buffer of the 1/2-pel position above + * the current full-pel position (0, -1/2) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required. 
+ * pDstDown -Pointer to the interpolation buffer of the 1/2-pel position below
+ *          the current full-pel position (0, 1/2)
+ *          If iWidth==4, 4-byte alignment required.
+ *          If iWidth==8, 8-byte alignment required.
+ *          If iWidth==16, 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ *              conditions are true:
+ *    -    at least one of the following pointers is NULL:
+ *            pSrc, pDstUp, or pDstDown
+ *    -    iWidth or iHeight have values other than 4, 8, or 16
+ *    -    iWidth==4 but pDstUp and/or pDstDown is/are not aligned on a 4-byte boundary
+ *    -    iWidth==8 but pDstUp and/or pDstDown is/are not aligned on a 8-byte boundary
+ *    -    iWidth==16 but pDstUp and/or pDstDown is/are not aligned on a 16-byte boundary
+ *
+ *    Any error returned by the underlying armVCM4P10_InterpolateHalfVer_Luma
+ *    call is passed through; if the upper interpolation fails the lower one
+ *    is not attempted.
+ *
+ */
+OMXResult omxVCM4P10_InterpolateHalfVer_Luma(
+    const OMX_U8* pSrc,
+    OMX_U32 iSrcStep,
+    OMX_U8* pDstUp,
+    OMX_U8* pDstDown,
+    OMX_U32 iDstStep,
+    OMX_U32 iWidth,
+    OMX_U32 iHeight
+)
+{
+    OMXResult RetValue;
+
+    /* check for argument error */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstUp == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstDown == NULL, OMX_Sts_BadArgErr)
+    /* Either destination being misaligned is an error ("and/or") */
+    armRetArgErrIf((iWidth == 4) &&
+                   (armNot4ByteAligned(pDstUp) ||
+                    armNot4ByteAligned(pDstDown)), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 8) &&
+                   (armNot8ByteAligned(pDstUp) ||
+                    armNot8ByteAligned(pDstDown)), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 16) &&
+                   (armNot16ByteAligned(pDstUp) ||
+                    armNot16ByteAligned(pDstDown)), OMX_Sts_BadArgErr)
+
+    armRetArgErrIf((iHeight != 16) && (iHeight != 8)&& (iHeight != 4), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth != 16) && (iWidth != 8)&& (iWidth != 4), OMX_Sts_BadArgErr)
+
+    /* Upper position (0, -1/2): interpolate starting one row above */
+    RetValue = armVCM4P10_InterpolateHalfVer_Luma(
+        pSrc - iSrcStep,
+        iSrcStep,
+        pDstUp,
+        iDstStep,
+        iWidth,
+        iHeight);
+
+    if (RetValue != OMX_Sts_NoErr)
+    {
+        return RetValue;
+    }
+
+    /* Lower position (0, 1/2) */
+    RetValue = armVCM4P10_InterpolateHalfVer_Luma(
+        pSrc,
+        iSrcStep,
+        pDstDown,
+        iDstStep,
+        iWidth,
+        iHeight);
+
+    return RetValue;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateLuma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateLuma.c
new file mode 100644
index 0000000000000000000000000000000000000000..d233735014a35a4c57b4a7db2e40de91767a97c5
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateLuma.c
@@ -0,0 +1,99 @@
+/**
+ *
+ * File Name: omxVCM4P10_InterpolateLuma.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function performs quarter-pixel interpolation
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P10_InterpolateLuma (6.3.3.2.1)
+ *
+ * Description:
+ * Performs quarter-pixel interpolation for inter luma MB. It is assumed that
+ * the frame is already padded when calling this function.
+ *
+ * Input Arguments:
+ *
+ * pSrc -Pointer to the source reference frame buffer
+ * srcStep -reference frame step, in bytes; must be a multiple of roi.width
+ * dstStep -destination frame step, in bytes; must be a multiple of
+ *          roi.width
+ * dx -Fractional part of horizontal motion vector component in 1/4 pixel
+ *          unit; valid in the range [0,3]
+ * dy -Fractional part of vertical motion vector y component in 1/4 pixel
+ *          unit; valid in the range [0,3]
+ * roi -Dimension of the interpolation region; the parameters roi.width and
+ *          roi.height must be equal to either 4, 8, or 16.
+ * + * Output Arguments: + * + * pDst -Pointer to the destination frame buffer if roi.width==4, 4-byte + * alignment required if roi.width==8, 8-byte alignment required + * if roi.width==16, 16-byte alignment required + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pSrc or pDst is NULL. + * srcStep or dstStep < roi.width. + * dx or dy is out of range [0,3]. + * roi.width or roi.height is out of range {4, 8, 16}. + * roi.width is equal to 4, but pDst is not 4 byte aligned. + * roi.width is equal to 8 or 16, but pDst is not 8 byte aligned. + * srcStep or dstStep is not a multiple of 8. + * + */ + +OMXResult omxVCM4P10_InterpolateLuma ( + const OMX_U8* pSrc, + OMX_S32 srcStep, + OMX_U8* pDst, + OMX_S32 dstStep, + OMX_S32 dx, + OMX_S32 dy, + OMXSize roi + ) +{ + /* check for argument error */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(srcStep < roi.width, OMX_Sts_BadArgErr) + armRetArgErrIf(dstStep < roi.width, OMX_Sts_BadArgErr) + armRetArgErrIf(dx < 0, OMX_Sts_BadArgErr) + armRetArgErrIf(dx > 3, OMX_Sts_BadArgErr) + armRetArgErrIf(dy < 0, OMX_Sts_BadArgErr) + armRetArgErrIf(dy > 3, OMX_Sts_BadArgErr) + armRetArgErrIf((roi.width != 4) && (roi.width != 8) && (roi.width != 16), OMX_Sts_BadArgErr) + armRetArgErrIf((roi.height != 4) && (roi.height != 8) && (roi.height != 16), OMX_Sts_BadArgErr) + armRetArgErrIf((roi.width == 4) && armNot4ByteAligned(pDst), OMX_Sts_BadArgErr) + armRetArgErrIf((roi.width == 8) && armNot8ByteAligned(pDst), OMX_Sts_BadArgErr) + armRetArgErrIf((roi.width == 16) && armNot16ByteAligned(pDst), OMX_Sts_BadArgErr) + armRetArgErrIf(srcStep & 7, OMX_Sts_BadArgErr) + armRetArgErrIf(dstStep & 7, OMX_Sts_BadArgErr) + + return armVCM4P10_Interpolate_Luma + (pSrc, srcStep, pDst, dstStep, roi.width, roi.height, dx, dy); + +} + + 
+/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c new file mode 100644 index 0000000000000000000000000000000000000000..92ba0319b25aa802bd256d0dafbc23acb570e8a6 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c @@ -0,0 +1,102 @@ +/** + * + * File Name: omxVCM4P10_InvTransformDequant_ChromaDC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This function will calculate 4x4 hadamard transform of chroma DC + * coefficients and quantization + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: omxVCM4P10_InvTransformDequant_ChromaDC (6.3.5.6.4) + * + * Description: + * This function performs inverse 2x2 Hadamard transform and then dequantizes + * the coefficients. + * + * Input Arguments: + * + * pSrc - Pointer to the 2x2 array of the 2x2 Hadamard-transformed and + * quantized coefficients. 8 byte alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * + * Output Arguments: + * + * pDst - Pointer to inverse-transformed and dequantized coefficients. + * 8-byte alignment required. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: pSrc + * - pSrc or pDst is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_InvTransformDequant_ChromaDC( + const OMX_S16* pSrc, + OMX_S16* pDst, + OMX_U32 iQP +) +{ + OMX_INT i, j; + OMX_S32 m[2][2]; + OMX_S32 QPer, V00, Value; + + /* check for argument error */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(armNot8ByteAligned(pSrc), OMX_Sts_BadArgErr); + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(armNot8ByteAligned(pDst), OMX_Sts_BadArgErr); + armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr) + + /* Inv Hadamard Transform for 2x2 block */ + m[0][0] = pSrc[0] + pSrc[1] + pSrc[2] + pSrc[3]; + m[0][1] = pSrc[0] - pSrc[1] + pSrc[2] - pSrc[3]; + m[1][0] = pSrc[0] + pSrc[1] - pSrc[2] - pSrc[3]; + m[1][1] = pSrc[0] - pSrc[1] - pSrc[2] + pSrc[3]; + + /* Quantization */ + /* Scaling */ + QPer = iQP / 6; + V00 = armVCM4P10_VMatrix [iQP % 6][0]; + + for (j = 0; j < 2; j++) + { + for (i = 0; i < 2; i++) + { + if (QPer < 1) + { + Value = (m[j][i] * V00) >> 1; + } + else + { + Value = (m[j][i] * V00) << (QPer - 1); + } + + pDst[j * 2 + i] = (OMX_S16) Value; + } + } + + return OMX_Sts_NoErr; +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c new file mode 100644 index 0000000000000000000000000000000000000000..a3b1200fbc371c47d82f8646d227f4aa3b2a069f --- /dev/null +++ 
b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c @@ -0,0 +1,128 @@ +/** + * + * File Name: omxVCM4P10_InvTransformDequant_LumaDC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This function will calculate 4x4 hadamard transform of luma DC coefficients + * and quantization + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_InvTransformDequant_LumaDC (6.3.5.6.3) + * + * Description: + * This function performs inverse 4x4 Hadamard transform and then dequantizes + * the coefficients. + * + * Input Arguments: + * + * pSrc - Pointer to the 4x4 array of the 4x4 Hadamard-transformed and + * quantized coefficients. 16 byte alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * + * Output Arguments: + * + * pDst - Pointer to inverse-transformed and dequantized coefficients. + * 16-byte alignment required. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: pSrc + * - pSrc or pDst is not aligned on a 16-byte boundary + * + */ +OMXResult omxVCM4P10_InvTransformDequant_LumaDC( + const OMX_S16* pSrc, + OMX_S16* pDst, + OMX_U32 iQP +) +{ + OMX_INT i, j; + OMX_S32 m1[4][4], m2[4][4], Value; + OMX_S32 QPer, V; + + /* check for argument error */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr) + armRetArgErrIf(armNot16ByteAligned(pSrc), OMX_Sts_BadArgErr) + armRetArgErrIf(armNot16ByteAligned(pDst), OMX_Sts_BadArgErr) + + /* Inv Hadamard Transform for DC Luma 4x4 block */ + /* Horizontal */ + for (i = 0; i < 4; i++) + { + j = i * 4; + + m1[i][0] = pSrc[j + 0] + pSrc[j + 2]; /* a+c */ + m1[i][1] = pSrc[j + 1] + pSrc[j + 3]; /* b+d */ + m1[i][2] = pSrc[j + 0] - pSrc[j + 2]; /* a-c */ + m1[i][3] = pSrc[j + 1] - pSrc[j + 3]; /* b-d */ + + m2[i][0] = m1[i][0] + m1[i][1]; /* a+b+c+d */ + m2[i][1] = m1[i][2] + m1[i][3]; /* a+b-c-d */ + m2[i][2] = m1[i][2] - m1[i][3]; /* a-b-c+d */ + m2[i][3] = m1[i][0] - m1[i][1]; /* a-b+c-d */ + + } + + /* Vertical */ + for (i = 0; i < 4; i++) + { + m1[0][i] = m2[0][i] + m2[2][i]; + m1[1][i] = m2[1][i] + m2[3][i]; + m1[2][i] = m2[0][i] - m2[2][i]; + m1[3][i] = m2[1][i] - m2[3][i]; + + m2[0][i] = m1[0][i] + m1[1][i]; + m2[1][i] = m1[2][i] + m1[3][i]; + m2[2][i] = m1[2][i] - m1[3][i]; + m2[3][i] = m1[0][i] - m1[1][i]; + } + + + /* Scaling */ + QPer = iQP / 6; + V = armVCM4P10_VMatrix [iQP % 6][0]; + + for (j = 0; j < 4; j++) + { + for (i = 0; i < 4; i++) + { + if (QPer < 2) + { + Value = (m2[j][i] * V + (1 << (1 - QPer))) >> (2 - QPer); + } + else + { + Value = m2[j][i] * V * (1 << (QPer - 2)); + } + + pDst[j * 4 + i] = (OMX_S16) Value; + + } + } + return OMX_Sts_NoErr; +} + 
+/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c new file mode 100644 index 0000000000000000000000000000000000000000..3303997b953ba2f05d8337d5eefe1cd8eb9acbcc --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c @@ -0,0 +1,124 @@ +/** + * + * File Name: omxVCM4P10_InvTransformResidualAndAdd.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This function will inverse integer 4x4 transform + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_InvTransformResidualAndAdd (6.3.5.7.1) + * + * Description: + * This function performs inverse an 4x4 integer transformation to produce + * the difference signal and then adds the difference to the prediction to get + * the reconstructed signal. + * + * Input Arguments: + * + * pSrcPred - Pointer to prediction signal. 4-byte alignment required. + * pDequantCoeff - Pointer to the transformed coefficients. 8-byte + * alignment required. + * iSrcPredStep - Step of the prediction buffer; must be a multiple of 4. + * iDstReconStep - Step of the destination reconstruction buffer; must be a + * multiple of 4. + * bAC - Indicate whether there is AC coefficients in the coefficients + * matrix. + * + * Output Arguments: + * + * pDstRecon -Pointer to the destination reconstruction buffer. 4-byte + * alignment required. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcPred, pDequantCoeff, pDstRecon + * - pSrcPred is not aligned on a 4-byte boundary + * - iSrcPredStep or iDstReconStep is not a multiple of 4. + * - pDequantCoeff is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_InvTransformResidualAndAdd( + const OMX_U8* pSrcPred, + const OMX_S16* pDequantCoeff, + OMX_U8* pDstRecon, + OMX_U32 iSrcPredStep, + OMX_U32 iDstReconStep, + OMX_U8 bAC +) +{ + OMX_INT i, j; + OMX_S16 In[16], Out[16]; + OMX_S32 Value; + + /* check for argument error */ + armRetArgErrIf(pSrcPred == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(armNot4ByteAligned(pSrcPred), OMX_Sts_BadArgErr) + armRetArgErrIf(pDequantCoeff == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(armNot8ByteAligned(pDequantCoeff), OMX_Sts_BadArgErr) + armRetArgErrIf(pDstRecon == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(armNot4ByteAligned(pDstRecon), OMX_Sts_BadArgErr) + armRetArgErrIf(bAC > 1, OMX_Sts_BadArgErr) + armRetArgErrIf(iSrcPredStep == 0 || iSrcPredStep & 3, OMX_Sts_BadArgErr) + armRetArgErrIf(iDstReconStep == 0 || iDstReconStep & 3, OMX_Sts_BadArgErr) + + if (bAC) + { + for (i = 0; i < 16; i++) + { + In[i] = pDequantCoeff [i]; + } + } + else + { + /* Copy DC */ + In[0] = pDequantCoeff [0]; + + for (i = 1; i < 16; i++) + { + In[i] = 0; + } + } + + /* Residual Transform */ + armVCM4P10_TransformResidual4x4 (Out, In); + + for (j = 0; j < 4; j++) + { + for (i = 0; i < 4; i++) + { + /* Add predition */ + Value = (OMX_S32) Out [j * 4 + i] + pSrcPred [j * iSrcPredStep + i]; + + /* Saturate Value to OMX_U8 */ + Value = armClip (0, 255, Value); + + pDstRecon[j * iDstReconStep + i] = (OMX_U8) Value; + } + } + + return OMX_Sts_NoErr; +} + +/***************************************************************************** + * END OF FILE + 
*****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEGetBufSize.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEGetBufSize.c new file mode 100644 index 0000000000000000000000000000000000000000..8c3a5c3126a1287f0e4cb9dbe8ec3b60ac831e56 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEGetBufSize.c @@ -0,0 +1,70 @@ +/** + * + * File Name: omxVCM4P10_MEGetBufSize.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Initialization modules for the vendor specific Motion Estimation structure. + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: omxVCM4P10_MEGetBufSize (6.3.5.1.1) + * + * Description: + * Computes the size, in bytes, of the vendor-specific specification + * structure for the omxVCM4P10 motion estimation functions BlockMatch_Integer + * and MotionEstimationMB. + * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P10MEMode + * pMEParams -motion estimation parameters + * + * Output Arguments: + * + * pSize - pointer to the number of bytes required for the motion + * estimation specification structure + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - pMEParams or pSize is NULL. + * - an invalid MEMode is specified. 
+ * + */ + +OMXResult omxVCM4P10_MEGetBufSize( + OMXVCM4P10MEMode MEMode, + const OMXVCM4P10MEParams *pMEParams, + OMX_U32 *pSize + ) +{ + armRetArgErrIf(!pMEParams, OMX_Sts_BadArgErr); + armRetArgErrIf(!pSize, OMX_Sts_BadArgErr); + armRetArgErrIf((MEMode != OMX_VC_M4P10_FAST_SEARCH) && + (MEMode != OMX_VC_M4P10_FULL_SEARCH), OMX_Sts_BadArgErr); + armRetArgErrIf((pMEParams->searchRange16x16 <= 0) || + (pMEParams->searchRange8x8 <= 0) || + (pMEParams->searchRange4x4 <= 0), OMX_Sts_BadArgErr); + + *pSize = (OMX_INT) sizeof(ARMVCM4P10_MESpec); + + return OMX_Sts_NoErr; +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEInit.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEInit.c new file mode 100644 index 0000000000000000000000000000000000000000..58ecc886adbb88770a7f9090474b74a3f3bd3357 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEInit.c @@ -0,0 +1,92 @@ +/** + * + * File Name: omxVCM4P10_MEInit.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Initialization modules for the vendor specific Motion Estimation structure. + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: omxVCM4P10_MEInit (6.3.5.1.2) + * + * Description: + * Initializes the vendor-specific specification structure required for the + * omxVCM4P10 motion estimation functions: BlockMatch_Integer and + * MotionEstimationMB. Memory for the specification structure *pMESpec must be + * allocated prior to calling the function, and should be aligned on a 4-byte + * boundary. The number of bytes required for the specification structure can + * be determined using the function omxVCM4P10_MEGetBufSize. 
Following + * initialization by this function, the vendor-specific structure *pMESpec + * should contain an implementation-specific representation of all motion + * estimation parameters received via the structure pMEParams, for example + * searchRange16x16, searchRange8x8, etc. + * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P10MEMode + * pMEParams - motion estimation parameters + * pMESpec - pointer to the uninitialized ME specification structure + * + * Output Arguments: + * + * pMESpec - pointer to the initialized ME specification structure + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - pMEParams or pSize is NULL. + * - an invalid value was specified for the parameter MEmode + * - a negative or zero value was specified for one of the search ranges + * (e.g., pMBParams >searchRange8x8, pMEParams->searchRange16x16, etc.) + * - either in isolation or in combination, one or more of the enables or + * search ranges in the structure *pMEParams were configured such + * that the requested behavior fails to comply with [ISO14496-10]. 
+ * + */ + +OMXResult omxVCM4P10_MEInit( + OMXVCM4P10MEMode MEMode, + const OMXVCM4P10MEParams *pMEParams, + void *pMESpec + ) +{ + ARMVCM4P10_MESpec *armMESpec = (ARMVCM4P10_MESpec *) pMESpec; + + armRetArgErrIf(!pMEParams, OMX_Sts_BadArgErr); + armRetArgErrIf(!pMESpec, OMX_Sts_BadArgErr); + armRetArgErrIf((MEMode != OMX_VC_M4P10_FAST_SEARCH) && + (MEMode != OMX_VC_M4P10_FULL_SEARCH), OMX_Sts_BadArgErr); + armRetArgErrIf((pMEParams->searchRange16x16 <= 0) || + (pMEParams->searchRange8x8 <= 0) || + (pMEParams->searchRange4x4 <= 0), OMX_Sts_BadArgErr); + + armMESpec->MEParams.blockSplitEnable8x8 = pMEParams->blockSplitEnable8x8; + armMESpec->MEParams.blockSplitEnable4x4 = pMEParams->blockSplitEnable4x4; + armMESpec->MEParams.halfSearchEnable = pMEParams->halfSearchEnable; + armMESpec->MEParams.quarterSearchEnable = pMEParams->quarterSearchEnable; + armMESpec->MEParams.intraEnable4x4 = pMEParams->intraEnable4x4; + armMESpec->MEParams.searchRange16x16 = pMEParams->searchRange16x16; + armMESpec->MEParams.searchRange8x8 = pMEParams->searchRange8x8; + armMESpec->MEParams.searchRange4x4 = pMEParams->searchRange4x4; + armMESpec->MEMode = MEMode; + + return OMX_Sts_NoErr; +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c new file mode 100644 index 0000000000000000000000000000000000000000..33dbf3f886782e4005347cc161cfe1399c0a4d07 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c @@ -0,0 +1,1892 @@ +/** x + * + * File Name: omxVCM4P10_MotionEstimationMB.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * Description: + * This function perform MB level motion estimation + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +#define ARM_VCM4P10_MAX_FRAMES (15) +#define ARM_VCM4P10_MAX_4x4_SAD (0xffff) +#define ARM_VCM4P10_MAX_MODE_VALUE (0xffffffff) +#define ARM_VCM4P10_MAX_MODES (16) +#define ARM_VCM4P10_MB_BLOCK_SIZE (16) +#define ARM_VCM4P10_MEDIAN(a,b,c) (a>b?a>c?b>c?b:c:a:b>c?a>c?a:c:b) +#define ARM_VCM4P10_SHIFT_QP (12) + +#define ARM_VCM4P10_MVPRED_MEDIAN (0) +#define ARM_VCM4P10_MVPRED_L (1) +#define ARM_VCM4P10_MVPRED_U (2) +#define ARM_VCM4P10_MVPRED_UR (3) + +#define ARM_VCM4P10_MB_BLOCK_SIZE (16) +#define ARM_VCM4P10_BLOCK_SIZE (4) +#define ARM_VCM4P10_MAX_COST (1 << 30) +#define ARM_VCM4P10_INVALID_BLOCK (-2) + + +/** + * Function: armVCM4P10_CalculateBlockSAD + * + * Description: + * Calculate SAD value for the selected MB encoding mode and update + * pDstBlockSAD parameter. These SAD values are calculated 4x4 blocks at + * a time and in the scan order. 
+ * + * Remarks: + * + * Parameters: + * [in] pSrcMBInfo - + * [in] pSrcCurrBuf - + * [in] SrcCurrStep - + * [in] pSrcRefBufList- + * [in] SrcRefStep - + * [in] pSrcRecBuf - + * [in] SrcRecStep - + * [in] pRefRect - + * [in] pCurrPointPos - + * [in] Lambda - + * [in] pMESpec - + * [in] pMBInter - + * [in] pMBIntra - + * [out] pDstBlockSAD - pointer to 16 element array for SAD corresponding to 4x4 blocks + * Return Value: + * None + * + */ + +static OMXResult armVCM4P10_CalculateBlockSAD( + OMXVCM4P10MBInfo *pSrcMBInfo, + const OMX_U8 *pSrcCurrBuf, + OMX_S32 SrcCurrStep, + const OMX_U8 *pSrcRefBufList[ARM_VCM4P10_MAX_FRAMES], + OMX_S32 SrcRefStep, + const OMX_U8 *pSrcRecBuf, + OMX_S32 SrcRecStep, + const OMXRect *pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCM4P10MBInfoPtr *pMBInter, + const OMXVCM4P10MBInfoPtr *pMBIntra, + OMX_U16 *pDstBlockSAD) +{ + OMX_INT InvalidSAD = 0; + OMX_INT i; + + OMX_U8 Buffer [16*16 + 15]; + OMX_U8 *pTempDstBuf; + OMX_S32 TempDstStep; + OMX_U8 *pTempRefBuf; + OMX_S32 TempRefStep; + + /* Temporary buffer to store the predicted mb coefficients */ + pTempDstBuf = armAlignTo16Bytes(Buffer); + TempDstStep = 16; + + /* Update pDstBlockSAD if MB is a valid type */ + if (pSrcMBInfo) + { + OMX_U32 Width=0, Height=0, MaxXPart, MaxYPart,MaxSubXPart,MaxSubYPart; + + /* Depending on type of MB, do prediction and fill temp buffer */ + switch (pSrcMBInfo->mbType) + { + case OMX_VC_P_16x16: + Width = 16; + Height = 16; + break; + case OMX_VC_P_16x8: + Width = 16; + Height = 8; + break; + case OMX_VC_P_8x16: + Width = 8; + Height = 16; + break; + case OMX_VC_P_8x8: + Width = 8; + Height = 8; + break; + case OMX_VC_INTRA_4x4: + { + /* Create predicted MB Intra4x4 mode */ + OMX_S32 PredIntra4x4Mode [5][9]; + OMX_S32 x, y, Block8x8, Block4x4, BlockX, BlockY; + OMX_U8 pSrcYBuff [(16*3)*(16*2)]; + OMX_U8 *pSrcY; + OMX_S32 StepSrcY; + OMX_S32 availability; + + for (y = 0; y < 5; y++) + { + for (x = 0; x < 9; x++) + { + /* + * Initialize 
with value of ARM_VCM4P10_INVALID_BLOCK, to mean this + * 4x4 block is not available + */ + PredIntra4x4Mode [y][x] = ARM_VCM4P10_INVALID_BLOCK; + } + } + + /* Replace ARM_VCM4P10_INVALID_BLOCK value with available MBs values*/ + for (x = 0; x < 4; x++) + { + /* Store values of b0, b1, b2, b3 */ + if (pMBIntra[1] != NULL) + { + PredIntra4x4Mode [0][x + 1] = + pMBIntra[1]->pIntra4x4PredMode[3*4 + x]; + } + + /* Store values of d0, d1, d2, d3 */ + if (pMBIntra[3] != NULL) + { + PredIntra4x4Mode [0][x + 5] = + pMBIntra[3]->pIntra4x4PredMode[3*4 + x]; + } + } + + /* Store values of c3 */ + if (pMBIntra[2] != NULL) + { + PredIntra4x4Mode [0][0] = pMBIntra[2]->pIntra4x4PredMode[15]; + } + + for (y = 0; y < 4; y++) + { + /* Store values of a0, a1, a2, a3 */ + if (pMBIntra[0] != NULL) + { + PredIntra4x4Mode [y + 1][0] = + pMBIntra[0]->pIntra4x4PredMode[y*4 + 3]; + } + } + + /* + * Update neighbouring Pred mode array which will be used for + * prediction of Intra4x4 modes. + */ + + pSrcY = pSrcYBuff; + StepSrcY = 16 * 3; + for (y = 0; y < (16 * 2); y++) + { + for (x = 0; x < (16 * 3); x++) + { + pSrcY [StepSrcY * y + x] = + pSrcRecBuf [SrcRecStep * (y - 16) + x - 16]; + } + } + + + /* for each 8x8 block */ + for (Block8x8 = 0; Block8x8 < 4; Block8x8++) + { + /* for each 4x4 block inside 8x8 block */ + for (Block4x4 = 0; Block4x4 < 4; Block4x4++) + { + /* Get block cordinates from 8x8 block index and 4x4 block index */ + BlockX = ((Block8x8 & 1) << 1) + (Block4x4 & 1); + BlockY = ((Block8x8 >> 1) << 1) + (Block4x4 >> 1); + + /* Add offset to point to start of current MB in the array pIntra4x4PredMode */ + x = BlockX + 1; + y = BlockY + 1; + + availability = 0; + + /* Check for availability of LEFT Block */ + if (PredIntra4x4Mode [y][x - 1] != ARM_VCM4P10_INVALID_BLOCK) + { + availability |= OMX_VC_LEFT; + } + + /* Check for availability of UPPER Block */ + if (PredIntra4x4Mode [y - 1][x] != ARM_VCM4P10_INVALID_BLOCK) + { + availability |= OMX_VC_UPPER; + } + + /* Check for 
availability of UPPER LEFT Block */ + if (PredIntra4x4Mode [y - 1][x - 1] != ARM_VCM4P10_INVALID_BLOCK) + { + availability |= OMX_VC_UPPER_LEFT; + } + + PredIntra4x4Mode [y][x] = pSrcMBInfo->pIntra4x4PredMode[BlockY*4+BlockX]; + x = BlockX * 4; + y = BlockY * 4; + + pSrcY = pSrcYBuff + 16 * StepSrcY + 16 + y * StepSrcY + x; + + omxVCM4P10_PredictIntra_4x4( + pSrcY - 1, + pSrcY - StepSrcY, + pSrcY - StepSrcY - 1, + pTempDstBuf + x + y * TempDstStep, + StepSrcY, + TempDstStep, + pSrcMBInfo->pIntra4x4PredMode[BlockY*4+BlockX], + availability); + + for (BlockY=0;BlockY<4;BlockY++) + { + for(BlockX=0;BlockX<4;BlockX++) + { + pSrcY [BlockY * StepSrcY + BlockX] = + (OMX_U8)(*(pTempDstBuf + x + y * TempDstStep + BlockY * TempDstStep + BlockX)); + } + } + + } + } + break; + } + case OMX_VC_INTRA_16x16: + { + OMX_U32 MBPosX = pCurrPointPos->x >> 4; + OMX_U32 MBPosY = pCurrPointPos->y >> 4; + OMX_U32 availability = 0; + + /* Check for availability of LEFT MB */ + if ((MBPosX != 0) && (pMBIntra [0] != 0 || pMBInter [0] != 0)) + { + availability |= OMX_VC_LEFT; + } + + /* Check for availability of UP MB */ + if ((MBPosY != 0) && (pMBIntra [1] != 0 || pMBInter [1] != 0)) + { + availability |= OMX_VC_UPPER; + } + + /* Check for availability of UP-LEFT MB */ + if ((MBPosX > 0) && (MBPosY > 0) && + (pMBIntra [2] != 0 || pMBInter [2] != 0)) + { + availability |= OMX_VC_UPPER_LEFT; + } + + omxVCM4P10_PredictIntra_16x16( + pSrcRecBuf - 1, + pSrcRecBuf - SrcRecStep, + pSrcRecBuf - SrcRecStep - 1, + pTempDstBuf, + SrcRecStep, + TempDstStep, + pSrcMBInfo->Intra16x16PredMode, + availability); + + break; + } + + case OMX_VC_INTER_SKIP: + case OMX_VC_PREF0_8x8: + case OMX_VC_INTRA_PCM: + default: + /* These cases will update pDstBlockSAD with MAX value */ + InvalidSAD = 1; + break; + } + + /* INTER MB */ + if ((pSrcMBInfo->mbType == OMX_VC_P_16x16) || + (pSrcMBInfo->mbType == OMX_VC_P_8x16) || + (pSrcMBInfo->mbType == OMX_VC_P_16x8) || + (pSrcMBInfo->mbType == OMX_VC_P_8x8)) + { + const 
OMX_U8 *pTempSrcBuf; + OMX_S32 TempSrcStep; + OMX_S32 mvx,mvy; + OMX_U32 PartX, PartY, SubPartX, SubPartY; + + TempSrcStep = SrcRefStep; + + MaxXPart = 16/Width; + MaxYPart = 16/Height; + + + for (PartY = 0; PartY < MaxYPart; PartY++) + { + for (PartX = 0; PartX < MaxXPart; PartX++) + { + + pTempSrcBuf = pSrcRefBufList[pSrcMBInfo->pRefL0Idx[PartY * 2 + PartX]]; + + if (MaxXPart == 2 && MaxYPart == 2) + { + switch (pSrcMBInfo->subMBType[PartY*2+PartX]) + { + case OMX_VC_SUB_P_8x8: + Width = 8; + Height = 8; + break; + case OMX_VC_SUB_P_8x4: + Width = 8; + Height = 4; + break; + case OMX_VC_SUB_P_4x8: + Width = 4; + Height = 8; + break; + case OMX_VC_SUB_P_4x4: + Width = 4; + Height = 4; + break; + default: + /* Default */ + Width = 4; + Height = 4; + break; + } + + MaxSubXPart = 8/Width; + MaxSubYPart = 8/Height; + + for (SubPartY = 0; SubPartY < MaxSubYPart; SubPartY++) + { + for (SubPartX = 0; SubPartX < MaxSubXPart; SubPartX++) + { + mvx = pSrcMBInfo->pMV0 [2*PartY + SubPartY][2*PartX + SubPartX].dx; + mvy = pSrcMBInfo->pMV0 [2*PartY + SubPartY][2*PartX + SubPartX].dy; + armVCM4P10_Interpolate_Luma( + pTempSrcBuf + (8*PartX + 4*SubPartX + (mvx/4)) + (8*PartY + 4*SubPartY + (mvy/4)) * TempSrcStep, + TempSrcStep, + pTempDstBuf + (8*PartX + 4*SubPartX) + (8*PartY + 4*SubPartY) * TempDstStep, + TempDstStep, + Width, + Height, + mvx & 3, + mvy & 3 + ); + } + } + } + else + { + + mvx = pSrcMBInfo->pMV0 [2*PartY][2*PartX].dx; + mvy = pSrcMBInfo->pMV0 [2*PartY][2*PartX].dy; + armVCM4P10_Interpolate_Luma( + pTempSrcBuf + (8*PartX + (mvx/4)) + (8*PartY + (mvy/4)) * TempSrcStep, + TempSrcStep, + pTempDstBuf + (8*PartX) + (8*PartY) * TempDstStep, + TempDstStep, + Width, + Height, + mvx & 3, + mvy & 3 + ); + + } + } + } + } + } + else + { + InvalidSAD = 1; + } + + /* Calculate SAD from predicted buffer */ + if (!InvalidSAD) + { + OMX_U32 x8x8, y8x8, x4x4, y4x4, Block8x8, Block4x4; + OMX_S32 SAD; + + pTempRefBuf = pTempDstBuf; + TempRefStep = 16; + + /* SAD for each 4x4 block 
in scan order */ + for (Block8x8 = 0; Block8x8 < 4; Block8x8++) + { + x8x8 = 8*(Block8x8 & 1); + y8x8 = 8*(Block8x8 >> 1); + for (Block4x4 = 0; Block4x4 < 4; Block4x4++) + { + x4x4 = 4*(Block4x4 & 1); + y4x4 = 4*(Block4x4 >> 1); + + armVCCOMM_SAD( + pSrcCurrBuf + (x8x8 + x4x4) + (y8x8 + y4x4) * SrcCurrStep, + SrcCurrStep, + pTempRefBuf + (x8x8 + x4x4) + (y8x8 + y4x4) * TempRefStep, + TempRefStep, + &SAD, + 4, /* Height */ + 4); /* Width */ + *(pDstBlockSAD + 4 * Block8x8 + Block4x4) = (SAD < 0x7fff) ? (OMX_U16) SAD : ARM_VCM4P10_MAX_MODE_VALUE; + } + } + } + else + { + /* Fill SADs with max values and return*/ + for (i = 0; i < 16; i++) + { + pDstBlockSAD [i] = ARM_VCM4P10_MAX_4x4_SAD; + } + } + return OMX_Sts_NoErr; +} + + + +/** + * Function: armVCM4P10_Mode4x4Decision + * + * Description: + * Intra 4x4 Mode decision by calculating cost for all possible modes and + * choosing the best mode + * + * Remarks: + * + * Parameters: + * [in] pSrcCurrBuf - Pointer to the start of current Macroblock + * [in] SrcCurrStep - Step size of the pointer pSrcCurrBuf + * [in/out] pSrcDstMBCurr - Pointer to the OMXVCM4P10MBInfo which will be updated for + * field pIntra4x4PredMode of the current block. + * [in] Block8x8 - Index 8x8 block in which current 4x4 block belongs + * [in] Block4x4 - Index of current 4x4 block + * [in/out] pPredIntra4x4SrcY - Pointer to current block location in buffer + * with reconstructed values. This will be modified by this + * function with best mode predicted values + * [in] StepPredIntra4x4SrcY - Step size of the pointer pPredIntra4x4SrcY + * [in] pIntra4x4PredMode - Array of Intra 4x4 prediction mode for the MB. + * Current MB modes starts at [1,1]. 
+ * [in] pBestCost - Cost for the Best Intra 4x4 mode
+ * Return Value:
+ * None
+ *
+ */
+static OMXVoid armVCM4P10_Mode4x4Decision (
+    const OMX_U8* pSrcCurrBuf,
+    OMX_S32 SrcCurrStep,
+    OMXVCM4P10MBInfo *pSrcDstMBCurr,
+    OMX_S32 Block8x8,
+    OMX_S32 Block4x4,
+    OMX_U8 *pPredIntra4x4SrcY,
+    OMX_S32 StepPredIntra4x4SrcY,
+    OMX_S32 pIntra4x4PredMode [][9],
+    OMX_S32 *pBestCost /* written on return: lowest SAD found, or ARM_VCM4P10_MAX_COST if no mode could be predicted */
+)
+{
+    OMX_S32 i, j, x, y, BlockX, BlockY, mode;
+    OMX_S32 Cost, BestCost;
+    OMX_U8 *pSrcY;
+    OMX_S32 StepSrcY;
+    OMX_S32 availability = 0;
+    OMX_U8 pPredBlock [4*4]; /* candidate prediction, stored with a row step of 4 */
+    OMXResult Ret = OMX_Sts_Err;
+
+    /* Get block coordinates (in 4x4 units, 0..3) from 8x8 block index and 4x4 block index */
+    BlockX = ((Block8x8 & 1) << 1) + (Block4x4 & 1);
+    BlockY = ((Block8x8 >> 1) << 1) + (Block4x4 >> 1);
+
+    /* Add offset to point to start of current MB in the array pIntra4x4PredMode */
+    x = BlockX + 1;
+    y = BlockY + 1;
+
+    /* Check for availability of LEFT Block */
+    if (pIntra4x4PredMode [y][x - 1] != ARM_VCM4P10_INVALID_BLOCK)
+    {
+        availability |= OMX_VC_LEFT;
+    }
+
+    /* Check for availability of UPPER Block */
+    if (pIntra4x4PredMode [y - 1][x] != ARM_VCM4P10_INVALID_BLOCK)
+    {
+        availability |= OMX_VC_UPPER;
+    }
+
+    /* Check for availability of UPPER LEFT Block */
+    if (pIntra4x4PredMode [y - 1][x - 1] != ARM_VCM4P10_INVALID_BLOCK)
+    {
+        availability |= OMX_VC_UPPER_LEFT;
+    }
+
+    pSrcY = pPredIntra4x4SrcY + /* top-left sample of this 4x4 block inside the working buffer */
+        StepPredIntra4x4SrcY * (BlockY << 2) +
+        (BlockX << 2);
+
+    StepSrcY = StepPredIntra4x4SrcY;
+
+    x = BlockX * 4; /* from here on x/y are pel offsets inside the current MB */
+    y = BlockY * 4;
+
+    Cost = BestCost = ARM_VCM4P10_MAX_COST;
+
+    /*
+     * Go through each of the nine modes looking for the minimum cost.
+     * A mode is skipped when omxVCM4P10_PredictIntra_4x4 does not return
+     * OMX_Sts_NoErr.
+     */
+    for (mode = 0; mode < 9; mode++)
+    {
+        Ret = omxVCM4P10_PredictIntra_4x4(
+            pSrcY - 1,
+            pSrcY - StepSrcY,
+            pSrcY - StepSrcY - 1,
+            pPredBlock,
+            StepSrcY,
+            4,
+            (OMXVCM4P10Intra4x4PredMode) mode,
+            availability);
+
+        if (Ret == OMX_Sts_NoErr)
+        {
+            armVCCOMM_SAD(
+                pSrcCurrBuf + (y * SrcCurrStep) + x,
+                SrcCurrStep,
+                pPredBlock,
+                4,
+                &Cost,
+                4,
+                4);
+
+            if (Cost < BestCost) /* strict '<': on a tie the lower-numbered mode is kept */
+            {
+                BestCost = Cost;
+                /* Record the winning mode in the neighbour-mode map and in the MB info */
+                pIntra4x4PredMode [BlockY + 1][BlockX + 1] =
+                    (OMXVCM4P10Intra4x4PredMode) mode;
+                pSrcDstMBCurr->pIntra4x4PredMode [BlockY * 4 + BlockX] =
+                    (OMXVCM4P10Intra4x4PredMode) mode;
+                /* Write this prediction into the working buffer at the block's position */
+                for (j = 0; j < 4; j++)
+                {
+                    for (i = 0; i < 4; i++)
+                    {
+                        pSrcY [StepSrcY * j + i] = pPredBlock [4 * j + i];
+                    }
+                }
+            }
+        }
+    }
+
+    *pBestCost = BestCost;
+    return;
+}
+
+/**
+ * Function: armVCM4P10_SetMotionVectorPredictor
+ *
+ * Description:
+ * Chooses a motion vector predictor for one block from the left, up and
+ * up-right neighbours recorded in pMVArr / pRefFrArr and stores it in pMVPred.
+ *
+ * Parameters:
+ * [in] BlockStartX - Start X index in integer pels in current Block
+ * [in] BlockStartY - Start Y index in integer pels in current Block
+ * [in] BlockSizex - Width of current block
+ * [in] BlockSizey - Height of current block
+ * [in] RefFrame - Index of the reference frame for prediction
+ * [in] pRefFrArr - Reference index array of the neighbourhood; indexed here at
+ *                  half the resolution of pMVArr
+ * [in] pMVArr - MV array of the neighbourhood; indexed here in units of 4 pels
+ *               with the current MB starting at row/column 4
+ * [out] pMVPred - Pointer to predicted MVs
+ * Remarks:
+ * When the up-right reference entry is ARM_VCM4P10_INVALID_BLOCK the up-left
+ * entry is used in its place for the reference-frame comparison.
+ *
+ * Return Value:
+ * None
+ *
+ */
+static OMXVoid armVCM4P10_SetMotionVectorPredictor(
+    OMX_U32 BlockStartX,
+    OMX_U32 BlockStartY,
+    OMX_U32 BlockSizex,
+    OMX_U32 BlockSizey,
+    OMX_S32 RefFrame,
+    OMX_S32 pRefFrArr[][6],
+    OMXVCMotionVector pMVArr[][12],
+    OMXVCMotionVector *pMVPred
+)
+{
+    OMX_S32 RFrameL; /* Left */
+    OMX_S32 RFrameU; /* Up */
+    OMX_S32 RFrameUR; /* Up-Right */
+
+    OMX_S32 BlockX, BlockY, BlockXFr, BlockYFr, MVPredType;
+    OMX_S32 BlockXPlusOff, BlockXPlusOffFr, BlockXMin1Fr, BlockYMin1Fr;
+    /* Indices into pMVArr: 4-pel units, offset by 4 for the neighbour border */
+    BlockX = 4 + (BlockStartX >> 2);
+    BlockY = 4 + (BlockStartY >> 2);
+    BlockXPlusOff = BlockX + (BlockSizex >> 2);
+    /* The *Fr values are the same positions halved, used to index pRefFrArr */
+    BlockXFr = BlockX >> 1;
+    BlockYFr = BlockY >> 1;
+    BlockXMin1Fr = (BlockX - 1) >> 1;
+    BlockYMin1Fr = (BlockY - 1) >> 1;
+    BlockXPlusOffFr = BlockXPlusOff >> 1;
+
+    MVPredType = ARM_VCM4P10_MVPRED_MEDIAN;
+
+    RFrameL = pRefFrArr [BlockYFr][BlockXMin1Fr];
+    RFrameU = pRefFrArr [BlockYMin1Fr][BlockXFr];
+    RFrameUR = pRefFrArr [BlockYMin1Fr][BlockXPlusOffFr];
+
+    if (RFrameUR == ARM_VCM4P10_INVALID_BLOCK)
+    {
+        RFrameUR = pRefFrArr [BlockYMin1Fr][BlockXMin1Fr]; /* fall back to the up-left entry */
+    }
+
+    /*
+     * Prediction if only one of the neighbors uses the reference frame
+     * we are checking
+     */
+
+    if (RFrameL == RefFrame && RFrameU != RefFrame && RFrameUR != RefFrame)
+    {
+        MVPredType = ARM_VCM4P10_MVPRED_L;
+    }
+    else if(RFrameL != RefFrame && RFrameU == RefFrame && RFrameUR != RefFrame)
+    {
+        MVPredType = ARM_VCM4P10_MVPRED_U;
+    }
+    else if(RFrameL != RefFrame && RFrameU != RefFrame && RFrameUR == RefFrame)
+    {
+        MVPredType = ARM_VCM4P10_MVPRED_UR;
+    }
+
+    /* Directional predictions for 8x16 and 16x8 blocks; otherwise the median default stays */
+    else if(BlockSizex == 8 && BlockSizey == 16)
+    {
+        if(BlockStartX == 0)
+        {
+            if(RFrameL == RefFrame)
+            {
+                MVPredType = ARM_VCM4P10_MVPRED_L;
+            }
+        }
+        else
+        {
+            if (RFrameUR == RefFrame)
+            {
+                MVPredType = ARM_VCM4P10_MVPRED_UR;
+            }
+        }
+    }
+    else if(BlockSizex == 16 && BlockSizey == 8)
+    {
+        if(BlockStartY == 0)
+        {
+            if(RFrameU == RefFrame)
+            {
+                MVPredType = ARM_VCM4P10_MVPRED_U;
+            }
+        }
+        else
+        {
+            if(RFrameL == RefFrame)
+            {
+                MVPredType = ARM_VCM4P10_MVPRED_L;
+            }
+        }
+    }
+
+    switch (MVPredType)
+    {
+        case ARM_VCM4P10_MVPRED_MEDIAN: /* NOTE(review): as written, the left MV is used alone when none of
+                                         * the up-left, up and up-right reference entries is
+                                         * ARM_VCM4P10_INVALID_BLOCK, and the three-way median is used when
+                                         * at least one of them is invalid. That reads as inverted relative
+                                         * to the usual "left only when the upper neighbours are missing"
+                                         * fallback - confirm against the H.264 MV prediction rules. */
+            if (!(pRefFrArr [BlockYMin1Fr][BlockXMin1Fr] == ARM_VCM4P10_INVALID_BLOCK ||
+                pRefFrArr [BlockYMin1Fr][BlockXFr] == ARM_VCM4P10_INVALID_BLOCK ||
+                pRefFrArr [BlockYMin1Fr][BlockXPlusOffFr] == ARM_VCM4P10_INVALID_BLOCK))
+            {
+                pMVPred->dx = pMVArr [BlockY][BlockX - 1].dx;
+                pMVPred->dy = pMVArr [BlockY][BlockX - 1].dy;
+            }
+            else
+            {
+                pMVPred->dx =
+                    ARM_VCM4P10_MEDIAN(pMVArr [BlockY][BlockX - 1].dx,
+                    pMVArr [BlockY - 1][BlockX].dx,
+                    pMVArr [BlockY - 1][BlockXPlusOff].dx);
+                pMVPred->dy =
+                    ARM_VCM4P10_MEDIAN(pMVArr [BlockY][BlockX - 1].dy,
+                    pMVArr [BlockY - 1][BlockX].dy,
+                    pMVArr [BlockY - 1][BlockXPlusOff].dy);
+            }
+            break;
+
+        case ARM_VCM4P10_MVPRED_L:
+            pMVPred->dx = pMVArr [BlockY][BlockX - 1].dx;
+            pMVPred->dy = pMVArr [BlockY][BlockX - 1].dy;
+            break;
+        case ARM_VCM4P10_MVPRED_U:
+            pMVPred->dx = pMVArr [BlockY - 1][BlockX].dx;
+            pMVPred->dy = pMVArr [BlockY - 1][BlockX].dy;
+            break;
+        case ARM_VCM4P10_MVPRED_UR: /* uses the up-left MV when the up-right entry is invalid */
+            if (pRefFrArr [BlockYMin1Fr][BlockXPlusOffFr] != ARM_VCM4P10_INVALID_BLOCK)
+            {
+                pMVPred->dx = pMVArr [BlockY - 1][BlockXPlusOff].dx;
+                pMVPred->dy = pMVArr [BlockY - 1][BlockXPlusOff].dy;
+            }
+            else
+            {
+                pMVPred->dx = pMVArr [BlockY - 1][BlockX - 1].dx;
+                pMVPred->dy = pMVArr [BlockY - 1][BlockX - 1].dy;
+            }
+            break;
+        default:
+            break;
+    }
+
+    return;
+}
+
+/**
+ * Function: armVCM4P10_BlockMotionSearch
+ *
+ * Description:
+ * Gets best MV for the current block
+ *
+ * Parameters:
+ * [in] pSrcCurrBuf - Pointer to the start of luma component of current Macroblock
+ * [in] SrcCurrStep - Step size for the pointer pSrcCurrBuf
+ * [in] pSrcRefY - Pointer to the start of luma component of co-located reference MB
+ * [in] nSrcRefStep - Step size for the pointer pSrcRefY
+ * [in] pRefRect Pointer to the valid reference rectangle; relative to the image origin.
+ * [in] pCurrPointPos Position of the current macroblock in the current plane.
+ * [in] pMESpec - Motion estimation structure
+ * [in] pMBInter - Array, of dimension four, containing pointers to information associated with four
+ * adjacent type INTER MBs (Left, Top, Top-Left, Top-Right).
+ * [in] nLamda - For calculating the cost + * [out] pBestCost - Minimum cost for encoding current block + * [out] pBestMV - MV corresponding to best cost + * [in] BlockStartX - Block start X index in integer pels + * [in] BlockStartY - Block start Y index in integer pels + * [in] BlockSizeX - Width of current block + * [in] BlockSizeY - Height of current block + * [in] RefFrame - Index of the reference frame for prediction + * [in] pRefFrArr - Pointer to reference frame array storing neighbouring MVs for prediction + * [in] pMVArr - Pointer to MV array storing neighbouring MVs for MV prediction + * [in] pMVPred - Pointer to MV predicted from neighbour MVs + * Remarks: + * + * Return Value: + * OMXResult + * + */ +static OMXResult armVCM4P10_BlockMotionSearch( + const OMX_U8* pSrcCurrBuf, + OMX_S32 SrcCurrStep, + const OMX_U8* pSrcRefY, + OMX_S32 nSrcRefStep, + const OMXRect *pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + void* pMESpec, + + OMX_S32 nLamda, + OMX_S32* pBestCost, + OMXVCMotionVector *pBestMV, + + OMX_U32 BlockStartX, + OMX_U32 BlockStartY, + OMX_U32 BlockSizeX, + OMX_U32 BlockSizeY, + OMX_S32 RefFrame, + OMX_S32 pRefFrArr [][6], + OMXVCMotionVector pMVArr [][12], + OMXVCMotionVector *pMVPred + ) +{ + + OMXVCMotionVector MVCalculated, MVCandidate; + OMX_S32 Cost; + OMXResult RetValue; + OMXVCM4P10MEParams *pMEParams; + OMXVCM4P2Coordinate CurrBlockPos; + + /* Get Predicted Motion Vectors */ + armVCM4P10_SetMotionVectorPredictor ( + BlockStartX, + BlockStartY, + BlockSizeX, + BlockSizeY, + RefFrame, + pRefFrArr, + pMVArr, + pMVPred); + + /* Initialize candidate MV */ + MVCandidate.dx = 0; + MVCandidate.dy = 0; + + CurrBlockPos.x = pCurrPointPos->x + BlockStartX; + CurrBlockPos.y = pCurrPointPos->y + BlockStartY; + + /* Block Match Integer */ + RetValue = omxVCM4P10_BlockMatch_Integer ( + pSrcCurrBuf, + SrcCurrStep, + pSrcRefY, + nSrcRefStep, + pRefRect, + &CurrBlockPos, + BlockSizeX, + BlockSizeY, + nLamda, + pMVPred, + &MVCandidate, + 
&MVCalculated, + &Cost, + pMESpec); + + /* updated BestMV*/ + /**pBestCost = Cost; + pBestMV->dx = MVCalculated.dx; + pBestMV->dy = MVCalculated.dy;*/ + + pMEParams = (OMXVCM4P10MEParams *) pMESpec; + + /* Block Match Half pel */ + if (pMEParams->halfSearchEnable) + { + RetValue = omxVCM4P10_BlockMatch_Half( + pSrcCurrBuf, + SrcCurrStep, + pSrcRefY, + nSrcRefStep, + BlockSizeX, + BlockSizeY, + nLamda, + pMVPred, + &MVCalculated, /* input/output*/ + &Cost); + } + + /* Block Match Quarter pel */ + if (pMEParams->quarterSearchEnable) + { + RetValue = omxVCM4P10_BlockMatch_Quarter( + pSrcCurrBuf, + SrcCurrStep, + pSrcRefY, + nSrcRefStep, + BlockSizeX, + BlockSizeY, + nLamda, + pMVPred, + &MVCalculated, + &Cost); + } + + /* updated Best Cost and Best MV */ + *pBestCost = Cost; + pBestMV->dx = MVCalculated.dx; + pBestMV->dy = MVCalculated.dy; + + /* + * Skip MB cost calculations of 16x16 inter mode + */ + return RetValue; +} + +/** + * Function: armVCM4P10_PartitionME + * + * Description: + * Gets best cost for the current partition + * + * Parameters: + * [in] pSrcCurrBuf - Pointer to the start of luma component of current Macroblock + * [in] SrcCurrStep - Step size for the pointer pSrcCurrBuf + * [in] pSrcRefBufList - Pointer to List of ref buffer of co-located reference MB + * [in] nSrcRefStep - Step size for the pointer pSrcRefY + * [in] pRefRect Pointer to the valid reference rectangle; relative to the image origin. + * [in] pCurrPointPos Position of the current macroblock in the current plane. 
+ * [in] pMESpec - Motion estimation structure + * [in] PartWidth - Width of current partition + * [in] PartHeight - Height of current partition + * [in] BlockWidth - Width of current block + * [in] BlockHeight - Height of current block + * [in] PartStartX - Partition start X index in integer pels + * [in] PartStartY - Partition start Y index in integer pels + * [in] pMVArr - Pointer to MV array storing neighbouring MVs for MV prediction + * [in] pRefFrArr - Pointer to reference frame array storing neighbouring MVs for prediction + * [in] Lambda - For calculating the cost + * [out] pCost - Pointer to cost for Inter MB + * + * Return Value: + * OMXResult + * + */ +static OMXResult armVCM4P10_PartitionME ( + const OMX_U8* pSrcCurrBuf, + OMX_S32 SrcCurrStep, + const OMX_U8 *pSrcRefBufList[ARM_VCM4P10_MAX_FRAMES], + OMX_S32 SrcRefStep, + const OMXRect *pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + void* pMESpec, + + OMX_S32 PartWidth, + OMX_S32 PartHeight, + OMX_S32 BlockWidth, + OMX_S32 BlockHeight, + OMX_S32 PartStartX, + OMX_S32 PartStartY, + + OMXVCMotionVector pMVArr [][12], + OMX_S32 pRefFrArr [][6], + OMXVCMotionVector pMVPredArr [][4], + + OMX_S32 Lambda, + OMX_S32 *pCost +) +{ + OMX_U32 x, y, i, j, ref, OffX, OffY, OffSrc, OffRef; + OMX_S32 BlockCost, PartitionCost, BestCost; + OMX_S32 BestRefFrame=0; + OMXVCMotionVector BestMV [4][4]; + OMXVCMotionVector BestMVPred [4][4]; + OMXVCMotionVector MVPred; + OMXVCMotionVector DstMV; + + BestCost = ARM_VCM4P10_MAX_COST; + + for (ref = 0; ref < ARM_VCM4P10_MAX_FRAMES; ref++) + { + if (pSrcRefBufList [ref] == NULL) + { + /* No reference frame, continue */ + continue; + } + + PartitionCost = 0; + + for (y = 0; y < PartHeight; y += BlockHeight) + { + for (x = 0; x < PartWidth; x += BlockWidth) + { + OffSrc = SrcCurrStep * (PartStartY + y) + PartStartX + x; + OffRef = SrcRefStep * (PartStartY + y) + PartStartX + x; + armVCM4P10_BlockMotionSearch ( + pSrcCurrBuf + OffSrc, + SrcCurrStep, + pSrcRefBufList [ref] + 
OffRef, + SrcRefStep, + pRefRect, + pCurrPointPos, + pMESpec, + + Lambda, + &BlockCost, + &DstMV, + + x + PartStartX, + y + PartStartY, + BlockWidth, + BlockHeight, + ref, + pRefFrArr, + pMVArr, + &MVPred); + + PartitionCost += BlockCost; + + OffX = (PartStartX + x) >> 2; + OffY = (PartStartY + y) >> 2; + + for (j = 0; j < (BlockHeight >> 2); j++) + { + for (i = 0; i < (BlockWidth >> 2); i++) + { + pMVArr [4 + OffY + j][4 + OffX + i].dx = DstMV.dx; + pMVArr [4 + OffY + j][4 + OffX + i].dy = DstMV.dy; + pMVPredArr [OffY + j][OffX + i].dx = MVPred.dx; + pMVPredArr [OffY + j][OffX + i].dy = MVPred.dy; + } + } + + pRefFrArr [2 + (OffY >> 1)][2 + (OffX >> 1)] = ref; + for (j = 0; j < (BlockHeight >> 3); j++) + { + for (i = 0; i < (BlockWidth >> 3); i++) + { + pRefFrArr [2 + (OffY >> 1) + j][2 + (OffX >> 1) + i] = ref; + } + } + + } + } + + /* + * If PartitionCost is less for this reference frame, motion vectors needs to be backedup + */ + if (PartitionCost <= BestCost) + { + BestCost = PartitionCost; + BestRefFrame = ref; + + for (y = 0; y < (PartHeight/BlockHeight); y++) + { + for (x = 0; x < (PartWidth/BlockWidth); x++) + { + OffX = (PartStartX + x * BlockWidth) >> 2; + OffY = (PartStartY + y * BlockHeight) >> 2; + + BestMV[y][x].dx = pMVArr [4 + OffY][4 + OffX].dx; + BestMV[y][x].dy = pMVArr [4 + OffY][4 + OffX].dy; + BestMVPred[y][x].dx = pMVPredArr [OffY][OffX].dx; + BestMVPred[y][x].dy = pMVPredArr [OffY][OffX].dy; + } + } + } + + } + + /* + * Copy back best reference frame, motion vectors and cost. 
+ */ + for (y = 0; y < (PartHeight/BlockHeight); y++) + { + for (x = 0; x < (PartWidth/BlockWidth); x++) + { + OffX = (PartStartX + x * BlockWidth) >> 2; + OffY = (PartStartY + y * BlockHeight) >> 2; + + for (j = 0; j < (BlockHeight >> 2); j++) + { + for (i = 0; i < (BlockWidth >> 2); i++) + { + pMVArr [4 + OffY + j][4 + OffX + i].dx = BestMV[y][x].dx; + pMVArr [4 + OffY + j][4 + OffX + i].dy = BestMV[y][x].dy; + pMVPredArr [OffY + j][OffX + i].dx = BestMVPred[y][x].dx; + pMVPredArr [OffY + j][OffX + i].dy = BestMVPred[y][x].dy; + } + } + + for (j = 0; j < (BlockHeight >> 3); j++) + { + for (i = 0; i < (BlockWidth >> 3); i++) + { + pRefFrArr [2 + (OffY >> 1) + j][2 + (OffX >> 1) + i] = BestRefFrame; + } + } + } + } + + *pCost = BestCost; + return OMX_Sts_NoErr; + +} + +/** + * Function: armVCM4P10_Intra16x16Estimation + * + * Description: + * Performs MB-level motion estimation for INTER MB type and selects best motion estimation strategy from + * the set of modes supported in baseline profile ISO/IEC 14496-10. + * + * Remarks: + * + * Parameters: + * [in] pSrcCurrBuf - Pointer to the start of luma component of current Macroblock + * [in] SrcCurrStep - Step size for the pointer pSrcCurrBuf + * [in] pSrcRecBuf - Pointer to the start of luma component of co-located reconstructed MB + * [in] SrcRecStep - Step size for the pointer pSrcRecBuf + * [in] nMBPosX - Position of MB in the frame w.r.t X axis + * [in] nMBPosY - Position of MB in the frame w.r.t Y axis + * [in] pMBInter - Array, of dimension four, containing pointers to information associated with four + * adjacent type INTER MBs (Left, Top, Top-Left, Top-Right). + * [in] pMBIntra - Array, of dimension four, containing pointers to information associated with four + * adjacent type INTRA MBs (Left, Top, Top-Left, Top-Right). + * [in/out] pSrcDstMBCurr - Pointer to information structure for the current MB. 
Following member should be set
+ * before calling this function
+ * [in] Lambda - For calculating the cost
+ * [out] pCost - Pointer to cost for Intra16x16
+ * Return Value:
+ * OMX_Sts_NoErr - No Error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ *
+ */
+/* Implementation notes: the function takes no Lambda argument, performs no
+ * argument checks and always returns OMX_Sts_NoErr. Modes are tried in the
+ * order VERT, HOR, DC, PLANE; a mode replaces the current best only on a
+ * strictly lower SAD. *pCost stays ARM_VCM4P10_MAX_COST when no mode could
+ * be predicted. */
+static OMXResult armVCM4P10_Intra16x16Estimation(
+    const OMX_U8* pSrcCurrBuf,
+    OMX_S32 SrcCurrStep,
+    const OMX_U8* pSrcRecBuf,
+    OMX_S32 SrcRecStep,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    const OMXVCM4P10MBInfoPtr *pMBInter,
+    const OMXVCM4P10MBInfoPtr *pMBIntra,
+    OMXVCM4P10MBInfo *pSrcDstMBCurr,
+    OMX_U32 *pCost)
+{
+    OMX_U8 PredBuf [16*16 + 16]; /* 16 spare bytes, presumably so armAlignTo16Bytes can round the start up - confirm */
+    OMX_U8 *pPred;
+    OMX_S32 mode;
+    OMX_S32 Cost;
+    OMX_S32 availability = 0;
+    OMXResult Ret;
+    OMXVCM4P10Intra16x16PredMode IntraMode16x16 [4] =
+        {OMX_VC_16X16_VERT, OMX_VC_16X16_HOR,
+        OMX_VC_16X16_DC, OMX_VC_16X16_PLANE};
+    OMX_U32 MBPosX = pCurrPointPos->x >> 4; /* pel position -> macroblock column */
+    OMX_U32 MBPosY = pCurrPointPos->y >> 4; /* pel position -> macroblock row */
+
+    pPred = armAlignTo16Bytes(PredBuf);
+
+    /* Check for availability of LEFT MB: not in column 0 and described by either neighbour array */
+    if ((MBPosX != 0) && (pMBIntra [0] != 0 || pMBInter [0] != 0))
+    {
+        availability |= OMX_VC_LEFT;
+    }
+
+    /* Check for availability of UP MB */
+    if ((MBPosY != 0) && (pMBIntra [1] != 0 || pMBInter [1] != 0))
+    {
+        availability |= OMX_VC_UPPER;
+    }
+
+    /* Check for availability of UP-LEFT MB */
+    if ((MBPosX > 0) && (MBPosY > 0) &&
+        (pMBIntra [2] != 0 || pMBInter [2] != 0))
+    {
+        availability |= OMX_VC_UPPER_LEFT;
+    }
+
+    *pCost = ARM_VCM4P10_MAX_COST;
+    for (mode = 0; mode < 4; mode++)
+    {
+        Ret = omxVCM4P10_PredictIntra_16x16(
+            pSrcRecBuf - 1,
+            pSrcRecBuf - SrcRecStep,
+            pSrcRecBuf - SrcRecStep - 1,
+            pPred,
+            SrcRecStep,
+            16,
+            IntraMode16x16 [mode],
+            availability);
+        if (Ret == OMX_Sts_NoErr) /* modes the predictor rejects are simply skipped */
+        {
+            armVCCOMM_SAD(
+                pSrcCurrBuf,
+                SrcCurrStep,
+                pPred,
+                16,
+                &Cost,
+                16,
+                16);
+            if (Cost < *pCost) /* signed Cost compared against unsigned *pCost */
+            {
+                *pCost = Cost;
+                pSrcDstMBCurr->Intra16x16PredMode = IntraMode16x16 [mode];
+            }
+
+        }
+
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function:
armVCM4P10_Intra4x4Estimation
+ *
+ * Description:
+ * Performs MB-level motion estimation for Intra 4x4 MB type and selects
+ * the best set of modes supported in baseline profile.
+ *
+ * Parameters:
+ * [in] pSrcCurrBuf - Pointer to the start of luma component of current Macroblock
+ * [in] SrcCurrStep - Step size for the pointer pSrcCurrBuf
+ * [in] pSrcRecBuf - Pointer to the start of luma component of co-located reconstructed MB
+ * [in] SrcRecStep - Step size for the pointer pSrcRecBuf
+ * [in] nMBPosX - Position of MB in the frame w.r.t X axis
+ * [in] nMBPosY - Position of MB in the frame w.r.t Y axis
+ * [in] pMBIntra - Array, of dimension four, containing pointers to information associated with four
+ * adjacent type INTRA MBs (Left, Top, Top-Left, Top-Right).
+ * [in/out] pSrcDstMBCurr - Pointer to information structure for the current MB. Following member should be set
+ * before calling this function
+ * [in] Lambda - For calculating the cost
+ * [out] pCost - Pointer to cost for Intra4x4
+ * Return Value:
+ * OMX_Sts_NoErr - No Error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ *
+ */
+/* Implementation notes: this function takes no nMBPosX, nMBPosY or Lambda
+ * argument, performs no argument checks and always returns OMX_Sts_NoErr.
+ * *pCost receives the sum of the best SADs of the sixteen 4x4 blocks. */
+static OMXResult armVCM4P10_Intra4x4Estimation(
+    const OMX_U8* pSrcCurrBuf,
+    OMX_S32 SrcCurrStep,
+    const OMX_U8* pSrcRecBuf,
+    OMX_S32 SrcRecStep,
+    const OMXVCM4P10MBInfoPtr *pMBIntra,
+    OMXVCM4P10MBInfo *pSrcDstMBCurr,
+    OMX_U32 *pCost)
+{
+    OMX_S32 x, y, Block4x4, Block8x8;
+    OMX_S32 Cost;
+
+    /*
+     * PredIntra4x4Mode will store prediction modes of 4x4 blocks.
+     * Modes for current MB start at index [1][1].
+     * Modes of neighbouring MBs will be as shown below.
+     * A value of ARM_VCM4P10_INVALID_BLOCK for any block in this array means
+     * that block is not available for prediction.
+     *
+     * c3 b0 b1 b2 b3 d0 d1 d2 d3
+     * a0 xx xx xx xx - - - -
+     * a1 xx xx xx xx - - - -
+     * a2 xx xx xx xx - - - -
+     * a3 xx xx xx xx - - - -
+     *
+     */
+    OMX_S32 PredIntra4x4Mode [5][9];
+
+    /*
+     * pSrcY stores the reconstructed source array of size 3MB X 2MB as below
+     *
+     * MB11 MB12 MB13
+     * MB21 MB22 MB23
+     *
+     * This array will be used for local reconstruction of 4x4 blocks
+     * with best prediction mode within an MB. The current MB is MB22
+     * (it is addressed below as pSrcY + 16 * StepSrcY + 16).
+     */
+    OMX_U8 pSrcY [(16*3)*(16*2)];
+    OMX_S32 StepSrcY;
+
+    /* init */
+    *pCost = 0;
+
+    for (y = 0; y < 5; y++)
+    {
+        for (x = 0; x < 9; x++)
+        {
+            /*
+             * Initialize with value of ARM_VCM4P10_INVALID_BLOCK, to mean this
+             * 4x4 block is not available
+             */
+            PredIntra4x4Mode [y][x] = ARM_VCM4P10_INVALID_BLOCK;
+        }
+    }
+
+    /* Replace ARM_VCM4P10_INVALID_BLOCK value with available MBs values */
+    for (x = 0; x < 4; x++)
+    {
+        /* Store values of b0, b1, b2, b3: bottom row of the top MB */
+        if (pMBIntra[1] != NULL)
+        {
+            PredIntra4x4Mode [0][x + 1] =
+                pMBIntra[1]->pIntra4x4PredMode[3*4 + x];
+        }
+
+        /* Store values of d0, d1, d2, d3: bottom row of the top-right MB */
+        if (pMBIntra[3] != NULL)
+        {
+            PredIntra4x4Mode [0][x + 5] =
+                pMBIntra[3]->pIntra4x4PredMode[3*4 + x];
+        }
+    }
+
+    /* Store values of c3: bottom-right block of the top-left MB */
+    if (pMBIntra[2] != NULL)
+    {
+        PredIntra4x4Mode [0][0] = pMBIntra[2]->pIntra4x4PredMode[15];
+    }
+
+    for (y = 0; y < 4; y++)
+    {
+        /* Store values of a0, a1, a2, a3: right column of the left MB */
+        if (pMBIntra[0] != NULL)
+        {
+            PredIntra4x4Mode [y + 1][0] =
+                pMBIntra[0]->pIntra4x4PredMode[y*4 + 3];
+        }
+    }
+
+    /*
+     * Take a local copy of the reconstructed picture around the current MB.
+     * NOTE(review): the copy starts 16 rows above and 16 columns left of
+     * pSrcRecBuf and spans 48 columns, regardless of which neighbours are
+     * marked available above - presumably the reconstructed plane is padded
+     * so these reads stay inside it; confirm against the callers.
+     */
+
+    StepSrcY = 16 * 3;
+    for (y = 0; y < (16 * 2); y++)
+    {
+        for (x = 0; x < (16 * 3); x++)
+        {
+            pSrcY [StepSrcY * y + x] =
+                pSrcRecBuf [SrcRecStep * (y - 16) + x - 16];
+        }
+    }
+
+    /* for each 8x8 block */
+    for (Block8x8 = 0; Block8x8 < 4; Block8x8++)
+    {
+        /* for each 4x4 block inside 8x8 block */
+        for (Block4x4 = 0; Block4x4 < 4; Block4x4++)
+        {
+            armVCM4P10_Mode4x4Decision (
+                pSrcCurrBuf,
+                SrcCurrStep,
+                pSrcDstMBCurr,
+                Block8x8,
+                Block4x4,
+                pSrcY + 16 * StepSrcY + 16,
+                StepSrcY,
+                PredIntra4x4Mode,
+                &Cost);
+
+            *pCost += Cost; /* accumulate the per-block best cost */
+        }
+    }
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: armVCM4P10_InterMEMB
+ *
+ * Description:
+ * Performs MB-level motion estimation for INTER MB type and selects best motion estimation strategy from
+ * the set of modes supported in baseline profile ISO/IEC 14496-10.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcCurrBuf - Pointer to the start of luma component of current Macroblock
+ * [in] SrcCurrStep - Step size for the pointer pSrcCurrBuf
+ * [in] pSrcRefBufList - Pointer to the start of luma component of co-located reference MB
+ * [in] SrcRefStep - Step size for the pointer pSrcRefY
+ * [in] pRefRect Pointer to the valid reference rectangle; relative to the image origin.
+ * [in] pCurrPointPos Position of the current macroblock in the current plane.
+ * [in] pMESpec - Motion estimation structure
+ * [in] pMBInter - Array, of dimension four, containing pointers to information associated with four
+ * adjacent type INTER MBs (Left, Top, Top-Left, Top-Right).
+ * [in/out] pSrcDstMBCurr - Pointer to information structure for the current MB.
Following member should be set
+ * before calling this function
+ * [in] Lambda - For calculating the cost
+ * [out] pDstCost - Pointer to cost for Inter MB
+ * Return Value:
+ * OMX_Sts_NoErr - No Error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ *
+ */
+/*
+ * Implementation notes: the search runs in two passes. When both
+ * blockSplitEnable8x8 and blockSplitEnable4x4 are set, each 8x8 quadrant is
+ * first searched with 4x4, 4x8 and 8x4 blocks. The second pass then tries
+ * 8x8, 8x16, 16x8 and 16x16 partitions (16x16 only when blockSplitEnable8x8
+ * is not set) and replaces the first-pass result whenever its cost is not
+ * higher. No argument checks are done and OMX_Sts_NoErr is always returned.
+ */
+static OMXResult armVCM4P10_InterMEMB(
+    const OMX_U8 *pSrcCurrBuf,
+    OMX_S32 SrcCurrStep,
+    const OMX_U8 *pSrcRefBufList[ARM_VCM4P10_MAX_FRAMES],
+    OMX_S32 SrcRefStep,
+    const OMXRect *pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    OMX_U32 Lambda,
+    void *pMESpec,
+    const OMXVCM4P10MBInfoPtr *pMBInter,
+    OMXVCM4P10MBInfoPtr pSrcDstMBCurr,
+    OMX_U32 *pDstCost)
+{
+    OMX_S32 i, j, x, y, mode;
+    OMX_U32 Block8x8, XPerMB, YPerMB, Block2x, Block2y;
+    OMX_S32 PartStartX = 0, PartStartY = 0;
+    OMX_S32 PartWidth = 8, PartHeight = 8, BlockWidth = 4, BlockHeight = 4;
+    const OMX_U32 BlkSz [4][2] = {{4,4}, {4,8}, {8,4}};
+    const OMX_U32 PartSz [4][2] = {{8,8}, {8,16}, {16,8}, {16,16}};
+    const OMXVCM4P10SubMacroblockType
+        ModeSubMBType4x4 [] = {OMX_VC_SUB_P_4x4, OMX_VC_SUB_P_4x8,
+        OMX_VC_SUB_P_8x4, OMX_VC_SUB_P_8x8};
+    const OMXVCM4P10MacroblockType
+        ModeMBType [] = {OMX_VC_P_8x8, OMX_VC_P_8x16, OMX_VC_P_16x8, OMX_VC_P_16x16};
+
+    OMXVCM4P10MEParams *pMBOptions;
+    /*
+     * RefFrArr and MVArr will be used for temporary storage of Reference frame index and MVs
+     * It will store RefIndex and MVs of 6 MBs as shown below
+     *
+     * |------|------|------|
+     * |Tp-Lt |Top |Tp-R |
+     * | MB | MB | MB |
+     * |------|------|------|
+     * |Left | Curr | |
+     * | MB | MB | |
+     * |------|------|------|
+     */
+    OMX_S32 RefFrArr [4][6];
+    OMXVCMotionVector MVArr [8][12];
+    OMXVCMotionVector MVPredArr [4][4];
+
+    /*
+     * IndexToLoc will translate the spatial position (0 = top-left, 1 = top,
+     * 2 = top-right, 3 = left) into the matching pMBInter index
+     */
+    OMX_S32 IndexToLoc [] = {2,1,3,0};
+    OMX_U32 part, MaxPart;
+    OMX_S32 Cost, MotionCost8x8 [4], MBCost, BestCost;
+
+    /*
+     * Update neighbouring MV array and Ref frame array which will be used for
+     * prediction of MVs and Ref frames.
+     */
+
+    /* Set cost to a high value */
+    Cost = BestCost = ARM_VCM4P10_MAX_COST;
+
+    for (y = 0; y < 8; y++)
+    {
+        for (x = 0; x < 12; x++)
+        {
+            i = 3 * (y >> 2) + (x >> 2);
+            /* (y < 4 || x < 4) selects the neighbour MBs and keeps i within IndexToLoc */
+            if ((y < 4 || x < 4) && (pMBInter[IndexToLoc[i]] != NULL))
+            {
+                MVArr [y][x].dx =
+                    pMBInter[IndexToLoc[i]]->pMV0[y % 4][x % 4].dx;
+                MVArr [y][x].dy =
+                    pMBInter[IndexToLoc[i]]->pMV0[y % 4][x % 4].dy;
+            }
+            else
+            {
+                MVArr [y][x].dx = 0;
+                MVArr [y][x].dy = 0;
+            }
+        }
+    }
+
+    for (y = 0; y < 4; y++)
+    {
+        for (x = 0; x < 6; x++)
+        {
+            i = 3 * (y >> 1) + (x >> 1);
+            if ((y < 2 || x < 2) && (pMBInter[IndexToLoc[i]] != NULL))
+            {
+                RefFrArr [y][x] =
+                    pMBInter[IndexToLoc[i]]->pRefL0Idx [(y % 2) * 2 + (x % 2)];
+            }
+            else
+            {
+                RefFrArr [y][x] = ARM_VCM4P10_INVALID_BLOCK;
+            }
+        }
+    }
+
+    for (y = 0; y < 4; y++)
+    {
+        for (x = 0; x < 4; x++)
+        {
+            MVPredArr [y][x].dx = 0;
+            MVPredArr [y][x].dy = 0;
+        }
+    }
+    /*
+     * Motion Estimation for 8x8 MB Partition
+     */
+
+    for (i = 0; i < 4; i++)
+    {
+        MotionCost8x8 [i] = 0;
+    }
+
+    pMBOptions = (OMXVCM4P10MEParams *) pMESpec;
+
+    if (pMBOptions->blockSplitEnable8x8 == 1 &&
+        pMBOptions->blockSplitEnable4x4 == 1)
+    {
+        pSrcDstMBCurr->mbType = OMX_VC_P_8x8;
+
+        PartWidth = PartSz [0][0];
+        PartHeight = PartSz [0][1];
+
+        /* For each 8x8 partitions */
+        for (Block8x8 = 0; Block8x8 < 4; Block8x8++)
+        {
+            PartStartX = (Block8x8 % 2) << 3;
+            PartStartY = (Block8x8 / 2) << 3;
+
+            Block2x = (Block8x8 & 1) << 1;
+            Block2y = (Block8x8 >> 1) << 1;
+
+            BestCost = ARM_VCM4P10_MAX_COST;
+            for (mode = 0; mode < 3; mode++)
+            {
+                BlockWidth = BlkSz [mode][0];
+                BlockHeight = BlkSz [mode][1];
+
+                armVCM4P10_PartitionME (
+                    pSrcCurrBuf,
+                    SrcCurrStep,
+                    pSrcRefBufList,
+                    SrcRefStep,
+                    pRefRect,
+                    pCurrPointPos,
+                    pMESpec,
+
+                    PartWidth,
+                    PartHeight,
+                    BlockWidth,
+                    BlockHeight,
+                    PartStartX,
+                    PartStartY,
+
+                    MVArr,
+                    RefFrArr,
+                    MVPredArr,
+
+                    Lambda,
+                    &Cost);
+
+                if (Cost <= BestCost)
+                {
+                    /* Update cost */
+                    BestCost = Cost;
+
+                    /* Update MBCurr struct */
+                    pSrcDstMBCurr->subMBType [Block8x8] = ModeSubMBType4x4 [mode];
+
+                    pSrcDstMBCurr->pRefL0Idx [Block8x8] = RefFrArr [2 + (PartStartY >> 3)][2 + (PartStartX >> 3)];
+
+                    /* Update pMV0 and pMVPred of MBCurr struct */
+                    for (j = 0; j < 2; j++)
+                    {
+                        for (i = 0; i < 2; i++)
+                        {
+                            pSrcDstMBCurr->pMV0 [Block2y + j][Block2x + i].dx =
+                                MVArr [4 + Block2y + j][4 + Block2x + i].dx;
+                            pSrcDstMBCurr->pMV0 [Block2y + j][Block2x + i].dy =
+                                MVArr [4 + Block2y + j][4 + Block2x + i].dy;
+
+                            pSrcDstMBCurr->pMVPred [Block2y + j][Block2x + i].dx =
+                                MVPredArr [Block2y + j][Block2x + i].dx;
+                            pSrcDstMBCurr->pMVPred [Block2y + j][Block2x + i].dy =
+                                MVPredArr [Block2y + j][Block2x + i].dy;
+                        }
+                    }
+                }
+            }
+
+            /* Update cost */
+            MotionCost8x8 [Block8x8] = BestCost;
+        }
+
+        /* Cost for mbType OMX_VC_P_8x8 */
+        BestCost = 0;
+        for (i = 0; i < 4; i++)
+        {
+            BestCost += MotionCost8x8 [i];
+        }
+    }
+    else
+    {
+        /* Set sub MB type to 8x8 */
+        for (i = 0; i < 4; i++)
+        {
+            pSrcDstMBCurr->subMBType [i] = OMX_VC_SUB_P_8x8;
+        }
+    }
+
+    /*
+     * Motion Estimation for 8x8, 8x16, 16x8 and 16x16 MB Partition
+     * If pMBOptions->blockSplitEnable8x8 is not 1, do only 16x16 ME (mode 3)
+     */
+    for (mode = (pMBOptions->blockSplitEnable8x8 == 1 ? 0 : 3); mode < 4; mode++)
+    {
+        BlockWidth = PartWidth = PartSz [mode][0];
+        BlockHeight = PartHeight = PartSz [mode][1];
+
+        XPerMB = 16 / PartWidth;
+        YPerMB = 16 / PartHeight;
+        MaxPart = XPerMB * YPerMB;
+
+        MBCost = 0;
+
+        /* part size 4, 2, 2 and 1 corresponding to 8x8, 8x16, 16x8 and 16x16 MB */
+        for (part = 0; part < MaxPart; part++)
+        {
+            PartStartX = (part % XPerMB) * PartWidth;
+            PartStartY = (part / XPerMB) * PartHeight;
+
+            armVCM4P10_PartitionME (
+                pSrcCurrBuf,
+                SrcCurrStep,
+                pSrcRefBufList,
+                SrcRefStep,
+                pRefRect,
+                pCurrPointPos,
+                pMESpec,
+
+                PartWidth,
+                PartHeight,
+                BlockWidth,
+                BlockHeight,
+                PartStartX,
+                PartStartY,
+
+                MVArr,
+                RefFrArr,
+                MVPredArr,
+
+                Lambda,
+                &Cost);
+
+            MBCost += Cost;
+        }
+
+        if (MBCost <= BestCost)
+        {
+            /* Update cost */
+            BestCost = MBCost;
+
+            /* Update mbType of MBCurr struct */
+            pSrcDstMBCurr->mbType = ModeMBType [mode];
+
+            /*
+             * The vectors and predictors stored below were searched with
+             * blocks of 8x8 or larger, so a finer sub-MB type left behind by
+             * the sub-partition pass above no longer describes them.
+             */
+            for (i = 0; i < 4; i++)
+            {
+                pSrcDstMBCurr->subMBType [i] = OMX_VC_SUB_P_8x8;
+            }
+
+            /* Update pMV0 and pMVPred of MBCurr struct */
+            for (j = 0; j < 4; j++)
+            {
+                for (i = 0; i < 4; i++)
+                {
+                    pSrcDstMBCurr->pMV0 [j][i].dx = MVArr [4+j][4+i].dx;
+                    pSrcDstMBCurr->pMV0 [j][i].dy = MVArr [4+j][4+i].dy;
+                    pSrcDstMBCurr->pMVPred [j][i].dx = MVPredArr [j][i].dx;
+                    pSrcDstMBCurr->pMVPred [j][i].dy = MVPredArr [j][i].dy;
+                }
+            }
+            for (j = 0; j < 2; j++)
+            {
+                for (i = 0; i < 2; i++)
+                {
+                    pSrcDstMBCurr->pRefL0Idx [j*2+i] = RefFrArr [2+j][2+i];
+                }
+            }
+        }
+
+    }
+
+    /* Update Best Cost */
+    *pDstCost = BestCost;
+
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: omxVCM4P10_MotionEstimationMB (6.3.5.3.1)
+ *
+ * Description:
+ * Performs MB-level motion estimation and selects best motion estimation
+ * strategy from the set of modes supported in baseline profile [ISO14496-10].
+ *
+ * Input Arguments:
+ *
+ * pSrcCurrBuf - Pointer to the current position in original picture plane;
+ * 16-byte alignment required
+ * pSrcRefBufList - Pointer to an array with 16 entries. Each entry points
+ * to the top-left corner of the co-located MB in a reference
+ * picture.
The array is filled from low-to-high with valid + * reference frame pointers; the unused high entries should be set + * to NULL. Ordering of the reference frames should follow + * [ISO14496-10] subclause 8.2.4 Decoding Process for Reference + * Picture Lists. The entries must be 16-byte aligned. + * pSrcRecBuf - Pointer to the top-left corner of the co-located MB in the + * reconstructed picture; must be 16-byte aligned. + * SrcCurrStep - Width of the original picture plane in terms of full + * pixels; must be a multiple of 16. + * SrcRefStep - Width of the reference picture plane in terms of full + * pixels; must be a multiple of 16. + * SrcRecStep - Width of the reconstructed picture plane in terms of full + * pixels; must be a multiple of 16. + * pRefRect - Pointer to the valid reference rectangle; relative to the + * image origin. + * pCurrPointPos - Position of the current macroblock in the current plane. + * Lambda - Lagrange factor for computing the cost function + * pMESpec - Pointer to the motion estimation specification structure; must + * have been allocated and initialized prior to calling this + * function. + * pMBInter - Array, of dimension four, containing pointers to information + * associated with four adjacent type INTER MBs (Left, Top, + * Top-Left, Top-Right). Any pointer in the array may be set equal + * to NULL if the corresponding MB doesn't exist or is not of type + * INTER. pMBInter[0] - Pointer to left MB information pMBInter[1] + * - Pointer to top MB information pMBInter[2] - Pointer to + * top-left MB information pMBInter[3] - Pointer to top-right MB + * information + * pMBIntra - Array, of dimension four, containing pointers to information + * associated with four adjacent type INTRA MBs (Left, Top, + * Top-Left, Top-Right). Any pointer in the array may be set equal + * to NULL if the corresponding MB doesn't exist or is not of type + * INTRA. 
pMBIntra[0] - Pointer to left MB information pMBIntra[1] + * - Pointer to top MB information pMBIntra[2] - Pointer to + * top-left MB information pMBIntra[3] - Pointer to top-right MB + * information + * pSrcDstMBCurr - Pointer to information structure for the current MB. + * The following entries should be set prior to calling the + * function: sliceID - the number of the slice to which the + * current MB belongs. + * + * Output Arguments: + * + * pDstCost - Pointer to the minimum motion cost for the current MB. + * pDstBlockSAD - Pointer to the array of SADs for each of the sixteen luma + * 4x4 blocks in each MB. The block SADs are in scan order for + * each MB. For implementations that cannot compute the SAD values + * individually, the maximum possible value (0xffff) is returned + * for each of the 16 block SAD entries. + * pSrcDstMBCurr - Pointer to updated information structure for the current + * MB after MB-level motion estimation has been completed. The + * following fields are updated by the ME function. The following + * parameter set quantifies the MB-level ME search results: MbType + * subMBType[4] pMV0[4][4] pMVPred[4][4] pRefL0Idx[4] + * Intra16x16PredMode pIntra4x4PredMode[4][4] + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - One or more of the following pointers is NULL: pSrcCurrBuf, + * pSrcRefBufList, pSrcRecBuf, pRefRect, pCurrPointPos, pMESpec, + * pMBInter, pMBIntra, pSrcDstMBCurr, pDstCost, pSrcRefBufList[0] + * - SrcRefStep, SrcRecStep are not multiples of 16 + * - iBlockWidth or iBlockHeight are values other than 4, 8, or 16. 
+ * - Any alignment restrictions are violated
+ *
+ */
+/* Implementation notes: a NULL pSrcRefBufList[0] is not rejected here; when
+ * every entry of pSrcRefBufList is NULL only the intra candidates are
+ * evaluated. SrcCurrStep is validated the same way as the other two steps.
+ * pDstBlockSAD is forwarded to armVCM4P10_CalculateBlockSAD without a NULL
+ * check in this function. */
+OMXResult omxVCM4P10_MotionEstimationMB(
+    const OMX_U8 *pSrcCurrBuf,
+    OMX_S32 SrcCurrStep,
+    const OMX_U8 *pSrcRefBufList[ARM_VCM4P10_MAX_FRAMES],
+    OMX_S32 SrcRefStep,
+    const OMX_U8 *pSrcRecBuf,
+    OMX_S32 SrcRecStep,
+    const OMXRect *pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    OMX_U32 Lambda,
+    void *pMESpec,
+    const OMXVCM4P10MBInfoPtr *pMBInter,
+    const OMXVCM4P10MBInfoPtr *pMBIntra,
+    OMXVCM4P10MBInfo *pSrcDstMBCurr,
+    OMX_INT *pDstCost,
+    OMX_U16 *pDstBlockSAD)
+{
+    OMX_U32 Cost, i, IntraFlag = 1; /* IntraFlag stays 1 while no non-NULL reference has been seen */
+    OMXVCM4P10MEParams *pMEParams;
+
+    /* check for argument error */
+    armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRefBufList == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRecBuf == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pCurrPointPos == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pMBInter == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pMBIntra == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcDstMBCurr == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstCost == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(SrcRefStep <= 0 || SrcRefStep & 15, OMX_Sts_BadArgErr)
+    armRetArgErrIf(SrcRecStep <= 0 || SrcRecStep & 15, OMX_Sts_BadArgErr)
+    armRetArgErrIf(SrcCurrStep <= 0 || SrcCurrStep & 15, OMX_Sts_BadArgErr)
+
+    armRetArgErrIf(armNot16ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot16ByteAligned(pSrcRecBuf), OMX_Sts_BadArgErr)
+
+    for (i = 0; i < ARM_VCM4P10_MAX_FRAMES; i++)
+    {
+        armRetArgErrIf(pSrcRefBufList [i] != NULL &&
+            armNot16ByteAligned(pSrcRefBufList [i]), OMX_Sts_BadArgErr)
+
+        /* Check if current MB needs INTER cost calculations */
+        if (pSrcRefBufList [i] != NULL && IntraFlag == 1)
+        {
+            IntraFlag = 0;
+        }
+    }
+
+    *pDstCost = ARM_VCM4P10_MAX_COST;
+    /*
+     * Inter cost calculations
+     */
+
+    /* check this MB can be Inter */
+    if (IntraFlag != 1)
+    {
+        armVCM4P10_InterMEMB(
+            pSrcCurrBuf,
+            SrcCurrStep,
+            pSrcRefBufList,
+            SrcRefStep,
+            pRefRect,
+            pCurrPointPos,
+            Lambda,
+            pMESpec,
+            pMBInter,
+            pSrcDstMBCurr,
+            &Cost
+            );
+
+        *pDstCost = Cost;
+    }
+
+    pMEParams = (OMXVCM4P10MEParams *)pMESpec;
+
+    if (pMEParams->intraEnable4x4 == 1)
+    {
+        /*
+         * Intra 4x4 cost calculations
+         */
+        armVCM4P10_Intra4x4Estimation(
+            pSrcCurrBuf,
+            SrcCurrStep,
+            pSrcRecBuf,
+            SrcRecStep,
+            pMBIntra,
+            pSrcDstMBCurr,
+            &Cost
+            );
+
+        if (Cost <= *pDstCost) /* '<=': on a tie the candidate evaluated later replaces the earlier one */
+        {
+            *pDstCost = Cost;
+            pSrcDstMBCurr->mbType = OMX_VC_INTRA_4x4;
+
+        }
+
+    }
+
+    /*
+     * Cost for Intra 16x16 mode (always evaluated)
+     */
+
+    armVCM4P10_Intra16x16Estimation(
+        pSrcCurrBuf,
+        SrcCurrStep,
+        pSrcRecBuf,
+        SrcRecStep,
+        pCurrPointPos,
+        pMBInter,
+        pMBIntra,
+        pSrcDstMBCurr,
+        &Cost
+        );
+
+    if (Cost <= *pDstCost)
+    {
+        *pDstCost = Cost;
+        pSrcDstMBCurr->mbType = OMX_VC_INTRA_16x16;
+    }
+
+    /*
+     * Fill pDstBlockSAD through armVCM4P10_CalculateBlockSAD for the MB
+     * type selected above
+     */
+    armVCM4P10_CalculateBlockSAD( pSrcDstMBCurr,
+        pSrcCurrBuf,
+        SrcCurrStep,
+        pSrcRefBufList,
+        SrcRefStep,
+        pSrcRecBuf,
+        SrcRecStep,
+        pRefRect,
+        pCurrPointPos,
+        pMBInter,
+        pMBIntra,
+        pDstBlockSAD);
+
+
+    return OMX_Sts_NoErr;
+}
+
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c
new file mode 100644
index 0000000000000000000000000000000000000000..d6ca7833f0a629d744a3ac9271cd7bb1ee35bdea
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c
@@ -0,0 +1,284 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_PredictIntraChroma_8x8.c
+ * OpenMAX DL: v1.0.2
+ *
Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 Chroma 8x8 intra prediction module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/*
+ * Description:
+ * Fill a 4x4 block with one DC prediction value; the upper neighbour has
+ * priority.  The four samples above the block are averaged when
+ * OMX_VC_UPPER is set; otherwise the four left samples are averaged when
+ * OMX_VC_LEFT is set; otherwise the value 128 is used.
+ *
+ * Parameters:
+ * [in]  pSrcLeft      Pointer to the left samples p[x, y] (x = -1, y = 0..3);
+ *                     read only when the left column is the one used
+ * [in]  pSrcAbove     Pointer to the above samples p[x, y] (x = 0..3, y = -1);
+ *                     read only when OMX_VC_UPPER is set
+ * [in]  leftStep      Step of the left sample buffer
+ * [in]  dstStep       Step of the destination buffer
+ * [in]  availability  Neighbour availability flags (OMX_VC_UPPER and
+ *                     OMX_VC_LEFT are tested)
+ * [out] pDst          Pointer to the destination 4x4 block
+ *
+ * Return Value:
+ * None
+ */
+
+static void armVCM4P10_PredictIntraDCUp4x4(
+     const OMX_U8* pSrcLeft,
+     const OMX_U8 *pSrcAbove,
+     OMX_U8* pDst,
+     OMX_INT leftStep,
+     OMX_INT dstStep,
+     OMX_S32 availability
+)
+{
+    int i, row, dc;
+
+    if (availability & OMX_VC_UPPER)
+    {
+        /* Rounded mean of the four samples above: (sum + 2) >> 2 */
+        dc = 2;
+        for (i = 0; i < 4; i++)
+        {
+            dc += pSrcAbove[i];
+        }
+        dc >>= 2;
+    }
+    else if (availability & OMX_VC_LEFT)
+    {
+        /* Upper row missing: rounded mean of the four left samples */
+        dc = 2;
+        for (i = 0; i < 4; i++)
+        {
+            dc += pSrcLeft[i*leftStep];
+        }
+        dc >>= 2;
+    }
+    else
+    {
+        /* No neighbour available: mid-grey */
+        dc = 128;
+    }
+
+    for (row = 0; row < 4; row++)
+    {
+        OMX_U8 *pRow = pDst + row*dstStep;
+
+        pRow[0] = (OMX_U8)dc;
+        pRow[1] = (OMX_U8)dc;
+        pRow[2] = (OMX_U8)dc;
+        pRow[3] = (OMX_U8)dc;
+    }
+}
+
+/*
+ * Description:
+ * Fill a 4x4 block with one DC prediction value; the left neighbour has
+ * priority.  The four left samples are averaged when OMX_VC_LEFT is set;
+ * otherwise the four samples above are averaged when OMX_VC_UPPER is set;
+ * otherwise the value 128 is used.
+ *
+ * Parameters:
+ * [in]  pSrcLeft      Pointer to the left samples p[x, y] (x = -1, y = 0..3);
+ *                     read only when OMX_VC_LEFT is set
+ * [in]  pSrcAbove     Pointer to the above samples p[x, y] (x = 0..3, y = -1);
+ *                     read only when the upper row is the one used
+ * [in]  leftStep      Step of the left sample buffer
+ * [in]  dstStep       Step of the destination buffer
+ * [in]  availability  Neighbour availability flags (OMX_VC_LEFT and
+ *                     OMX_VC_UPPER are tested)
+ * [out] pDst          Pointer to the destination 4x4 block
+ *
+ * Return Value:
+ * None
+ */
+
+static void armVCM4P10_PredictIntraDCLeft4x4(
+     const OMX_U8* pSrcLeft,
+     const OMX_U8 *pSrcAbove,
+     OMX_U8* pDst,
+     OMX_INT leftStep,
+     OMX_INT dstStep,
+     OMX_S32 availability
+)
+{
+    int i, row, dc;
+
+    if (availability & OMX_VC_LEFT)
+    {
+        /* Rounded mean of the four left samples: (sum + 2) >> 2 */
+        dc = 2;
+        for (i = 0; i < 4; i++)
+        {
+            dc += pSrcLeft[i*leftStep];
+        }
+        dc >>= 2;
+    }
+    else if (availability & OMX_VC_UPPER)
+    {
+        /* Left column missing: rounded mean of the four samples above */
+        dc = 2;
+        for (i = 0; i < 4; i++)
+        {
+            dc += pSrcAbove[i];
+        }
+        dc >>= 2;
+    }
+    else
+    {
+        /* No neighbour available: mid-grey */
+        dc = 128;
+    }
+
+    /* Replicate the DC value over the whole 4x4 block */
+    for (row = 0; row < 4; row++)
+    {
+        OMX_U8 *pRow = pDst + row*dstStep;
+
+        pRow[0] = (OMX_U8)dc;
+        pRow[1] = (OMX_U8)dc;
+        pRow[2] = (OMX_U8)dc;
+        pRow[3] = (OMX_U8)dc;
+    }
+}
+
+/**
+ * Function: omxVCM4P10_PredictIntraChroma_8x8 (6.3.3.1.3)
+ *
+ * Description:
+ * Performs intra prediction for chroma samples.
+ *
+ * Input Arguments:
+ *
+ *   pSrcLeft - Pointer to the buffer of 8 left pixels: p[x, y] (x = -1, y=
+ *            0..7).
+ *   pSrcAbove - Pointer to the buffer of 8 above pixels: p[x,y] (x = 0..7, y
+ *            = -1); must be aligned on an 8-byte boundary.
+ *   pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ *   leftStep - Step of left pixel buffer; must be a multiple of 8.
+ *   dstStep - Step of the destination buffer; must be a multiple of 8.
+ *   predMode - Intra chroma prediction mode, please refer to section 3.4.3.
+ *   availability - Neighboring chroma block availability flag, please refer
+ *            to "Neighboring Macroblock Availability".
+ *
+ * Output Arguments:
+ *
+ *   pDst - Pointer to the destination buffer; must be aligned on an 8-byte
+ *            boundary.
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr.
+ *    If any of the following cases occurs, the function returns
+ *              OMX_Sts_BadArgErr:
+ *    pDst is NULL.
+ *    dstStep < 8 or dstStep is not a multiple of 8.
+ *    leftStep is not a multiple of 8.
+ *    predMode is not in the valid range of enumeration
+ *              OMXVCM4P10IntraChromaPredMode.
+ *    predMode is OMX_VC_CHROMA_VERT, but availability doesn't set
+ *              OMX_VC_UPPER indicating p[x,-1] (x = 0..7) is not available.
+ *    predMode is OMX_VC_CHROMA_HOR, but availability doesn't set OMX_VC_LEFT
+ *              indicating p[-1,y] (y = 0..7) is not available.
+ *    predMode is OMX_VC_CHROMA_PLANE, but availability doesn't set
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ *              p[x,-1](x = 0..7), or p[-1,y] (y = 0..7), or p[-1,-1] is not
+ *              available.
+ *    availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ *    availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ *    availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ *    either pSrcAbove or pDst is not aligned on an 8-byte boundary.
+ *
+ * Note:
+ *    pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ *    they are not used by intra prediction implied in predMode.
+ * Note:
+ *    OMX_VC_UPPER_RIGHT is not used in intra chroma prediction.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntraChroma_8x8(
+     const OMX_U8* pSrcLeft,
+     const OMX_U8 *pSrcAbove,
+     const OMX_U8 *pSrcAboveLeft,
+     OMX_U8* pDst,
+     OMX_INT leftStep,
+     OMX_INT dstStep,
+     OMXVCM4P10IntraChromaPredMode predMode,
+     OMX_S32 availability
+ )
+{
+    int row, col, k;
+    int gradH, gradV;              /* horizontal / vertical plane gradients */
+    int planeA, planeB, planeC;    /* plane model: a + b*(x-3) + c*(y-3)    */
+    int rowTerm, sample;
+    OMX_U8 *pRow;
+
+    /* Destination and step constraints */
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(dstStep < 8, OMX_Sts_BadArgErr);
+    armRetArgErrIf((dstStep % 8) != 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf((leftStep % 8) != 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pSrcAbove), OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pDst), OMX_Sts_BadArgErr);
+
+    /* A neighbour flagged as available must come with a non-NULL pointer */
+    armRetArgErrIf((availability & OMX_VC_UPPER) && pSrcAbove == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((availability & OMX_VC_LEFT) && pSrcLeft == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((availability & OMX_VC_UPPER_LEFT) && pSrcAboveLeft == NULL, OMX_Sts_BadArgErr);
+
+    /* Each mode needs the neighbours it reads */
+    armRetArgErrIf(predMode == OMX_VC_CHROMA_VERT && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_CHROMA_HOR && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_CHROMA_PLANE && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_CHROMA_PLANE && !(availability & OMX_VC_UPPER_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_CHROMA_PLANE && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf((unsigned)predMode > OMX_VC_CHROMA_PLANE, OMX_Sts_BadArgErr);
+
+    switch (predMode)
+    {
+    case OMX_VC_CHROMA_DC:
+        /*
+         * The 8x8 block is predicted as four 4x4 quadrants: top-left and
+         * bottom-right use the general DC helper, top-right prefers the
+         * upper neighbour and bottom-left prefers the left neighbour.
+         */
+        armVCM4P10_PredictIntraDC4x4(pSrcLeft, pSrcAbove, pDst,
+                                     leftStep, dstStep, availability);
+        armVCM4P10_PredictIntraDCUp4x4(pSrcLeft, pSrcAbove+4, pDst+4,
+                                       leftStep, dstStep, availability);
+        armVCM4P10_PredictIntraDCLeft4x4(pSrcLeft+4*leftStep, pSrcAbove, pDst+4*dstStep,
+                                         leftStep, dstStep, availability);
+        armVCM4P10_PredictIntraDC4x4(pSrcLeft+4*leftStep, pSrcAbove+4, pDst+4+4*dstStep,
+                                     leftStep, dstStep, availability);
+        break;
+
+    case OMX_VC_CHROMA_HOR:
+        /* Every row repeats its left neighbour sample */
+        for (row = 0; row < 8; row++)
+        {
+            pRow = pDst + row*dstStep;
+            for (col = 0; col < 8; col++)
+            {
+                pRow[col] = pSrcLeft[row*leftStep];
+            }
+        }
+        break;
+
+    case OMX_VC_CHROMA_VERT:
+        /* Every row is a copy of the row above the block */
+        for (row = 0; row < 8; row++)
+        {
+            pRow = pDst + row*dstStep;
+            for (col = 0; col < 8; col++)
+            {
+                pRow[col] = pSrcAbove[col];
+            }
+        }
+        break;
+
+    case OMX_VC_CHROMA_PLANE:
+        /* Weighted differences of samples mirrored around position 3 */
+        gradH = 0;
+        gradV = 0;
+        for (k = 1; k <= 3; k++)
+        {
+            gradH += k*(pSrcAbove[3+k] - pSrcAbove[3-k]);
+            gradV += k*(pSrcLeft[(3+k)*leftStep] - pSrcLeft[(3-k)*leftStep]);
+        }
+        /* The outermost pair uses the above-left corner sample */
+        gradH += 4*(pSrcAbove[7] - pSrcAboveLeft[0]);
+        gradV += 4*(pSrcLeft[7*leftStep] - pSrcAboveLeft[0]);
+
+        planeA = 16*(pSrcAbove[7] + pSrcLeft[7*leftStep]);
+        planeB = (17*gradH+16)>>5;
+        planeC = (17*gradV+16)>>5;
+
+        for (row = 0; row < 8; row++)
+        {
+            pRow = pDst + row*dstStep;
+            /* Row-constant part of a + b*(x-3) + c*(y-3) + 16 */
+            rowTerm = planeA + planeC*(row-3) + 16;
+            for (col = 0; col < 8; col++)
+            {
+                sample = (rowTerm + planeB*(col-3))>>5;
+                pRow[col] = (OMX_U8)armClip(0,255,sample);
+            }
+        }
+        break;
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c
b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c new file mode 100644 index 0000000000000000000000000000000000000000..c90cb4cdea8404e4f05f564566461e9b3e35572f --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c @@ -0,0 +1,198 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_PredictIntra_16x16.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * H.264 16x16 intra prediction module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_PredictIntra_16x16 (6.3.3.1.2) + * + * Description: + * Perform Intra_16x16 prediction for luma samples. If the upper-right block + * is not available, then duplication work should be handled inside the + * function. Users need not define them outside. + * + * Input Arguments: + * + * pSrcLeft - Pointer to the buffer of 16 left pixels: p[x, y] (x = -1, y = + * 0..15) + * pSrcAbove - Pointer to the buffer of 16 above pixels: p[x,y] (x = 0..15, + * y= -1); must be aligned on a 16-byte boundary. + * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) + * leftStep - Step of left pixel buffer; must be a multiple of 16. + * dstStep - Step of the destination buffer; must be a multiple of 16. + * predMode - Intra_16x16 prediction mode, please refer to section 3.4.1. + * availability - Neighboring 16x16 MB availability flag. Refer to + * section 3.4.4. + * + * Output Arguments: + * + * pDst -Pointer to the destination buffer; must be aligned on a 16-byte + * boundary. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns
+ *              OMX_Sts_BadArgErr:
+ *    pDst is NULL.
+ *    dstStep < 16, or dstStep is not a multiple of 16.
+ *    leftStep is not a multiple of 16.
+ *    predMode is not in the valid range of enumeration
+ *              OMXVCM4P10Intra16x16PredMode
+ *    predMode is OMX_VC_16X16_VERT, but availability doesn't set
+ *              OMX_VC_UPPER indicating p[x,-1] (x = 0..15) is not available.
+ *    predMode is OMX_VC_16X16_HOR, but availability doesn't set OMX_VC_LEFT
+ *              indicating p[-1,y] (y = 0..15) is not available.
+ *    predMode is OMX_VC_16X16_PLANE, but availability doesn't set
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ *              p[x,-1](x = 0..15), or p[-1,y] (y = 0..15), or p[-1,-1] is not
+ *              available.
+ *    availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ *    availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ *    availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ *    either pSrcAbove or pDst is not aligned on a 16-byte boundary.
+ *
+ * Note:
+ *     pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ *     they are not used by intra prediction implied in predMode.
+ * Note:
+ *     OMX_VC_UPPER_RIGHT is not used in intra_16x16 luma prediction.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_16x16(
+    const OMX_U8* pSrcLeft,
+    const OMX_U8 *pSrcAbove,
+    const OMX_U8 *pSrcAboveLeft,
+    OMX_U8* pDst,
+    OMX_INT leftStep,
+    OMX_INT dstStep,
+    OMXVCM4P10Intra16x16PredMode predMode,
+    OMX_S32 availability)
+{
+    int row, col, k;
+    int sum, sources, dc;
+    int gradH, gradV;              /* horizontal / vertical plane gradients */
+    int planeA, planeB, planeC;    /* plane model: a + b*(x-7) + c*(y-7)    */
+    int rowTerm, sample;
+    OMX_U8 *pRow;
+
+    /* Destination and step constraints */
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(dstStep < 16, OMX_Sts_BadArgErr);
+    armRetArgErrIf((dstStep % 16) != 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf((leftStep % 16) != 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot16ByteAligned(pSrcAbove), OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot16ByteAligned(pDst), OMX_Sts_BadArgErr);
+
+    /* A neighbour flagged as available must come with a non-NULL pointer */
+    armRetArgErrIf((availability & OMX_VC_UPPER) && pSrcAbove == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((availability & OMX_VC_LEFT) && pSrcLeft == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((availability & OMX_VC_UPPER_LEFT) && pSrcAboveLeft == NULL, OMX_Sts_BadArgErr);
+
+    /* Each mode needs the neighbours it reads */
+    armRetArgErrIf(predMode == OMX_VC_16X16_VERT && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_16X16_HOR && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_16X16_PLANE && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_16X16_PLANE && !(availability & OMX_VC_UPPER_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_16X16_PLANE && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf((unsigned)predMode > OMX_VC_16X16_PLANE, OMX_Sts_BadArgErr);
+
+    switch (predMode)
+    {
+    case OMX_VC_16X16_VERT:
+        /* Every row is a copy of the row above the macroblock */
+        for (row = 0; row < 16; row++)
+        {
+            pRow = pDst + row*dstStep;
+            for (col = 0; col < 16; col++)
+            {
+                pRow[col] = pSrcAbove[col];
+            }
+        }
+        break;
+
+    case OMX_VC_16X16_HOR:
+        /* Every row repeats its left neighbour sample */
+        for (row = 0; row < 16; row++)
+        {
+            pRow = pDst + row*dstStep;
+            for (col = 0; col < 16; col++)
+            {
+                pRow[col] = pSrcLeft[row*leftStep];
+            }
+        }
+        break;
+
+    case OMX_VC_16X16_DC:
+        /* Usable whatever the availability: falls back to 128 */
+        sum = 0;
+        sources = 0;
+        if (availability & OMX_VC_LEFT)
+        {
+            for (k = 0; k < 16; k++)
+            {
+                sum += pSrcLeft[k*leftStep];
+            }
+            sources++;
+        }
+        if (availability & OMX_VC_UPPER)
+        {
+            for (k = 0; k < 16; k++)
+            {
+                sum += pSrcAbove[k];
+            }
+            sources++;
+        }
+
+        switch (sources)
+        {
+        case 0:
+            dc = 128;                 /* no neighbour at all */
+            break;
+        case 1:
+            dc = (sum + 8) >> 4;      /* rounded mean of 16 samples */
+            break;
+        default:
+            dc = (sum + 16) >> 5;     /* rounded mean of 32 samples */
+            break;
+        }
+
+        for (row = 0; row < 16; row++)
+        {
+            pRow = pDst + row*dstStep;
+            for (col = 0; col < 16; col++)
+            {
+                pRow[col] = (OMX_U8)dc;
+            }
+        }
+        break;
+
+    case OMX_VC_16X16_PLANE:
+        /* Weighted differences of samples mirrored around position 7 */
+        gradH = 0;
+        gradV = 0;
+        for (k = 1; k <= 7; k++)
+        {
+            gradH += k*(pSrcAbove[7+k] - pSrcAbove[7-k]);
+            gradV += k*(pSrcLeft[(7+k)*leftStep] - pSrcLeft[(7-k)*leftStep]);
+        }
+        /* The outermost pair uses the above-left corner sample */
+        gradH += 8*(pSrcAbove[15] - pSrcAboveLeft[0]);
+        gradV += 8*(pSrcLeft[15*leftStep] - pSrcAboveLeft[0]);
+
+        planeA = 16*(pSrcAbove[15] + pSrcLeft[15*leftStep]);
+        planeB = (5*gradH+32)>>6;
+        planeC = (5*gradV+32)>>6;
+
+        for (row = 0; row < 16; row++)
+        {
+            pRow = pDst + row*dstStep;
+            /* Row-constant part of a + b*(x-7) + c*(y-7) + 16 */
+            rowTerm = planeA + planeC*(row-7) + 16;
+            for (col = 0; col < 16; col++)
+            {
+                sample = (rowTerm + planeB*(col-7))>>5;
+                pRow[col] = (OMX_U8)armClip(0,255,sample);
+            }
+        }
+        break;
+    }
+
+    return OMX_Sts_NoErr;
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..3fa8212778d5249f4d26a75cd407ee19cb1974a7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c
@@ -0,0 +1,338 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_PredictIntra_4x4.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 4x4 intra prediction module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_PredictIntra_4x4 (6.3.3.1.1)
+ *
+ * Description:
+ * Perform Intra_4x4 prediction for luma samples. If the upper-right block is
+ * not available, then duplication work should be handled inside the function.
+ * Users need not define them outside.
+ *
+ * Input Arguments:
+ *
+ *   pSrcLeft - Pointer to the buffer of 4 left pixels:
+ *                  p[x, y] (x = -1, y = 0..3)
+ *   pSrcAbove - Pointer to the buffer of 8 above pixels:
+ *                  p[x,y] (x = 0..7, y =-1);
+ *               must be aligned on a 4-byte boundary.
+ *   pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ *   leftStep - Step of left pixel buffer; must be a multiple of 4.
+ *   dstStep - Step of the destination buffer; must be a multiple of 4.
+ *   predMode - Intra_4x4 prediction mode.
+ *   availability - Neighboring 4x4 block availability flag, refer to
+ *             "Neighboring Macroblock Availability".
+ *
+ * Output Arguments:
+ *
+ *   pDst - Pointer to the destination buffer; must be aligned on a 4-byte
+ *            boundary.
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr.
+ *    If one of the following cases occurs, the function returns
+ *              OMX_Sts_BadArgErr:
+ *    pDst is NULL.
+ *    dstStep < 4, or dstStep is not a multiple of 4.
+ *    leftStep is not a multiple of 4.
+ *    predMode is not in the valid range of enumeration
+ *              OMXVCM4P10Intra4x4PredMode.
+ *    predMode is OMX_VC_4X4_VERT, but availability doesn't set OMX_VC_UPPER
+ *              indicating p[x,-1] (x = 0..3) is not available.
+ *    predMode is OMX_VC_4X4_HOR, but availability doesn't set OMX_VC_LEFT
+ *              indicating p[-1,y] (y = 0..3) is not available.
+ *    predMode is OMX_VC_4X4_DIAG_DL, but availability doesn't set
+ *              OMX_VC_UPPER indicating p[x,-1] (x = 0..3) is not available.
+ *    predMode is OMX_VC_4X4_DIAG_DR, but availability doesn't set
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ *              p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ *              available.
+ *    predMode is OMX_VC_4X4_VR, but availability doesn't set
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ *              p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ *              available.
+ *    predMode is OMX_VC_4X4_HD, but availability doesn't set
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ *              p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ *              available.
+ *    predMode is OMX_VC_4X4_VL, but availability doesn't set OMX_VC_UPPER
+ *              indicating p[x,-1] (x = 0..3) is not available.
+ *    predMode is OMX_VC_4X4_HU, but availability doesn't set OMX_VC_LEFT
+ *              indicating p[-1,y] (y = 0..3) is not available.
+ *    availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ *    availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ *    availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ *    either pSrcAbove or pDst is not aligned on a 4-byte boundary.
+ *
+ * Note:
+ *     pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ *     they are not used by intra prediction as implied in predMode.
+ *
+ */
+
+OMXResult omxVCM4P10_PredictIntra_4x4(
+     const OMX_U8* pSrcLeft,
+     const OMX_U8 *pSrcAbove,
+     const OMX_U8 *pSrcAboveLeft,
+     OMX_U8* pDst,
+     OMX_INT leftStep,
+     OMX_INT dstStep,
+     OMXVCM4P10Intra4x4PredMode predMode,
+     OMX_S32 availability
+ )
+{
+    int row, col;
+    /*
+     * The directional modes first build a small table of filtered samples
+     * and then copy taps[base + colStride*x + rowStride*y] into the block.
+     */
+    int base = 0, colStride = 0, rowStride = 0;
+    OMX_U8 taps[10];
+    OMX_U8 *pRow;
+
+    /* Destination and step constraints */
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((leftStep % 4) != 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf((dstStep % 4) != 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf((dstStep < 4), OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pSrcAbove), OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pDst), OMX_Sts_BadArgErr);
+
+    /* A neighbour flagged as available must come with a non-NULL pointer */
+    armRetArgErrIf((availability & OMX_VC_UPPER) && pSrcAbove == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((availability & OMX_VC_LEFT) && pSrcLeft == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((availability & OMX_VC_UPPER_LEFT) && pSrcAboveLeft == NULL, OMX_Sts_BadArgErr);
+
+    /* Each mode needs the neighbours it reads */
+    armRetArgErrIf(predMode == OMX_VC_4X4_VERT && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_4X4_HOR && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_4X4_DIAG_DL && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_4X4_DIAG_DR && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_4X4_DIAG_DR && !(availability & OMX_VC_UPPER_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_4X4_DIAG_DR && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_4X4_VR && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_4X4_VR && !(availability & OMX_VC_UPPER_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_4X4_VR && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_4X4_HD && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_4X4_HD && !(availability & OMX_VC_UPPER_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_4X4_HD && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_4X4_VL && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode == OMX_VC_4X4_HU && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf((unsigned)predMode > OMX_VC_4X4_HU, OMX_Sts_BadArgErr);
+
+    /* Note: the pSrc arrays must only be read when the corresponding
+     * neighbour is marked as available; otherwise the pointer may not be
+     * valid.  The shorthands below are macros, so a sample is read only
+     * where its name is actually used.
+     *
+     * Neighbouring pixels are named as follows:
+     *
+     * UL U0 U1 U2 U3 U4 U5 U6 U7
+     * L0 xx xx xx xx
+     * L1 xx xx xx xx
+     * L2 xx xx xx xx
+     * L3 xx xx xx xx
+     */
+
+#define UL pSrcAboveLeft[0]
+#define U0 pSrcAbove[0]
+#define U1 pSrcAbove[1]
+#define U2 pSrcAbove[2]
+#define U3 pSrcAbove[3]
+#define U4 pSrcAbove[4]
+#define U5 pSrcAbove[5]
+#define U6 pSrcAbove[6]
+#define U7 pSrcAbove[7]
+#define L0 pSrcLeft[0*leftStep]
+#define L1 pSrcLeft[1*leftStep]
+#define L2 pSrcLeft[2*leftStep]
+#define L3 pSrcLeft[3*leftStep]
+
+    switch (predMode)
+    {
+    case OMX_VC_4X4_VERT:
+        /* Each row copies the four samples above the block */
+        for (row = 0; row < 4; row++)
+        {
+            pRow = pDst + row*dstStep;
+            pRow[0] = U0;
+            pRow[1] = U1;
+            pRow[2] = U2;
+            pRow[3] = U3;
+        }
+        return OMX_Sts_NoErr;
+
+    case OMX_VC_4X4_HOR:
+        /* Each row repeats its left neighbour; filled column by column */
+        for (col = 0; col < 4; col++)
+        {
+            pDst[0*dstStep+col] = L0;
+            pDst[1*dstStep+col] = L1;
+            pDst[2*dstStep+col] = L2;
+            pDst[3*dstStep+col] = L3;
+        }
+        return OMX_Sts_NoErr;
+
+    case OMX_VC_4X4_DC:
+        /* Usable whatever the availability; delegated to the DC helper */
+        armVCM4P10_PredictIntraDC4x4(pSrcLeft, pSrcAbove, pDst, leftStep, dstStep, availability);
+        return OMX_Sts_NoErr;
+
+    case OMX_VC_4X4_DIAG_DL:
+        /* table index = x + y */
+        taps[0] = (OMX_U8)((U0 + 2*U1 + U2 + 2)>>2);
+        taps[1] = (OMX_U8)((U1 + 2*U2 + U3 + 2)>>2);
+        if (availability & OMX_VC_UPPER_RIGHT)
+        {
+            taps[2] = (OMX_U8)((U2 + 2*U3 + U4 + 2)>>2);
+            taps[3] = (OMX_U8)((U3 + 2*U4 + U5 + 2)>>2);
+            taps[4] = (OMX_U8)((U4 + 2*U5 + U6 + 2)>>2);
+            taps[5] = (OMX_U8)((U5 + 2*U6 + U7 + 2)>>2);
+            taps[6] = (OMX_U8)((U6 + 3*U7 + 2)>>2);
+        }
+        else
+        {
+            /* Upper-right missing: U3 stands in for U4..U7 */
+            taps[2] = (OMX_U8)((U2 + 3*U3 + 2)>>2);
+            taps[3] = U3;
+            taps[4] = U3;
+            taps[5] = U3;
+            taps[6] = U3;
+        }
+        base = 0;
+        colStride = 1;
+        rowStride = 1;
+        break;
+
+    case OMX_VC_4X4_DIAG_DR:
+        /* table index = 3 + x - y, i.e. x-y = -3 .. 3 */
+        taps[0] = (OMX_U8)((L1 + 2*L2 + L3 + 2)>>2);
+        taps[1] = (OMX_U8)((L0 + 2*L1 + L2 + 2)>>2);
+        taps[2] = (OMX_U8)((UL + 2*L0 + L1 + 2)>>2);
+        taps[3] = (OMX_U8)((U0 + 2*UL + L0 + 2)>>2);
+        taps[4] = (OMX_U8)((U1 + 2*U0 + UL + 2)>>2);
+        taps[5] = (OMX_U8)((U2 + 2*U1 + U0 + 2)>>2);
+        taps[6] = (OMX_U8)((U3 + 2*U2 + U1 + 2)>>2);
+        base = 3;
+        colStride = 1;
+        rowStride = -1;
+        break;
+
+    case OMX_VC_4X4_VR:
+        /* table index = 3 + 2x - y, i.e. zVR = 2x-y = -3 .. 6 */
+        taps[0] = (OMX_U8)((L2 + 2*L1 + L0 + 2)>>2);
+        taps[1] = (OMX_U8)((L1 + 2*L0 + UL + 2)>>2);
+        taps[2] = (OMX_U8)((L0 + 2*UL + U0 + 2)>>2);
+        taps[3] = (OMX_U8)((UL + U0 + 1)>>1);
+        taps[4] = (OMX_U8)((UL + 2*U0 + U1 + 2)>>2);
+        taps[5] = (OMX_U8)((U0 + U1 + 1)>>1);
+        taps[6] = (OMX_U8)((U0 + 2*U1 + U2 + 2)>>2);
+        taps[7] = (OMX_U8)((U1 + U2 + 1)>>1);
+        taps[8] = (OMX_U8)((U1 + 2*U2 + U3 + 2)>>2);
+        taps[9] = (OMX_U8)((U2 + U3 + 1)>>1);
+        base = 3;
+        colStride = 2;
+        rowStride = -1;
+        break;
+
+    case OMX_VC_4X4_HD:
+        /* table index = 3 + 2y - x, i.e. zHD = 2y-x = -3 .. 6 */
+        taps[0] = (OMX_U8)((U2 + 2*U1 + U0 + 2)>>2);
+        taps[1] = (OMX_U8)((U1 + 2*U0 + UL + 2)>>2);
+        taps[2] = (OMX_U8)((U0 + 2*UL + L0 + 2)>>2);
+        taps[3] = (OMX_U8)((UL + L0 + 1)>>1);
+        taps[4] = (OMX_U8)((UL + 2*L0 + L1 + 2)>>2);
+        taps[5] = (OMX_U8)((L0 + L1 + 1)>>1);
+        taps[6] = (OMX_U8)((L0 + 2*L1 + L2 + 2)>>2);
+        taps[7] = (OMX_U8)((L1 + L2 + 1)>>1);
+        taps[8] = (OMX_U8)((L1 + 2*L2 + L3 + 2)>>2);
+        taps[9] = (OMX_U8)((L2 + L3 + 1)>>1);
+        base = 3;
+        colStride = -1;
+        rowStride = 2;
+        break;
+
+    case OMX_VC_4X4_VL:
+        /* table index = 2x + y = 0 .. 9 */
+        taps[0] = (OMX_U8)((U0 + U1 + 1)>>1);
+        taps[1] = (OMX_U8)((U0 + 2*U1 + U2 + 2)>>2);
+        taps[2] = (OMX_U8)((U1 + U2 + 1)>>1);
+        taps[3] = (OMX_U8)((U1 + 2*U2 + U3 + 2)>>2);
+        taps[4] = (OMX_U8)((U2 + U3 + 1)>>1);
+        if (availability & OMX_VC_UPPER_RIGHT)
+        {
+            taps[5] = (OMX_U8)((U2 + 2*U3 + U4 + 2)>>2);
+            taps[6] = (OMX_U8)((U3 + U4 + 1)>>1);
+            taps[7] = (OMX_U8)((U3 + 2*U4 + U5 + 2)>>2);
+            taps[8] = (OMX_U8)((U4 + U5 + 1)>>1);
+            taps[9] = (OMX_U8)((U4 + 2*U5 + U6 + 2)>>2);
+        }
+        else
+        {
+            /* Upper-right missing: U3 stands in for U4..U6 */
+            taps[5] = (OMX_U8)((U2 + 3*U3 + 2)>>2);
+            taps[6] = U3;
+            taps[7] = U3;
+            taps[8] = U3;
+            taps[9] = U3;
+        }
+        base = 0;
+        colStride = 2;
+        rowStride = 1;
+        break;
+
+    case OMX_VC_4X4_HU:
+        /* table index = x + 2y = 0 .. 9 */
+        taps[0] = (OMX_U8)((L0 + L1 + 1)>>1);
+        taps[1] = (OMX_U8)((L0 + 2*L1 + L2 + 2)>>2);
+        taps[2] = (OMX_U8)((L1 + L2 + 1)>>1);
+        taps[3] = (OMX_U8)((L1 + 2*L2 + L3 + 2)>>2);
+        taps[4] = (OMX_U8)((L2 + L3 + 1)>>1);
+        taps[5] = (OMX_U8)((L2 + 3*L3 + 2)>>2);
+        taps[6] = L3;
+        taps[7] = L3;
+        taps[8] = L3;
+        taps[9] = L3;
+        base = 0;
+        colStride = 1;
+        rowStride = 2;
+        break;
+
+    default:
+        /* No other mode writes anything */
+        return OMX_Sts_NoErr;
+    }
+
+    /* Directional modes: spread the filtered table over the 4x4 block */
+    for (row = 0; row < 4; row++)
+    {
+        pRow = pDst + row*dstStep;
+        for (col = 0; col < 4; col++)
+        {
+            pRow[col] = taps[base + colStride*col + rowStride*row];
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_16x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_16x.c
new file mode 100644
index 0000000000000000000000000000000000000000..c8114ee55f9e13fcfc7e28ae59a50c678a11d3c7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_16x.c
@@ -0,0 +1,86 @@
+/**
+ *
+ * File Name:
omxVCM4P10_SADQuar_16x.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate SAD of pSrc with average of two Ref blocks
+ * of 16x16 or 16x8
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P10_SADQuar_16x (6.3.5.4.4)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 16x16 or 16x8 blocks.
+ * Rounding is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ *   pSrc - Pointer to the original block; must be aligned on a 16-byte
+ *            boundary.
+ *   pSrcRef0 - Pointer to reference block 0
+ *   pSrcRef1 - Pointer to reference block 1
+ *   iSrcStep - Step of the original block buffer; must be a multiple of 16
+ *   iRefStep0 - Step of reference block 0
+ *   iRefStep1 - Step of reference block 1
+ *   iHeight - Height of the block; must be equal to either 8 or 16
+ *
+ * Output Arguments:
+ *
+ *   pDstSAD - Pointer to the resulting SAD
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ *    - iHeight is not equal to either 8 or 16.
+ *    - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ *      pSrcRef1, pDstSAD.
+ *    - iSrcStep is not a multiple of 16
+ *    - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_16x(
+    const OMX_U8* pSrc,
+    const OMX_U8* pSrcRef0,
+    const OMX_U8* pSrcRef1,
+    OMX_U32 iSrcStep,
+    OMX_U32 iRefStep0,
+    OMX_U32 iRefStep1,
+    OMX_U32* pDstSAD,
+    OMX_U32 iHeight
+)
+{
+    OMXResult status;
+
+    /* All four pointers are mandatory */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRef0 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRef1 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr)
+
+    /* Only 16x16 and 16x8 blocks are accepted */
+    armRetArgErrIf((iHeight != 16) && (iHeight != 8), OMX_Sts_BadArgErr)
+
+    /* Source block: 16-byte aligned, non-zero step that is a multiple of 16 */
+    armRetArgErrIf(armNot16ByteAligned(pSrc), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iSrcStep == 0) || (iSrcStep & 15), OMX_Sts_BadArgErr)
+
+    /* The shared helper does the work; the last argument is the width, 16 */
+    status = armVCM4P10_SADQuar(pSrc, pSrcRef0, pSrcRef1, iSrcStep,
+                                iRefStep0, iRefStep1, pDstSAD, iHeight, 16);
+
+    return status;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_4x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_4x.c
new file mode 100644
index 0000000000000000000000000000000000000000..4b330ba3e0ce5b1d8c676dcabe94ff83e9e4cdfd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_4x.c
@@ -0,0 +1,85 @@
+/**
+ *
+ * File Name: omxVCM4P10_SADQuar_4x.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate SAD of pSrc with average of two Ref blocks
+ * of 4x8 or 4x4 blocks
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P10_SADQuar_4x (6.3.5.4.2)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 4x8 or 4x4 blocks. Rounding
+ * is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ *   pSrc - Pointer to the original block; must be aligned on a 4-byte
+ *            boundary.
+ *   pSrcRef0 - Pointer to reference block 0
+ *   pSrcRef1 - Pointer to reference block 1
+ *   iSrcStep - Step of the original block buffer; must be a multiple of 4.
+ *   iRefStep0 - Step of reference block 0
+ *   iRefStep1 - Step of reference block 1
+ *   iHeight - Height of the block; must be equal to either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ *   pDstSAD - Pointer to the resulting SAD
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ *    - iHeight is not equal to either 4 or 8.
+ *    - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ *      pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 4 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SADQuar_4x( + const OMX_U8* pSrc, + const OMX_U8* pSrcRef0, + const OMX_U8* pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32* pDstSAD, + OMX_U32 iHeight +) +{ + /* check for argument error */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcRef0 == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcRef1 == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf((iHeight != 8) && (iHeight != 4), OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pSrc), OMX_Sts_BadArgErr); + armRetArgErrIf((iSrcStep == 0) || (iSrcStep & 3), OMX_Sts_BadArgErr); + + return armVCM4P10_SADQuar + (pSrc, pSrcRef0, pSrcRef1, iSrcStep, + iRefStep0, iRefStep1, pDstSAD, iHeight, 4); +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_8x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_8x.c new file mode 100644 index 0000000000000000000000000000000000000000..c9e9c246df82736223ccd367f997e5639bb54fe8 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_8x.c @@ -0,0 +1,87 @@ +/** + * + * File Name: omxVCM4P10_SADQuar_8x.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * Description: + * This function will calculate SAD of pSrc with average of two Ref blocks + * of 8x16 or 8x8 or 8x4 + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: omxVCM4P10_SADQuar_8x (6.3.5.4.3) + * + * Description: + * This function calculates the SAD between one block (pSrc) and the average + * of the other two (pSrcRef0 and pSrcRef1) for 8x16, 8x8, or 8x4 blocks. + * Rounding is applied according to the convention (a+b+1)>>1. + * + * Input Arguments: + * + * pSrc - Pointer to the original block; must be aligned on an 8-byte + * boundary. + * pSrcRef0 - Pointer to reference block 0 + * pSrcRef1 - Pointer to reference block 1 + * iSrcStep - Step of the original block buffer; must be a multiple of 8. + * iRefStep0 - Step of reference block 0 + * iRefStep1 - Step of reference block 1 + * iHeight - Height of the block; must be equal to either 4, 8, or 16. + * + * Output Arguments: + * + * pDstSAD - Pointer to the resulting SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - iHeight is not equal to either 4, 8, or 16. + * - One or more of the following pointers is NULL: pSrc, pSrcRef0, + * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 8 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SADQuar_8x( + const OMX_U8* pSrc, + const OMX_U8* pSrcRef0, + const OMX_U8* pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32* pDstSAD, + OMX_U32 iHeight +) +{ + /* check for argument error */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pSrcRef0 == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pSrcRef1 == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf((iHeight != 16) && (iHeight != 8) && + (iHeight != 4), OMX_Sts_BadArgErr) + armRetArgErrIf(armNot8ByteAligned(pSrc), OMX_Sts_BadArgErr) + armRetArgErrIf((iSrcStep == 0) || (iSrcStep & 7), OMX_Sts_BadArgErr) + + + return armVCM4P10_SADQuar + (pSrc, pSrcRef0, pSrcRef1, iSrcStep, + iRefStep0, iRefStep1, pDstSAD, iHeight, 8); +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SAD_4x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SAD_4x.c new file mode 100644 index 0000000000000000000000000000000000000000..927c454831ff6baa39b2928e3fb995447f00a222 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SAD_4x.c @@ -0,0 +1,77 @@ +/** + * + * File Name: omxVCM4P10_SAD_4x.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * Description: + * This function will calculate SAD for 4x8 and 4x4 blocks + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: omxVCM4P10_SAD_4x (6.3.5.4.1) + * + * Description: + * This function calculates the SAD for 4x8 and 4x4 blocks. + * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 4-byte + * boundary. + * iStepOrg - Step of the original block buffer; must be a multiple of 4. + * pSrcRef - Pointer to the reference block + * iStepRef - Step of the reference block buffer; this implementation also + * rejects a value that is zero or not a multiple of 4. + * iHeight - Height of the block; must be equal to either 4 or 8. + * + * Output Arguments: + * + * pDstSAD - Pointer to the resulting SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - One or more of the following pointers is NULL: + * pSrcOrg, pSrcRef, or pDstSAD + * - iHeight is not equal to either 4 or 8.
+ * - iStepOrg is not a multiple of 4 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SAD_4x( + const OMX_U8* pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8* pSrcRef, + OMX_U32 iStepRef, + OMX_S32* pDstSAD, + OMX_U32 iHeight +) +{ + /* check for argument error */ + armRetArgErrIf(pSrcOrg == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pSrcRef == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf((iHeight != 8) && (iHeight != 4), OMX_Sts_BadArgErr) + armRetArgErrIf(armNot4ByteAligned(pSrcOrg), OMX_Sts_BadArgErr) + armRetArgErrIf((iStepOrg == 0) || (iStepOrg & 3), OMX_Sts_BadArgErr) + armRetArgErrIf((iStepRef == 0) || (iStepRef & 3), OMX_Sts_BadArgErr) + + return armVCCOMM_SAD + (pSrcOrg, iStepOrg, pSrcRef, iStepRef, pDstSAD, iHeight, 4); +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SATD_4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SATD_4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..a91ae660ef180d6ab128343e9213d0de4dc8eb4a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SATD_4x4.c @@ -0,0 +1,132 @@ +/** + * + * File Name: omxVCM4P10_SATD_4x4.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * Description: + * This function will calculate SAD for 4x4 blocks + * + */ +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + +/** + * Function: omxVCM4P10_SATD_4x4 (6.3.5.4.5) + * + * Description: + * This function calculates the sum of absolute transform differences (SATD) + * for a 4x4 block by applying a Hadamard transform to the difference block + * and then calculating the sum of absolute coefficient values. + * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 4-byte + * boundary + * iStepOrg - Step of the original block buffer; must be a multiple of 4 + * pSrcRef - Pointer to the reference block; must be aligned on a 4-byte + * boundary + * iStepRef - Step of the reference block buffer; must be a multiple of 4 + * + * Output Arguments: + * + * pDstSAD - pointer to the resulting SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pSrcRef, or pDstSAD either pSrcOrg + * - pSrcRef is not aligned on a 4-byte boundary + * - iStepOrg <= 0 or iStepOrg is not a multiple of 4 + * - iStepRef <= 0 or iStepRef is not a multiple of 4 + * + */ +OMXResult omxVCM4P10_SATD_4x4( + const OMX_U8* pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8* pSrcRef, + OMX_U32 iStepRef, + OMX_U32* pDstSAD +) +{ + OMX_INT i, j; + OMX_S32 SATD = 0; + OMX_S32 d [4][4], m1[4][4], m2[4][4]; + + /* check for argument error */ + armRetArgErrIf(pSrcOrg == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pSrcRef == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf((iStepOrg == 0) || (iStepOrg & 3), OMX_Sts_BadArgErr) + armRetArgErrIf((iStepRef == 0) || (iStepRef & 3), OMX_Sts_BadArgErr) + armRetArgErrIf(armNot4ByteAligned(pSrcOrg), OMX_Sts_BadArgErr) + armRetArgErrIf(armNot4ByteAligned(pSrcRef), 
OMX_Sts_BadArgErr) + + /* Calculate the difference */ + for (j = 0; j < 4; j++) + { + for (i = 0; i < 4; i++) + { + d [j][i] = pSrcOrg [j * iStepOrg + i] - pSrcRef [j * iStepRef + i]; + } + } + + /* Hadamard Transfor for 4x4 block */ + + /* Horizontal */ + for (i = 0; i < 4; i++) + { + m1[i][0] = d[i][0] + d[i][2]; /* a+c */ + m1[i][1] = d[i][1] + d[i][3]; /* b+d */ + m1[i][2] = d[i][0] - d[i][2]; /* a-c */ + m1[i][3] = d[i][1] - d[i][3]; /* b-d */ + + m2[i][0] = m1[i][0] + m1[i][1]; /* a+b+c+d */ + m2[i][1] = m1[i][2] + m1[i][3]; /* a+b-c-d */ + m2[i][2] = m1[i][2] - m1[i][3]; /* a-b-c+d */ + m2[i][3] = m1[i][0] - m1[i][1]; /* a-b+c-d */ + + } + + /* Vertical */ + for (i = 0; i < 4; i++) + { + m1[0][i] = m2[0][i] + m2[2][i]; + m1[1][i] = m2[1][i] + m2[3][i]; + m1[2][i] = m2[0][i] - m2[2][i]; + m1[3][i] = m2[1][i] - m2[3][i]; + + m2[0][i] = m1[0][i] + m1[1][i]; + m2[1][i] = m1[2][i] + m1[3][i]; + m2[2][i] = m1[2][i] - m1[3][i]; + m2[3][i] = m1[0][i] - m1[1][i]; + } + + /* calculate SAD for Transformed coefficients */ + for (j = 0; j < 4; j++) + { + for (i = 0; i < 4; i++) + { + SATD += armAbs(m2 [j][i]); + } + } + + *pDstSAD = (SATD + 1) / 2; + + return OMX_Sts_NoErr; +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c new file mode 100644 index 0000000000000000000000000000000000000000..23a5662e20957354784bf33aca9b5a84f76b9d59 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c @@ -0,0 +1,220 @@ +/** + * + * File Name: omxVCM4P10_SubAndTransformQDQResidual.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 
2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This function will subtract the prediction from the original 4x4 block and + * then transform, quantize and dequantize the residual + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_SubAndTransformQDQResidual (6.3.5.8.1) + * + * Description: + * This function subtracts the prediction signal from the original signal to + * produce the difference signal and then performs a 4x4 integer transform and + * quantization. The quantized transformed coefficients are stored as + * pDstQuantCoeff. This function can also output dequantized coefficients or + * unquantized DC coefficients optionally by setting the pointers + * pDstDeQuantCoeff, pDCCoeff. + * + * Input Arguments: + * + * pSrcOrg - Pointer to original signal. 4-byte alignment required. + * pSrcPred - Pointer to prediction signal. 4-byte alignment required. + * iSrcOrgStep - Step of the original signal buffer; must be a multiple of + * 4. + * iSrcPredStep - Step of the prediction signal buffer; must be a multiple + * of 4. + * pNumCoeff - Number of non-zero coefficients after quantization. If this + * parameter is not required, it is set to NULL (note: this + * implementation returns OMX_Sts_BadArgErr for NULL). + * nThreshSAD - Zero-block early detection threshold. If this parameter is + * not required, it is set to 0. + * iQP - Quantization parameter; must be in the range [0,51]. + * bIntra - Indicates whether this is an INTRA block, either 1-INTRA or + * 0-INTER + * + * Output Arguments: + * + * pDstQuantCoeff - Pointer to the quantized transformed coefficients. + * 8-byte alignment required. + * pDstDeQuantCoeff - Pointer to the dequantized transformed coefficients + * if this parameter is not equal to NULL. 8-byte alignment + * required (note: this implementation returns + * OMX_Sts_BadArgErr for NULL). + * pDCCoeff - Pointer to the unquantized DC coefficient if this parameter + * is not equal to NULL (note: this implementation returns + * OMX_Sts_BadArgErr for NULL).
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pSrcPred, pNumCoeff, pDstQuantCoeff, + * pDstDeQuantCoeff, pDCCoeff + * - pSrcOrg is not aligned on a 4-byte boundary + * - pSrcPred is not aligned on a 4-byte boundary + * - iSrcOrgStep is not a multiple of 4 + * - iSrcPredStep is not a multiple of 4 + * - pDstQuantCoeff or pDstDeQuantCoeff is not aligned on an 8-byte boundary + * + */ + OMXResult omxVCM4P10_SubAndTransformQDQResidual ( + const OMX_U8* pSrcOrg, + const OMX_U8* pSrcPred, + OMX_U32 iSrcOrgStep, + OMX_U32 iSrcPredStep, + OMX_S16* pDstQuantCoeff, + OMX_S16* pDstDeQuantCoeff, + OMX_S16* pDCCoeff, + OMX_S8* pNumCoeff, + OMX_U32 nThreshSAD, + OMX_U32 iQP, + OMX_U8 bIntra +) +{ + OMX_INT i, j; + OMX_S8 NumCoeff = 0; + OMX_S16 Buf[16], m[16]; + OMX_U32 QBits, QPper, QPmod, f; + OMX_S32 Value, MF, ThreshDC; + + /* check for argument error */ + armRetArgErrIf(pSrcOrg == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDstDeQuantCoeff == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pNumCoeff == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(pDCCoeff == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(armNot4ByteAligned(pSrcOrg), OMX_Sts_BadArgErr) + armRetArgErrIf(pSrcPred == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(armNot4ByteAligned(pSrcPred), OMX_Sts_BadArgErr) + armRetArgErrIf(pDstQuantCoeff == NULL, OMX_Sts_BadArgErr) + armRetArgErrIf(armNot8ByteAligned(pDstQuantCoeff), OMX_Sts_BadArgErr) + armRetArgErrIf((pDstDeQuantCoeff != NULL) && + armNot8ByteAligned(pDstDeQuantCoeff), OMX_Sts_BadArgErr) + armRetArgErrIf((bIntra != 0) && (bIntra != 1), OMX_Sts_BadArgErr) + armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr) + armRetArgErrIf(iSrcOrgStep == 0, OMX_Sts_BadArgErr) + armRetArgErrIf(iSrcPredStep == 0, OMX_Sts_BadArgErr) + armRetArgErrIf(iSrcOrgStep & 3, OMX_Sts_BadArgErr) + armRetArgErrIf(iSrcPredStep & 3, 
OMX_Sts_BadArgErr) + + /* + * Zero-Block Early detection using nThreshSAD param + */ + + QPper = iQP / 6; + QPmod = iQP % 6; + QBits = 15 + QPper; + + f = (1 << QBits) / (bIntra ? 3 : 6); + + /* Do Zero-Block Early detection if enabled */ + if (nThreshSAD) + { + ThreshDC = ((1 << QBits) - f) / armVCM4P10_MFMatrix[QPmod][0]; + if (nThreshSAD < ThreshDC) + { + /* Set block to zero */ + if (pDCCoeff != NULL) + { + *pDCCoeff = 0; + } + + for (j = 0; j < 4; j++) + { + for (i = 0; i < 4; i++) + { + pDstQuantCoeff [4 * j + i] = 0; + if (pDstDeQuantCoeff != NULL) + { + pDstDeQuantCoeff [4 * j + i] = 0; + } + } + } + + if (pNumCoeff != NULL) + { + *pNumCoeff = 0; + } + return OMX_Sts_NoErr; + } + } + + + /* Calculate difference */ + for (j = 0; j < 4; j++) + { + for (i = 0; i < 4; i++) + { + Buf [j * 4 + i] = + pSrcOrg [j * iSrcOrgStep + i] - pSrcPred [j * iSrcPredStep + i]; + } + } + + /* Residual Transform */ + armVCM4P10_FwdTransformResidual4x4 (m, Buf); + + if (pDCCoeff != NULL) + { + /* Copy unquantized DC value into pointer */ + *pDCCoeff = m[0]; + } + + /* Quantization */ + for (j = 0; j < 4; j++) + { + for (i = 0; i < 4; i++) + { + MF = armVCM4P10_MFMatrix[QPmod][armVCM4P10_PosToVCol4x4[j * 4 + i]]; + Value = armAbs(m[j * 4 + i]) * MF + f; + Value >>= QBits; + Value = m[j * 4 + i] < 0 ? 
-Value : Value; + Buf[4 * j + i] = pDstQuantCoeff [4 * j + i] = (OMX_S16)Value; + if ((pNumCoeff != NULL) && Value) + { + NumCoeff++; + } + } + } + + /* Output number of non-zero Coeffs */ + if (pNumCoeff != NULL) + { + *pNumCoeff = NumCoeff; + } + + /* Residual Inv Transform */ + if (pDstDeQuantCoeff != NULL) + { + /* Re Scale */ + for (j = 0; j < 4; j++) + { + for (i = 0; i < 4; i++) + { + m [j * 4 + i] = Buf [j * 4 + i] * (1 << QPper) * + armVCM4P10_VMatrix[QPmod][armVCM4P10_PosToVCol4x4[j * 4 + i]]; + } + } + armVCM4P10_TransformResidual4x4 (pDstDeQuantCoeff, m); + } + + return OMX_Sts_NoErr; +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c new file mode 100644 index 0000000000000000000000000000000000000000..9ad0e813f84da21949a207a9288d72b6f6a1b5dc --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c @@ -0,0 +1,131 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_TransformDequantChromaDCFromPair.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * H.264 inverse quantize and transform module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/* + * Description: + * Dequantize Chroma 2x2 DC block + */ + +static void DequantChromaDC2x2( + OMX_S16* pDst, + OMX_INT QP +) +{ + int Shift = (QP/6)-1 ; + int Scale = armVCM4P10_VMatrix[QP%6][0]; + int i, Value; + + if (Shift >= 0) + { + for (i=0; i<4; i++) + { + Value = (pDst[i] * Scale) << Shift; + pDst[i] = (OMX_S16)Value; + } + } + else + { + for (i=0; i<4; i++) + { + Value = (pDst[i] * Scale) >> 1; + pDst[i] = (OMX_S16)Value; + } + } +} + + +/* + * Description: + * Inverse Transform DC 2x2 Coefficients + */ + +static void InvTransformDC2x2(OMX_S16* pData) +{ + int c00 = pData[0]; + int c01 = pData[1]; + int c10 = pData[2]; + int c11 = pData[3]; + + int d00 = c00 + c01; + int d01 = c00 - c01; + int d10 = c10 + c11; + int d11 = c10 - c11; + + pData[0] = (OMX_S16)(d00 + d10); + pData[1] = (OMX_S16)(d01 + d11); + pData[2] = (OMX_S16)(d00 - d10); + pData[3] = (OMX_S16)(d01 - d11); +} + + +/** + * Function: omxVCM4P10_TransformDequantChromaDCFromPair (6.3.4.2.2) + * + * Description: + * Reconstruct the 2x2 ChromaDC block from coefficient-position pair buffer, + * perform integer inverse transformation, and dequantization for 2x2 chroma + * DC coefficients, and update the pair buffer pointer to next non-empty + * block. + * + * Input Arguments: + * + * ppSrc - Double pointer to residual coefficient-position pair buffer + * output by CALVC decoding + * QP - Quantization parameter QpC + * + * Output Arguments: + * + * ppSrc - *ppSrc is updated to the start of next non empty block + * pDst - Pointer to the reconstructed 2x2 ChromaDC coefficients buffer; + * must be aligned on a 4-byte boundary. + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppSrc or pDst is NULL. 
+ * - pDst is not 4-byte aligned. + * - QP is not in the range of [0-51]. + * + */ + +OMXResult omxVCM4P10_TransformDequantChromaDCFromPair( + const OMX_U8 **ppSrc, + OMX_S16* pDst, + OMX_INT QP + ) +{ + armRetArgErrIf(ppSrc == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(*ppSrc == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pDst), OMX_Sts_BadArgErr); + armRetArgErrIf(QP<0, OMX_Sts_BadArgErr); + armRetArgErrIf(QP>51, OMX_Sts_BadArgErr); + + armVCM4P10_UnpackBlock2x2(ppSrc, pDst); + InvTransformDC2x2(pDst); + DequantChromaDC2x2(pDst, QP); + + return OMX_Sts_NoErr; +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c new file mode 100644 index 0000000000000000000000000000000000000000..16c8be1b08722f062391ffdc2d56ea13021cb68b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c @@ -0,0 +1,148 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_TransformDequantLumaDCFromPair.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * H.264 inverse quantize and transform module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/* + * Description: + * Dequantize Luma DC block + */ + +static void DequantLumaDC4x4( + OMX_S16* pDst, + OMX_INT QP +) +{ + int Shift = (QP/6)-2 ; + int Scale = armVCM4P10_VMatrix[QP%6][0]; + int i, Round, Value; + + if (Shift >= 0) + { + for (i=0; i<16; i++) + { + Value = (pDst[i] * Scale) << Shift; + pDst[i] = (OMX_S16)Value; + } + } + else + { + Shift = -Shift;; + Round = 1<<(Shift-1); + + for (i=0; i<16; i++) + { + Value = (pDst[i] * Scale + Round) >> Shift; + pDst[i] = (OMX_S16)Value; + } + } +} + + + +/* + * Description: + * Inverse Transform DC 4x4 Coefficients + */ +static void InvTransformDC4x4(OMX_S16* pData) +{ + int i; + + /* Transform rows */ + for (i=0; i<16; i+=4) + { + int c0 = pData[i+0]; + int c1 = pData[i+1]; + int c2 = pData[i+2]; + int c3 = pData[i+3]; + pData[i+0] = (OMX_S16)(c0+c1+c2+c3); + pData[i+1] = (OMX_S16)(c0+c1-c2-c3); + pData[i+2] = (OMX_S16)(c0-c1-c2+c3); + pData[i+3] = (OMX_S16)(c0-c1+c2-c3); + } + + /* Transform columns */ + for (i=0; i<4; i++) + { + int c0 = pData[i+0]; + int c1 = pData[i+4]; + int c2 = pData[i+8]; + int c3 = pData[i+12]; + pData[i+0] = (OMX_S16)(c0+c1+c2+c3); + pData[i+4] = (OMX_S16)(c0+c1-c2-c3); + pData[i+8] = (OMX_S16)(c0-c1-c2+c3); + pData[i+12] = (OMX_S16)(c0-c1+c2-c3); + } +} + + +/** + * Function: omxVCM4P10_TransformDequantLumaDCFromPair (6.3.4.2.1) + * + * Description: + * Reconstructs the 4x4 LumaDC block from the coefficient-position pair + * buffer, performs integer inverse, and dequantization for 4x4 LumaDC + * coefficients, and updates the pair buffer pointer to the next non-empty + * block. 
+ * + * Input Arguments: + * + * ppSrc - Double pointer to residual coefficient-position pair buffer + * output by CALVC decoding + * QP - Quantization parameter QpY + * + * Output Arguments: + * + * ppSrc - *ppSrc is updated to the start of next non empty block + * pDst - Pointer to the reconstructed 4x4 LumaDC coefficients buffer; must + * be aligned on a 8-byte boundary. + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppSrc or pDst is NULL. + * - pDst is not 8 byte aligned. + * - QP is not in the range of [0-51]. + * + */ + +OMXResult omxVCM4P10_TransformDequantLumaDCFromPair( + const OMX_U8 **ppSrc, + OMX_S16* pDst, + OMX_INT QP + ) +{ + armRetArgErrIf(ppSrc == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(*ppSrc == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot8ByteAligned(pDst), OMX_Sts_BadArgErr); + armRetArgErrIf(QP<0, OMX_Sts_BadArgErr); + armRetArgErrIf(QP>51, OMX_Sts_BadArgErr); + + armVCM4P10_UnpackBlock4x4(ppSrc, pDst); + /*InvTransformDequantLumaDC4x4(pDst, QP);*/ + InvTransformDC4x4(pDst); + DequantLumaDC4x4(pDst, QP); + + return OMX_Sts_NoErr; +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c new file mode 100644 index 0000000000000000000000000000000000000000..b5544dd0caaad77581fb9b743b9967fe4d251c84 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c @@ -0,0 +1,97 @@ +/** + * + * File Name: omxVCM4P10_TransformQuant_ChromaDC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * Description: + * This function will calculate 4x4 hadamard transform of chroma DC + * coefficients and quantization + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: omxVCM4P10_TransformQuant_ChromaDC (6.3.5.6.1) + * + * Description: + * This function performs 2x2 Hadamard transform of chroma DC coefficients + * and then quantizes the coefficients. + * + * Input Arguments: + * + * pSrcDst - Pointer to the 2x2 array of chroma DC coefficients. 8-byte + * alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * bIntra - Indicate whether this is an INTRA block. 1-INTRA, 0-INTER + * + * Output Arguments: + * + * pSrcDst - Pointer to transformed and quantized coefficients. 8-byte + * alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcDst + * - pSrcDst is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_TransformQuant_ChromaDC( + OMX_S16* pSrcDst, + OMX_U32 iQP, + OMX_U8 bIntra +) +{ + OMX_INT i, j; + OMX_S32 m[2][2]; + OMX_S32 Value; + OMX_S32 QbitsPlusOne, Two_f, MF00; + + /* Check for argument error */ + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr); + armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr); + + /* Hadamard Transform for 2x2 block */ + m[0][0] = pSrcDst[0] + pSrcDst[1] + pSrcDst[2] + pSrcDst[3]; + m[0][1] = pSrcDst[0] - pSrcDst[1] + pSrcDst[2] - pSrcDst[3]; + m[1][0] = pSrcDst[0] + pSrcDst[1] - pSrcDst[2] - pSrcDst[3]; + m[1][1] = pSrcDst[0] - pSrcDst[1] - pSrcDst[2] + pSrcDst[3]; + + /* Quantization */ + QbitsPlusOne = ARM_M4P10_Q_OFFSET + 1 + (iQP / 6); /*floor (QP/6)*/ + MF00 = armVCM4P10_MFMatrix [iQP % 6][0]; + + Two_f = (1 << QbitsPlusOne) / (bIntra ? 
3 : 6); /* 3->INTRA, 6->INTER */ + + /* Scaling */ + for (j = 0; j < 2; j++) + { + for (i = 0; i < 2; i++) + { + Value = (armAbs(m[j][i]) * MF00 + Two_f) >> QbitsPlusOne; + pSrcDst[j * 2 + i] = (OMX_S16)((m[j][i] < 0) ? -Value : Value); + } + } + + return OMX_Sts_NoErr; +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c new file mode 100644 index 0000000000000000000000000000000000000000..2ccf7f0597c535fb57127218f01be254a17cbfe9 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c @@ -0,0 +1,119 @@ +/** + * + * File Name: omxVCM4P10_TransformQuant_LumaDC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This function will calculate 4x4 hadamard transform of luma DC coefficients + * and quantization + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_TransformQuant_LumaDC (6.3.5.6.2) + * + * Description: + * This function performs a 4x4 Hadamard transform of luma DC coefficients + * and then quantizes the coefficients. + * + * Input Arguments: + * + * pSrcDst - Pointer to the 4x4 array of luma DC coefficients. 16-byte + * alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * + * Output Arguments: + * + * pSrcDst - Pointer to transformed and quantized coefficients. 16-byte + * alignment required. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: pSrcDst + * - pSrcDst is not aligned on an 16-byte boundary + * + */ +OMXResult omxVCM4P10_TransformQuant_LumaDC( + OMX_S16* pSrcDst, + OMX_U32 iQP +) +{ + OMX_INT i, j; + OMX_S32 m1[4][4], m2[4][4]; + OMX_S32 Value; + OMX_U32 QbitsPlusOne, Two_f, MF; + + /* Check for argument error */ + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot16ByteAligned(pSrcDst), OMX_Sts_BadArgErr); + armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr); + + /* Hadamard Transform for 4x4 block */ + /* Horizontal Hadamard */ + for (i = 0; i < 4; i++) + { + j = i * 4; + + m1[i][0] = pSrcDst[j + 0] + pSrcDst[j + 2]; /* a+c */ + m1[i][1] = pSrcDst[j + 1] + pSrcDst[j + 3]; /* b+d */ + m1[i][2] = pSrcDst[j + 0] - pSrcDst[j + 2]; /* a-c */ + m1[i][3] = pSrcDst[j + 1] - pSrcDst[j + 3]; /* b-d */ + + m2[i][0] = m1[i][0] + m1[i][1]; /* a+b+c+d */ + m2[i][1] = m1[i][2] + m1[i][3]; /* a+b-c-d */ + m2[i][2] = m1[i][2] - m1[i][3]; /* a-b-c+d */ + m2[i][3] = m1[i][0] - m1[i][1]; /* a-b+c-d */ + + } + + /* Vertical */ + for (i = 0; i < 4; i++) + { + m1[0][i] = m2[0][i] + m2[2][i]; + m1[1][i] = m2[1][i] + m2[3][i]; + m1[2][i] = m2[0][i] - m2[2][i]; + m1[3][i] = m2[1][i] - m2[3][i]; + + m2[0][i] = m1[0][i] + m1[1][i]; + m2[1][i] = m1[2][i] + m1[3][i]; + m2[2][i] = m1[2][i] - m1[3][i]; + m2[3][i] = m1[0][i] - m1[1][i]; + } + + + /* Quantization */ + QbitsPlusOne = ARM_M4P10_Q_OFFSET + 1 + (iQP / 6); /*floor (QP/6)*/ + Two_f = (1 << QbitsPlusOne) / 3; /* 3->INTRA, 6->INTER */ + MF = armVCM4P10_MFMatrix [iQP % 6][0]; + + /* Scaling */ + for (j = 0; j < 4; j++) + { + for (i = 0; i < 4; i++) + { + Value = (armAbs((m2[j][i]/* + 1*/) / 2) * MF + Two_f) >> QbitsPlusOne; + pSrcDst[j * 4 + i] = (OMX_S16)((m2[j][i] < 0) ? 
-Value : Value); + } + } + return OMX_Sts_NoErr; +} + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_DCT_Table.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_DCT_Table.h new file mode 100644 index 0000000000000000000000000000000000000000..3255b61beacff199a8bc0c8e4e86ee0367e1b2a5 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_DCT_Table.h @@ -0,0 +1,30 @@ +/** + * + * File Name: armVCM4P2_DCT_Table.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * + * File: armVCM4P2_DCT_Table.h + * Description: Declares Tables used for DCT/IDCT module + * in MP4P2 codec. + * + */ + +#ifndef _OMXDCTTAB_H_ +#define _OMXDCTTAB_H_ + +extern const OMX_F64 armVCM4P2_preCalcDCTCos[8][8]; + +#endif /* _OMXDCTTAB_H_ */ + + +/* End of file */ + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h new file mode 100644 index 0000000000000000000000000000000000000000..92ecc0573d5acc977eff446ad903a717ef58dc44 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h @@ -0,0 +1,42 @@ +/** + * + * File Name: armVCM4P2_Huff_Tables_VLC.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * + * File: armVCM4P2_Huff_Tables.h + * Description: Declares Tables used for Hufffman coding and decoding + * in MP4P2 codec. 
+ * + */ + +#ifndef _OMXHUFFTAB_H_ +#define _OMXHUFFTAB_H_ + +extern const OMX_U8 armVCM4P2_IntraL0RunIdx[11]; +extern const ARM_VLC32 armVCM4P2_IntraVlcL0[68]; +extern const OMX_U8 armVCM4P2_IntraL1RunIdx[7]; +extern const ARM_VLC32 armVCM4P2_IntraVlcL1[36]; +extern const OMX_U8 armVCM4P2_IntraL0LMAX[15]; +extern const OMX_U8 armVCM4P2_IntraL1LMAX[21]; +extern const OMX_U8 armVCM4P2_IntraL0RMAX[27]; +extern const OMX_U8 armVCM4P2_IntraL1RMAX[8]; +extern const OMX_U8 armVCM4P2_InterL0RunIdx[12]; +extern const ARM_VLC32 armVCM4P2_InterVlcL0[59]; +extern const OMX_U8 armVCM4P2_InterL1RunIdx[3]; +extern const ARM_VLC32 armVCM4P2_InterVlcL1[45]; +extern const OMX_U8 armVCM4P2_InterL0LMAX[27]; +extern const OMX_U8 armVCM4P2_InterL1LMAX[41]; +extern const OMX_U8 armVCM4P2_InterL0RMAX[12]; +extern const OMX_U8 armVCM4P2_InterL1RMAX[3]; +extern const ARM_VLC32 armVCM4P2_aIntraDCLumaIndex[14]; +extern const ARM_VLC32 armVCM4P2_aIntraDCChromaIndex[14]; +extern const ARM_VLC32 armVCM4P2_aVlcMVD[66]; + +#endif /* _OMXHUFFTAB_H_ */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h new file mode 100644 index 0000000000000000000000000000000000000000..c75ed8910ecdc5c9d437a6bb5b23ba5efb677886 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h @@ -0,0 +1,25 @@ +/** + * + * File Name: armVCM4P2_ZigZag_Tables.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * + * File: armVCM4P2_Zigzag_Tables.h + * Description: Declares Tables used for Zigzag scan in MP4P2 codec. 
+ * + */ + +#ifndef _OMXZIGZAGTAB_H_ +#define _OMXZIGZAGTAB_H_ + +extern const OMX_U8 armVCM4P2_aClassicalZigzagScan [64]; +extern const OMX_U8 armVCM4P2_aHorizontalZigzagScan [64]; +extern const OMX_U8 armVCM4P2_aVerticalZigzagScan [64]; + +#endif /* _OMXZIGZAGTAB_H_ */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_ACDCPredict.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_ACDCPredict.c new file mode 100644 index 0000000000000000000000000000000000000000..b6a396a665cdf1541fcf8a383aaed23905375182 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_ACDCPredict.c @@ -0,0 +1,253 @@ +/** + * + * File Name: armVCM4P2_ACDCPredict.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains module for DC/AC coefficient prediction + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: armVCM4P2_ACDCPredict + * + * Description: + * Performs adaptive DC/AC coefficient prediction for an intra block. Prior + * to the function call, prediction direction (predDir) should be selected + * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2. + * + * Remarks: + * + * Parameters: + * [in] pSrcDst pointer to the coefficient buffer which contains + * the quantized coefficient residuals (PQF) of the + * current block + * [in] pPredBufRow pointer to the coefficient row buffer + * [in] pPredBufCol pointer to the coefficient column buffer + * [in] curQP quantization parameter of the current block. curQP + * may equal to predQP especially when the current + * block and the predictor block are in the same + * macroblock. 
+ * [in] predQP quantization parameter of the predictor block + * [in] predDir indicates the prediction direction which takes one + * of the following values: + * OMX_VC_HORIZONTAL predict horizontally + * OMX_VC_VERTICAL predict vertically + * [in] ACPredFlag a flag indicating if AC prediction should be + * performed. It is equal to ac_pred_flag in the bit + * stream syntax of MPEG-4 + * [in] videoComp video component type (luminance, chrominance or + * alpha) of the current block + * [in] flag This flag defines the if one wants to use this functions to + * calculate PQF (set 1, prediction) or QF (set 0, reconstruction) + * [out] pPreACPredict pointer to the predicted coefficients buffer. + * Filled ONLY if it is not NULL + * [out] pSrcDst pointer to the coefficient buffer which contains + * the quantized coefficients (QF) of the current + * block + * [out] pPredBufRow pointer to the updated coefficient row buffer + * [out] pPredBufCol pointer to the updated coefficient column buffer + * [out] pSumErr pointer to the updated sum of the difference + * between predicted and unpredicted coefficients + * If this is NULL, do not update + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armVCM4P2_ACDCPredict( + OMX_S16 * pSrcDst, + OMX_S16 * pPreACPredict, + OMX_S16 * pPredBufRow, + OMX_S16 * pPredBufCol, + OMX_INT curQP, + OMX_INT predQP, + OMX_INT predDir, + OMX_INT ACPredFlag, + OMXVCM4P2VideoComponent videoComp, + OMX_U8 flag, + OMX_INT *pSumErr +) +{ + OMX_INT dcScaler, i; + OMX_S16 tempPred; + + /* Argument error checks */ + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pPredBufRow == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pPredBufCol == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(curQP <= 0, OMX_Sts_BadArgErr); + armRetArgErrIf(predQP <= 0, OMX_Sts_BadArgErr); + armRetArgErrIf((predDir != 1) && (predDir != 2), OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs4ByteAligned(pSrcDst), OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs4ByteAligned(pPredBufRow), OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs4ByteAligned(pPredBufCol), OMX_Sts_BadArgErr); + + + /* Set DC scaler value to avoid some compilers giving a warning. */ + dcScaler=0; + + /* Calculate the DC scaler value */ + if (videoComp == OMX_VC_LUMINANCE) + { + if (curQP >= 1 && curQP <= 4) + { + dcScaler = 8; + } + else if (curQP >= 5 && curQP <= 8) + { + dcScaler = 2 * curQP; + } + else if (curQP >= 9 && curQP <= 24) + { + dcScaler = curQP + 8; + } + else + { + dcScaler = (2 * curQP) - 16; + } + } + else if (videoComp == OMX_VC_CHROMINANCE) + { + if (curQP >= 1 && curQP <= 4) + { + dcScaler = 8; + } + else if (curQP >= 5 && curQP <= 24) + { + dcScaler = (curQP + 13)/2; + } + else + { + dcScaler = curQP - 6; + } + } + + if (pPreACPredict != NULL) + { + pPreACPredict[0] = predDir; + } + + if (predDir == OMX_VC_VERTICAL) + { + /* F[0][0]//dc_scaler */ + tempPred = armIntDivAwayFromZero(pPredBufRow[0], dcScaler); + } + else + { + /* F[0][0]//dc_scaler */ + tempPred = armIntDivAwayFromZero(pPredBufCol[0], dcScaler); + } + + /* Updating the DC value to the row and col buffer */ + *(pPredBufRow - 8) = *pPredBufCol; + + if (flag) + { + /* Cal and store F[0][0] 
into the col buffer */ + *pPredBufCol = pSrcDst[0] * dcScaler; + + /* PQF = QF - F[0][0]//dc_scaler */ + pSrcDst[0] -= tempPred; + } + else + { + /* QF = PQF + F[0][0]//dc_scaler */ + pSrcDst[0] += tempPred; + + /* Saturate */ + pSrcDst[0] = armClip (-2048, 2047, pSrcDst[0]); + + /* Cal and store F[0][0] into the col buffer */ + *pPredBufCol = pSrcDst[0] * dcScaler; + } + + + if (ACPredFlag == 1) + { + if (predDir == OMX_VC_VERTICAL) + { + for (i = 1; i < 8; i++) + { + tempPred = armIntDivAwayFromZero \ + (pPredBufRow[i] * predQP, curQP); + if (flag) + { + /* Updating QF to the row buff */ + pPredBufRow[i] = pSrcDst[i]; + /*PQFX[v][0] = QFX[v][0] - (QFA[v][0] * QPA) // QPX */ + pSrcDst[i] -= tempPred; + /* Sum of absolute values of AC prediction error, this can + be used as a reference to choose whether to use + AC prediction */ + *pSumErr += armAbs(pSrcDst[i]); + /* pPreACPredict[1~7] store the error signal + after AC prediction */ + pPreACPredict[i] = pSrcDst[i]; + } + else + { + /*QFX[v][0] = PQFX[v][0] + (QFA[v][0] * QPA) // QPX */ + pSrcDst[i] += tempPred; + + /* Saturate */ + pSrcDst[i] = armClip (-2048, 2047, pSrcDst[i]); + + /* Updating QF to the row buff */ + pPredBufRow[i] = pSrcDst[i]; + } + } + } + else + { + for (i = 8; i < 64; i += 8) + { + tempPred = armIntDivAwayFromZero \ + (pPredBufCol[i>>3] * predQP, curQP); + if (flag) + { + /* Updating QF to col buff */ + pPredBufCol[i>>3] = pSrcDst[i]; + /*PQFX[0][u] = QFX[0][u] - (QFA[0][u] * QPA) // QPX */ + pSrcDst[i] -= tempPred; + /* Sum of absolute values of AC prediction error, this can + be used as a reference to choose whether to use AC + prediction */ + *pSumErr += armAbs(pSrcDst[i]); + /* pPreACPredict[1~7] store the error signal + after AC prediction */ + pPreACPredict[i>>3] = pSrcDst[i]; + } + else + { + /*QFX[0][u] = PQFX[0][u] + (QFA[0][u] * QPA) // QPX */ + pSrcDst[i] += tempPred; + + /* Saturate */ + pSrcDst[i] = armClip (-2048, 2047, pSrcDst[i]); + + /* Updating QF to col buff */ + 
pPredBufCol[i>>3] = pSrcDst[i]; + } + } + } + } + + return OMX_Sts_NoErr; +} + +/*End of File*/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Half.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Half.c new file mode 100644 index 0000000000000000000000000000000000000000..1b69a337b7034dd4e45d6f11476f20f36c58bdc2 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Half.c @@ -0,0 +1,187 @@ +/** + * + * File Name: armVCM4P2_BlockMatch_Half.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for Block matching, a full search algorithm + * is implemented + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: armVCM4P2_BlockMatch_Half + * + * Description: + * Performs a 16x16 block match with half-pixel resolution. Returns the estimated + * motion vector and associated minimum SAD. This function estimates the half-pixel + * motion vector by interpolating the integer resolution motion vector referenced + * by the input parameter pSrcDstMV, i.e., the initial integer MV is generated + * externally. The input parameters pSrcRefBuf and pSearchPointRefPos should be + * shifted by the winning MV of 16x16 integer search prior to calling BlockMatch_Half_16x16. + * The function BlockMatch_Integer_16x16 may be used for integer motion estimation. + * + * Remarks: + * + * Parameters: + * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB + * that corresponds to the location of the current macroblock in + * the current plane. 
+ * [in] refWidth width of the reference plane + * [in] pRefRect reference plane valid region rectangle + * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane + * (linear array, 256 entries); must be aligned on an 8-byte boundary. + * [in] pSearchPointRefPos position of the starting point for half pixel search (specified + * in terms of integer pixel units) in the reference plane. + * [in] rndVal rounding control bit for half pixel motion estimation; + * 0=rounding control disabled; 1=rounding control enabled + * [in] pSrcDstMV pointer to the initial MV estimate; typically generated during a prior + * 16X16 integer search and its unit is half pixel. + * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8. + * [out]pSrcDstMV pointer to estimated MV + * [out]pDstSAD pointer to minimum SAD + * + * Return Value: + * OMX_Sts_NoErr ¨C no error + * OMX_Sts_BadArgErr ¨C bad arguments + * + */ + +OMXResult armVCM4P2_BlockMatch_Half( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pSearchPointRefPos, + OMX_INT rndVal, + OMXVCMotionVector *pSrcDstMV, + OMX_INT *pDstSAD, + OMX_U8 BlockSize +) +{ + OMX_INT outer, inner, count, index; + OMX_S16 halfPelX = 0, halfPelY = 0, x, y; + OMX_INT candSAD, refSAD = 0; + OMX_INT minSAD, fromX, toX, fromY, toY; + /* Offset to the reference at the begining of the bounding box */ + const OMX_U8 *pTempSrcRefBuf; + OMX_U8 tempPel; + + /* Argument error checks */ + armRetArgErrIf(pSrcRefBuf == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSearchPointRefPos == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcDstMV == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr); + + /* Positioning the pointer */ + pTempSrcRefBuf = pSrcRefBuf + (refWidth * (pSrcDstMV->dy/2)) + (pSrcDstMV->dx/2); + + /* Copy 
the candidate to the temporary linear array */ + for (outer = 0, count = 0,index = 0; + outer < BlockSize; + outer++, index += refWidth - BlockSize) + { + for (inner = 0; inner < BlockSize; inner++, count++, index++) + { + refSAD += armAbs (pTempSrcRefBuf[index] - pSrcCurrBuf[count]); + } + } + + /* Set the minSad as reference SAD */ + minSAD = refSAD; + *pDstSAD = refSAD; + + /* Check for valid region */ + fromX = 1; + toX = 1; + fromY = 1; + toY = 1; + if ((pSearchPointRefPos->x - 1) < pRefRect->x) + { + fromX = 0; + } + + if ((pSearchPointRefPos->x + BlockSize + 1) > (pRefRect->x + pRefRect->width)) + { + toX = 0; + } + + if ((pSearchPointRefPos->y - 1) < pRefRect->y) + { + fromY = 0; + } + + if ((pSearchPointRefPos->y + BlockSize + 1) > (pRefRect->y + pRefRect->height)) + { + toY = 0; + } + + /* Looping on y- axis */ + for (y = -fromY; y <= toY; y++) + { + /* Looping on x- axis */ + for (x = -fromX; x <= toX; x++) + { + /* check for integer position */ + if ( x == 0 && y == 0) + { + continue; + } + /* Positioning the pointer */ + pTempSrcRefBuf = pSrcRefBuf + (refWidth * (pSrcDstMV->dy/2)) + + (pSrcDstMV->dx/2); + + /* Interpolate the pixel and calculate the SAD*/ + for (outer = 0, count = 0, candSAD = 0,index = 0; + outer < BlockSize; + outer++, index += refWidth - BlockSize) + { + for (inner = 0; inner < BlockSize; inner++, count++,index++) + { + tempPel = ( + pTempSrcRefBuf[index] + + pTempSrcRefBuf[index + x] * armAbs(x) + + pTempSrcRefBuf[index + refWidth * y] * armAbs(y) + + pTempSrcRefBuf[index + refWidth * y + x] + * armAbs(x*y) + + armAbs (x) + armAbs (y) - rndVal + ) / (2 * (armAbs (x) + armAbs (y))); + candSAD += armAbs (tempPel - pSrcCurrBuf[count]); + } + } + + /* Result calculations */ + if (armVCM4P2_CompareMV (x, y, candSAD, halfPelX, halfPelY, minSAD)) + { + *pDstSAD = candSAD; + minSAD = candSAD; + halfPelX = x; + halfPelY = y; + } + + } /* End of x- axis */ + } /* End of y-axis */ + + pSrcDstMV->dx += halfPelX; + pSrcDstMV->dy += halfPelY; + 
+ return OMX_Sts_NoErr; + +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c new file mode 100644 index 0000000000000000000000000000000000000000..77fe358bf2c3c37351afe1a2487d150350cc3a60 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c @@ -0,0 +1,167 @@ +/** + * + * File Name: armVCM4P2_BlockMatch_Integer.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for Block matching, a full search algorithm + * is implemented + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: armVCM4P2_BlockMatch_Integer + * + * Description: + * Performs a 16x16 block search; estimates motion vector and associated minimum SAD. + * Both the input and output motion vectors are represented using half-pixel units, and + * therefore a shift left or right by 1 bit may be required, respectively, to match the + * input or output MVs with other functions that either generate output MVs or expect + * input MVs represented using integer pixel units. + * + * Remarks: + * + * Parameters: + * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB that + * corresponds to the location of the current macroblock in the current + * plane. + * [in] refWidth width of the reference plane + * [in] pRefRect pointer to the valid rectangular in reference plane. Relative to image origin. + * It's not limited to the image boundary, but depended on the padding. 
For example, + * if you pad 4 pixels outside the image border, then the value for left border + * can be -4 + * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane (linear array, + * 256 entries); must be aligned on an 8-byte boundary. + * [in] pCurrPointPos position of the current macroblock in the current plane + * [in] pSrcPreMV pointer to predicted motion vector; NULL indicates no predicted MV + * [in] pSrcPreSAD pointer to SAD associated with the predicted MV (referenced by pSrcPreMV) + * [in] searchRange search range for 16X16 integer block,the units of it is full pixel,the search range + * is the same in all directions.It is in inclusive of the boundary and specified in + * terms of integer pixel units. + * [in] pMESpec vendor-specific motion estimation specification structure; must have been allocated + * and then initialized using omxVCM4P2_MEInit prior to calling the block matching + * function. + * [out] pDstMV pointer to estimated MV + * [out] pDstSAD pointer to minimum SAD + * + * Return Value: + * OMX_Sts_NoErr ¨C no error. 
+ * OMX_Sts_BadArgErr ¨C bad arguments + * + */ + +OMXResult armVCM4P2_BlockMatch_Integer( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCMotionVector *pSrcPreMV, + const OMX_INT *pSrcPreSAD, + void *pMESpec, + OMXVCMotionVector *pDstMV, + OMX_INT *pDstSAD, + OMX_U8 BlockSize +) +{ + + /* Definitions and Initializations*/ + + OMX_INT outer, inner, count,index; + OMX_INT candSAD; + /*(256*256 +1) this is to make the SAD max initially*/ + OMX_INT minSAD = 0x10001, fromX, toX, fromY, toY; + /* Offset to the reference at the begining of the bounding box */ + const OMX_U8 *pTempSrcRefBuf; + OMX_S16 x, y; + OMX_INT searchRange; + + /* Argument error checks */ + armRetArgErrIf(pSrcRefBuf == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pCurrPointPos == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDstMV == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr); + + searchRange = ((OMXVCM4P2MEParams *)pMESpec)->searchRange; + /* Check for valid region */ + fromX = searchRange; + toX = searchRange; + fromY = searchRange; + toY = searchRange; + + if ((pCurrPointPos->x - searchRange) < pRefRect->x) + { + fromX = pCurrPointPos->x - pRefRect->x; + } + + if ((pCurrPointPos->x + BlockSize + searchRange) > (pRefRect->x + pRefRect->width)) + { + toX = pRefRect->width - (pCurrPointPos->x - pRefRect->x) - BlockSize; + } + + if ((pCurrPointPos->y - searchRange) < pRefRect->y) + { + fromY = pCurrPointPos->y - pRefRect->y; + } + + if ((pCurrPointPos->y + BlockSize + searchRange) > (pRefRect->y + pRefRect->height)) + { + toY = pRefRect->width - (pCurrPointPos->y - pRefRect->y) - BlockSize; + } + + pDstMV->dx = -fromX; + pDstMV->dy = -fromY; + /* Looping on y- axis */ + for (y = -fromY; y <= 
toY; y++) + { + + /* Looping on x- axis */ + for (x = -fromX; x <= toX; x++) + { + /* Positioning the pointer */ + pTempSrcRefBuf = pSrcRefBuf + (refWidth * y) + x; + + /* Calculate the SAD */ + for (outer = 0, count = 0, index = 0, candSAD = 0; + outer < BlockSize; + outer++, index += refWidth - BlockSize) + { + for (inner = 0; inner < BlockSize; inner++, count++, index++) + { + candSAD += armAbs (pTempSrcRefBuf[index] - pSrcCurrBuf[count]); + } + } + + /* Result calculations */ + if (armVCM4P2_CompareMV (x, y, candSAD, pDstMV->dx/2, pDstMV->dy/2, minSAD)) + { + *pDstSAD = candSAD; + minSAD = candSAD; + pDstMV->dx = x*2; + pDstMV->dy = y*2; + } + + } /* End of x- axis */ + } /* End of y-axis */ + + return OMX_Sts_NoErr; + +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c new file mode 100644 index 0000000000000000000000000000000000000000..94e8639a909a788162e147c93fc12551ebbbfb33 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c @@ -0,0 +1,156 @@ +/** + * + * File Name: armVCM4P2_CheckVLCEscapeMode.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains module for VLC escape mode check + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: armVCM4P2_CheckVLCEscapeMode + * + * Description: + * Performs escape mode decision based on the run, run+, level, level+ and + * last combinations. 
+ * + * Remarks: + * + * Parameters: + * [in] run Run value (count of zeros) to be encoded + * [in] level Level value (non-zero value) to be encoded + * [in] runPlus Calculated as runPlus = run - (RMAX + 1) + * [in] levelPlus Calculated as + * levelPlus = sign(level)*[abs(level) - LMAX] + * [in] maxStoreRun Max store possible (considering last and inter/intra) + * [in] maxRunForMultipleEntries + * The run value after which level + * will be equal to 1: + * (considering last and inter/intra status) + * [in] pRunIndexTable Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c + * (considering last and inter/intra status) + * + * + * Return Value: + * Returns an Escape mode which can take values from 0 to 3 + * 0 --> no escape mode, 1 --> escape type 1, + * 1 --> escape type 2, 3 --> escape type 3, check section 7.4.1.3 + * in the MPEG ISO standard. + * + */ + +OMX_U8 armVCM4P2_CheckVLCEscapeMode( + OMX_U32 run, + OMX_U32 runPlus, + OMX_S16 level, + OMX_S16 levelPlus, + OMX_U8 maxStoreRun, + OMX_U8 maxRunForMultipleEntries, + OMX_INT shortVideoHeader, + const OMX_U8 *pRunIndexTable +) +{ + OMX_U8 escape = 0, fMode = 0, entries; + + level = armAbs (level); + levelPlus = armAbs (levelPlus); + + /* Check for a valid entry with run, level and Last combination + Mode 0 check */ + if (run <= maxStoreRun) + { + entries = pRunIndexTable[run + 1] + - pRunIndexTable[run]; + if (run > maxRunForMultipleEntries) + { + entries = 1; + } + if (level > entries) + { + escape = 1; + } + } + else + { + escape = 1; + } + if(escape && shortVideoHeader) + { + escape = 0; + fMode = 4; + } + /* Check for a valid entry with run, levelPlus and Last combination + Mode 1 check */ + if (escape) + { + escape = 0; + fMode = 1; + if (run <= maxStoreRun) + { + entries = pRunIndexTable[run + 1] + - pRunIndexTable[run]; + if (run > maxRunForMultipleEntries) + { + entries = 1; + } + if (levelPlus > entries) + { + escape = 1; + } + } + else + { + escape = 1; + } + } + + /* Check for a valid entry 
with runPlus, level and Last combination + Mode 2 check */ + if (escape) + { + escape = 0; + fMode = 2; + if (runPlus <= maxStoreRun) + { + entries = pRunIndexTable[runPlus + 1] + - pRunIndexTable[runPlus]; + if (runPlus > maxRunForMultipleEntries) + { + entries = 1; + } + if (level > entries) + { + escape = 1; + } + } + else + { + escape = 1; + } + } + + /* select mode 3 --> FLC */ + if (escape) + { + fMode = 3; + } + + return fMode; +} + +/*End of File*/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CompareMV.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CompareMV.c new file mode 100644 index 0000000000000000000000000000000000000000..3b8845e07467b9ba7603d02f694cc38db72b75ea --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CompareMV.c @@ -0,0 +1,71 @@ +/** + * + * File Name: armVCM4P2_CompareMV.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Description: + * Contains module for comparing motion vectors and SAD's to decide + * the best MV and SAD + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: armVCM4P2_CompareMV + * + * Description: + * Performs comparision of motion vectors and SAD's to decide the + * best MV and SAD + * + * Remarks: + * + * Parameters: + * [in] mvX x coordinate of the candidate motion vector + * [in] mvY y coordinate of the candidate motion vector + * [in] candSAD Candidate SAD + * [in] bestMVX x coordinate of the best motion vector + * [in] bestMVY y coordinate of the best motion vector + * [in] bestSAD best SAD + * + * Return Value: + * OMX_INT -- 1 to indicate that the current sad is the best + * 0 to indicate that it is NOT the best SAD + */ + +OMX_INT armVCM4P2_CompareMV ( + OMX_S16 mvX, + OMX_S16 mvY, + OMX_INT candSAD, + OMX_S16 bestMVX, + OMX_S16 bestMVY, + OMX_INT bestSAD +) +{ + if (candSAD < bestSAD) + { + return 1; + } + if (candSAD > bestSAD) + { + return 0; + } + /* shorter motion vector */ + if ( (mvX * mvX + mvY * mvY) < (bestMVX*bestMVX+bestMVY*bestMVY) ) + { + return 1; + } + return 0; +} + +/*End of File*/ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DCT_Table.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DCT_Table.c new file mode 100644 index 0000000000000000000000000000000000000000..a6f713e7a4d237ec9339f8b2a6a9c314f5ec7b31 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DCT_Table.c @@ -0,0 +1,112 @@ + /** + * + * File Name: armVCM4P2_DCT_Table.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * File: armVCM4P2_DCT_Table.c + * Description: Contains the DCT/IDCT coefficent matrix + * + */ + +#ifndef _OMXDCTCOSTAB_C_ +#define _OMXDCTCOSTAB_C_ + +#include "omxtypes.h" +#include "armOMX.h" + +const OMX_F64 armVCM4P2_preCalcDCTCos[8][8] = +{ + { + 0.353553390593273730, + 0.490392640201615220, + 0.461939766255643370, + 0.415734806151272620, + 0.353553390593273790, + 0.277785116509801140, + 0.191341716182544920, + 0.097545161008064152 + }, + { + 0.353553390593273730, + 0.415734806151272620, + 0.191341716182544920, + -0.097545161008064096, + -0.353553390593273730, + -0.490392640201615220, + -0.461939766255643420, + -0.277785116509801090 + }, + { + 0.353553390593273730, + 0.277785116509801140, + -0.191341716182544860, + -0.490392640201615220, + -0.353553390593273840, + 0.097545161008064138, + 0.461939766255643260, + 0.415734806151272730 + }, + { + 0.353553390593273730, + 0.097545161008064152, + -0.461939766255643370, + -0.277785116509801090, + 0.353553390593273680, + 0.415734806151272730, + -0.191341716182544920, + -0.490392640201615330 + }, + { + 0.353553390593273730, + -0.097545161008064096, + -0.461939766255643420, + 0.277785116509800920, + 0.353553390593273840, + -0.415734806151272620, + -0.191341716182545280, + 0.490392640201615220 + }, + { + 0.353553390593273730, + -0.277785116509800980, + -0.191341716182545170, + 0.490392640201615220, + -0.353553390593273340, + -0.097545161008064013, + 0.461939766255643370, + -0.415734806151272510 + }, + { + 0.353553390593273730, + -0.415734806151272670, + 0.191341716182545000, + 0.097545161008064388, + -0.353553390593273620, + 0.490392640201615330, + -0.461939766255643200, + 0.277785116509800760 + }, + { + 0.353553390593273730, + -0.490392640201615220, + 0.461939766255643260, + -0.415734806151272620, + 0.353553390593273290, + -0.277785116509800760, + 0.191341716182544780, + -0.097545161008064277 + } +}; + +#endif /*_OMXDCTCOSTAB_C_*/ + + +/* End of file */ + + diff --git 
a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c new file mode 100644 index 0000000000000000000000000000000000000000..a2572e07f291c3c4e2e6bf707771afcf1184adba --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c @@ -0,0 +1,144 @@ +/** + * + * File Name: armVCM4P2_DecodeVLCZigzag_intra.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for filling of the coefficient buffer + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM_Bitstream.h" +#include "armCOMM.h" +#include "armVCM4P2_Huff_Tables_VLC.h" +#include "armVCM4P2_ZigZag_Tables.h" + + + +/** + * Function: armVCM4P2_DecodeVLCZigzag_Intra + * + * Description: + * Performs VLC decoding and inverse zigzag scan for one intra coded block. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bitstream buffer + * [in] pBitOffset pointer to the bit position in the byte pointed + * to by *ppBitStream. *pBitOffset is valid within + * [0-7]. + * [in] predDir AC prediction direction which is used to decide + * the zigzag scan pattern. It takes one of the + * following values: + * OMX_VC_NONE AC prediction not used; + * perform classical zigzag scan; + * OMX_VC_HORIZONTAL Horizontal prediction; + * perform alternate-vertical + * zigzag scan; + * OMX_VC_VERTICAL Vertical prediction; + * thus perform + * alternate-horizontal + * zigzag scan. + * [in] start start indicates whether the encoding begins with 0th element + * or 1st. 
+ * [out] ppBitStream *ppBitStream is updated after the block is + * decoded, so that it points to the current byte + * in the bit stream buffer + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream + * [out] pDst pointer to the coefficient buffer of current + * block. Should be 32-bit aligned + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_DecodeVLCZigzag_Intra( + const OMX_U8 ** ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_U8 predDir, + OMX_INT shortVideoHeader, + OMX_U8 start +) +{ + OMX_U8 last = 0; + const OMX_U8 *pZigzagTable = armVCM4P2_aClassicalZigzagScan; + OMXResult errorCode; + + /* Argument error checks */ + armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs4ByteAligned(pDst), OMX_Sts_BadArgErr); + armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset >7), OMX_Sts_BadArgErr); + armRetArgErrIf((predDir > 2), OMX_Sts_BadArgErr); + + switch (predDir) + { + case OMX_VC_NONE: + { + pZigzagTable = armVCM4P2_aClassicalZigzagScan; + break; + } + + case OMX_VC_HORIZONTAL: + { + pZigzagTable = armVCM4P2_aVerticalZigzagScan; + break; + } + + case OMX_VC_VERTICAL: + { + pZigzagTable = armVCM4P2_aHorizontalZigzagScan; + break; + } + } + + errorCode = armVCM4P2_GetVLCBits ( + ppBitStream, + pBitOffset, + pDst, + shortVideoHeader, + start, + &last, + 10, + 62, + 7, + 21, + armVCM4P2_IntraL0RunIdx, + armVCM4P2_IntraVlcL0, + armVCM4P2_IntraL1RunIdx, + armVCM4P2_IntraVlcL1, + armVCM4P2_IntraL0LMAX, + armVCM4P2_IntraL1LMAX, + armVCM4P2_IntraL0RMAX, + armVCM4P2_IntraL1RMAX, + pZigzagTable ); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + if (last == 0) + { + return OMX_Sts_Err; + } + return 
OMX_Sts_NoErr; +} + +/* End of file */ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c new file mode 100644 index 0000000000000000000000000000000000000000..cd6b56d22f796ce45444292578a07e56e8c9a51b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c @@ -0,0 +1,145 @@ +/** + * + * File Name: armVCM4P2_EncodeVLCZigzag_intra.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for zigzag scanning and VLC encoding + * for intra block. + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM_Bitstream.h" +#include "armCOMM.h" +#include "armVCM4P2_Huff_Tables_VLC.h" +#include "armVCM4P2_ZigZag_Tables.h" + + + +/** + * Function: armVCM4P2_EncodeVLCZigzag_Intra + * + * Description: + * Performs zigzag scanning and VLC encoding for one intra block. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7. + * [in] pQDctBlkCoef pointer to the quantized DCT coefficient + * [in] predDir AC prediction direction, which is used to decide + * the zigzag scan pattern. This takes one of the + * following values: + * OMX_VC_NONE AC prediction not used. + * Performs classical zigzag + * scan. + * OMX_VC_HORIZONTAL Horizontal prediction. + * Performs alternate-vertical + * zigzag scan. + * OMX_VC_VERTICAL Vertical prediction. + * Performs alternate-horizontal + * zigzag scan. 
+ * [in] pattern block pattern which is used to decide whether + * this block is encoded + * [in] start start indicates whether the encoding begins with 0th element + * or 1st. + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_EncodeVLCZigzag_Intra( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 predDir, + OMX_U8 pattern, + OMX_INT shortVideoHeader, + OMX_U8 start +) +{ + const OMX_U8 *pZigzagTable = armVCM4P2_aClassicalZigzagScan; + OMXResult errorCode; + + /* Argument error checks */ + armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pQDctBlkCoef == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset >7), OMX_Sts_BadArgErr); + armRetArgErrIf(start > 1, OMX_Sts_BadArgErr); + armRetArgErrIf(predDir > 2, OMX_Sts_BadArgErr); + + if (pattern) + { + switch (predDir) + { + case OMX_VC_NONE: + { + pZigzagTable = armVCM4P2_aClassicalZigzagScan; + break; + } + + case OMX_VC_HORIZONTAL: + { + pZigzagTable = armVCM4P2_aVerticalZigzagScan; + break; + } + + case OMX_VC_VERTICAL: + { + pZigzagTable = armVCM4P2_aHorizontalZigzagScan; + break; + } + } + + errorCode = armVCM4P2_PutVLCBits ( + ppBitStream, + pBitOffset, + pQDctBlkCoef, + shortVideoHeader, + start, + 14, + 20, + 9, + 6, + armVCM4P2_IntraL0RunIdx, + armVCM4P2_IntraVlcL0, + armVCM4P2_IntraL1RunIdx, + armVCM4P2_IntraVlcL1, + armVCM4P2_IntraL0LMAX, + armVCM4P2_IntraL1LMAX, + armVCM4P2_IntraL0RMAX, + armVCM4P2_IntraL1RMAX, + pZigzagTable + ); + 
armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + } /* Pattern check ends*/ + + return (OMX_Sts_NoErr); + +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLCBuffer.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLCBuffer.c new file mode 100644 index 0000000000000000000000000000000000000000..93c950402ed77cb5742a8d07e85c747bca231a42 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLCBuffer.c @@ -0,0 +1,153 @@ +/** + * + * File Name: armVCM4P2_FillVLCBuffer.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains module for putting VLC bits + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armVC.h" +#include "armCOMM.h" +#include "armCOMM_Bitstream.h" + +/** + * Function: armVCM4P2_FillVLCBuffer + * + * Description: + * Performs calculating the VLC bits depending on the escape type and insert + * the same in the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. 
Valid within 0 to 7 + * [in] run Run value (count of zeros) to be encoded + * [in] level Level value (non-zero value) to be encoded + * [in] runPlus Calculated as runPlus = run - (RMAX + 1) + * [in] levelPlus Calculated as + * levelPlus = sign(level)*[abs(level) - LMAX] + * [in] fMode Flag indicating the escape modes + * [in] last status of the last flag + * [in] maxRunForMultipleEntries + * The run value after which level will be equal to 1: + * (considering last and inter/intra status) + * [in] pRunIndexTable Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.h + * [in] pVlcTable VLC table defined in armVCM4P2_Huff_Tables_VLC.h + * [out] ppBitStream *ppBitStream is updated after the block is encoded + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armVCM4P2_FillVLCBuffer ( + OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + OMX_U32 run, + OMX_S16 level, + OMX_U32 runPlus, + OMX_S16 levelPlus, + OMX_U8 fMode, + OMX_U8 last, + OMX_U8 maxRunForMultipleEntries, + const OMX_U8 *pRunIndexTable, + const ARM_VLC32 *pVlcTable +) +{ + OMX_INT tempIndex; + OMX_U32 tempRun = run, sign = 0; + OMX_S16 tempLevel = level; + + /* Escape sequence addition */ + if (fMode == 1) + { + armPackBits(ppBitStream, pBitOffset, 3, 7); + armPackBits(ppBitStream, pBitOffset, 0, 1); + tempLevel = levelPlus; + + } + else if(fMode == 2) + { + armPackBits(ppBitStream, pBitOffset, 3, 7); + armPackBits(ppBitStream, pBitOffset, 2, 2); + tempRun = runPlus; + } + else if (fMode == 3) + { + armPackBits(ppBitStream, pBitOffset, 3, 7); + armPackBits(ppBitStream, pBitOffset, 3, 2); + } + else if (fMode == 4) + { + armPackBits(ppBitStream, pBitOffset, 3, 7); + armPackBits(ppBitStream, pBitOffset, (OMX_U32)last, 1); + armPackBits(ppBitStream, pBitOffset, tempRun, 6); + if((tempLevel != 0) && (tempLevel != -128)) + { + armPackBits(ppBitStream, pBitOffset, + (OMX_U32) tempLevel, 8); + } + return OMX_Sts_NoErr; + } + + if (tempLevel < 0) + { + sign = 1; + tempLevel = armAbs(tempLevel); + } + /* Putting VLC bits in the stream */ + if (fMode < 3) + { + if (tempRun > maxRunForMultipleEntries) + { + tempIndex = pRunIndexTable [maxRunForMultipleEntries + 1] + + (tempRun - maxRunForMultipleEntries - 1); + } + else + { + tempIndex = pRunIndexTable [tempRun] + (tempLevel -1); + } + + armPackVLC32 (ppBitStream, pBitOffset, + pVlcTable [tempIndex]); + armPackBits(ppBitStream, pBitOffset, (OMX_U32)sign, 1); + } + else + { + if (sign) + { + tempLevel = -tempLevel; + } + tempRun = run; + armPackBits(ppBitStream, pBitOffset, (OMX_U32)last, 1); + armPackBits(ppBitStream, pBitOffset, tempRun, 6); + armPackBits(ppBitStream, pBitOffset, 1, 1); + armPackBits(ppBitStream, pBitOffset, + (OMX_U32) tempLevel, 12); + armPackBits(ppBitStream, pBitOffset, 1, 
1); + } + return OMX_Sts_NoErr; +} + +/*End of File*/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLDBuffer.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLDBuffer.c new file mode 100644 index 0000000000000000000000000000000000000000..1712c3a47b5dfd7cb9bdf5d8d99899fee925a356 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLDBuffer.c @@ -0,0 +1,84 @@ +/** + * + * File Name: armVCM4P2_FillVLDBuffer.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains module for VLC get bits from the stream + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armVCM4P2_ZigZag_Tables.h" + + +/** + * Function: armVCM4P2_FillVLDBuffer + * + * Description: + * Performs filling of the coefficient buffer according to the run, level + * and sign, also updates the index + * + * Parameters: + * [in] storeRun Stored Run value (count of zeros) + * [in] storeLevel Stored Level value (non-zero value) + * [in] sign Flag indicating the sign of level + * [in] last status of the last flag + * [in] pIndex pointer to coefficient index in 8x8 matrix + * [out] pIndex pointer to updated coefficient index in 8x8 + * matrix + * [in] pZigzagTable pointer to the zigzag tables + * [out] pDst pointer to the coefficient buffer of current + * block. Should be 32-bit aligned + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armVCM4P2_FillVLDBuffer( + OMX_U32 storeRun, + OMX_S16 * pDst, + OMX_S16 storeLevel, + OMX_U8 sign, + OMX_U8 last, + OMX_U8 * pIndex, + const OMX_U8 * pZigzagTable +) +{ + /* Store the zero's as per the run length count */ + for (;storeRun > 0; storeRun--, (*pIndex)++) + { + pDst[pZigzagTable[*pIndex]] = 0; + } + /* Store the level depending on the sign*/ + if (sign == 1) + { + pDst[pZigzagTable[*pIndex]] = -storeLevel; + } + else + { + pDst[pZigzagTable[*pIndex]] = storeLevel; + } + (*pIndex)++; + + /* If last is 1, fill the remaining elments of the buffer with zeros */ + if (last == 1) + { + while (*pIndex < 64) + { + pDst[pZigzagTable[*pIndex]] = 0; + (*pIndex)++; + } + } + + return OMX_Sts_NoErr; +} + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_GetVLCBits.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_GetVLCBits.c new file mode 100644 index 0000000000000000000000000000000000000000..953f597c1b2a168bd35d9b16e0df3f625268cf77 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_GetVLCBits.c @@ -0,0 +1,278 @@ +/** + * + * File Name: armVCM4P2_GetVLCBits.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains module for VLC get bits from the stream + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armVC.h" +#include "armCOMM.h" +#include "armCOMM_Bitstream.h" +#include "armVCM4P2_ZigZag_Tables.h" +#include "armVCM4P2_Huff_Tables_VLC.h" + + +/** + * Function: armVCM4P2_GetVLCBits + * + * Description: + * Performs escape mode decision based on the run, run+, level, level+ and + * last combinations. 
+ * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7 + * [in] start start indicates whether the encoding begins with + * 0th element or 1st. + * [in/out] pLast pointer to last status flag + * [in] runBeginSingleLevelEntriesL0 The run value from which level + * will be equal to 1: last == 0 + * [in] IndexBeginSingleLevelEntriesL0 Array index in the VLC table + * pointing to the + * runBeginSingleLevelEntriesL0 + * [in] runBeginSingleLevelEntriesL1 The run value from which level + * will be equal to 1: last == 1 + * [in] IndexBeginSingleLevelEntriesL1 Array index in the VLC table + * pointing to the + * runBeginSingleLevelEntriesL0 + * [in] pRunIndexTableL0 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pVlcTableL0 VLC table for last == 0 + * [in] pRunIndexTableL1 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pVlcTableL1 VLC table for last == 1 + * [in] pLMAXTableL0 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pLMAXTableL1 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pRMAXTableL0 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pRMAXTableL1 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [out]pDst pointer to the coefficient buffer of current + * block. Should be 32-bit aligned + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armVCM4P2_GetVLCBits ( + const OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_INT shortVideoHeader, + OMX_U8 start, + OMX_U8 * pLast, + OMX_U8 runBeginSingleLevelEntriesL0, + OMX_U8 maxIndexForMultipleEntriesL0, + OMX_U8 maxRunForMultipleEntriesL1, + OMX_U8 maxIndexForMultipleEntriesL1, + const OMX_U8 * pRunIndexTableL0, + const ARM_VLC32 *pVlcTableL0, + const OMX_U8 * pRunIndexTableL1, + const ARM_VLC32 *pVlcTableL1, + const OMX_U8 * pLMAXTableL0, + const OMX_U8 * pLMAXTableL1, + const OMX_U8 * pRMAXTableL0, + const OMX_U8 * pRMAXTableL1, + const OMX_U8 * pZigzagTable +) +{ + OMX_U32 storeRun; + OMX_U8 tabIndex, markerBit; + OMX_S16 storeLevel; + OMX_U16 unpackRetIndex; + OMX_U8 i, fType, escape; + OMX_U8 sign = 0; + + /* Unpacking the bitstream and RLD */ + for (i = start; i < 64;) + { + escape = armLookAheadBits(ppBitStream, pBitOffset, 7); + if (escape != 3) + { + fType = 0; /* Not in escape mode */ + } + else + { + armSkipBits (ppBitStream, pBitOffset, 7); + if(shortVideoHeader) + { + *pLast = armGetBits(ppBitStream, pBitOffset, 1); + storeRun = armGetBits(ppBitStream, pBitOffset, 6); + storeLevel = armGetBits(ppBitStream, pBitOffset, 8); + + /* Ref to Table B-18 (c) in MPEG4 Standard- FLC code for */ + /* LEVEL when short_video_header is 1, the storeLevel is */ + /* a signed value and the sign and the unsigned value for */ + /* storeLevel need to be extracted and passed to arm */ + /* FillVLDBuffer function */ + + sign = (storeLevel & 0x80); + if(sign==0x80) + { + storeLevel=(storeLevel^0xff)+1; + sign=1; + + } + + armRetDataErrIf( storeLevel == 0 || sign*storeLevel == 128 , OMX_Sts_Err); /* Invalid FLC */ + armRetDataErrIf((i + storeRun) >= 64, OMX_Sts_Err); + armVCM4P2_FillVLDBuffer( + storeRun, + pDst, + storeLevel, + sign, + *pLast, + &i, + pZigzagTable); + return OMX_Sts_NoErr; + + } + if (armGetBits(ppBitStream, pBitOffset, 1)) + { + if (armGetBits(ppBitStream, pBitOffset, 1)) + { + fType = 3; + } + else + { 
+ fType = 2; + } + } + else + { + fType = 1; + } + } + + if (fType < 3) + { + unpackRetIndex = armUnPackVLC32(ppBitStream, pBitOffset, + pVlcTableL0); + if (unpackRetIndex != ARM_NO_CODEBOOK_INDEX) + { + /* Decode run and level from the index */ + /* last = 0 */ + *pLast = 0; + if (unpackRetIndex > maxIndexForMultipleEntriesL0) + { + storeLevel = 1; + storeRun = (unpackRetIndex - maxIndexForMultipleEntriesL0) + + runBeginSingleLevelEntriesL0; + } + else + { + tabIndex = 1; + while (pRunIndexTableL0[tabIndex] <= unpackRetIndex) + { + tabIndex++; + } + storeRun = tabIndex - 1; + storeLevel = unpackRetIndex - pRunIndexTableL0[tabIndex - 1] + 1; + } + sign = (OMX_U8) armGetBits(ppBitStream, pBitOffset, 1); + + if (fType == 1) + { + storeLevel = (armAbs(storeLevel) + pLMAXTableL0[storeRun]); + } + else if (fType == 2) + { + storeRun = storeRun + pRMAXTableL0[storeLevel-1] + 1; + } + } + else + { + unpackRetIndex = armUnPackVLC32(ppBitStream, pBitOffset, + pVlcTableL1); + + armRetDataErrIf(unpackRetIndex == ARM_NO_CODEBOOK_INDEX, OMX_Sts_Err); + + /* Decode run and level from the index */ + /* last = 1 */ + *pLast = 1; + if (unpackRetIndex > maxIndexForMultipleEntriesL1) + { + storeLevel = 1; + storeRun = (unpackRetIndex - maxIndexForMultipleEntriesL1) + + maxRunForMultipleEntriesL1; + } + else + { + tabIndex = 1; + while (pRunIndexTableL1[tabIndex] <= unpackRetIndex) + { + tabIndex++; + } + storeRun = tabIndex - 1; + storeLevel = unpackRetIndex - pRunIndexTableL1[tabIndex - 1] + 1; + } + sign = (OMX_U8) armGetBits(ppBitStream, pBitOffset, 1); + + if (fType == 1) + { + storeLevel = (armAbs(storeLevel) + pLMAXTableL1[storeRun]); + } + else if (fType == 2) + { + storeRun = storeRun + pRMAXTableL1[storeLevel-1] + 1; + } + } + armRetDataErrIf((i + storeRun) >= 64, OMX_Sts_Err); + armVCM4P2_FillVLDBuffer( + storeRun, + pDst, + storeLevel, + sign, + *pLast, + &i, + pZigzagTable); + } + else + { + *pLast = armGetBits(ppBitStream, pBitOffset, 1); + storeRun = 
armGetBits(ppBitStream, pBitOffset, 6); + armRetDataErrIf((i + storeRun) >= 64, OMX_Sts_Err); + markerBit = armGetBits(ppBitStream, pBitOffset, 1); + armRetDataErrIf( markerBit == 0, OMX_Sts_Err); + storeLevel = armGetBits(ppBitStream, pBitOffset, 12); + if (storeLevel & 0x800) + { + storeLevel -= 4096; + } + armRetDataErrIf( storeLevel == 0 || storeLevel == -2048 , OMX_Sts_Err); /* Invalid FLC */ + armGetBits(ppBitStream, pBitOffset, 1); + armVCM4P2_FillVLDBuffer( + storeRun, + pDst, + storeLevel, + 0, /* Sign is not used, preprocessing done */ + *pLast, + &i, + pZigzagTable); + + } + } /* End of forloop for i */ + return OMX_Sts_NoErr; +} + +/* End of File */ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c new file mode 100644 index 0000000000000000000000000000000000000000..cd7e9e41887dfa45384ccbca5ed6aa6528da89bd --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c @@ -0,0 +1,495 @@ + /** + * + * File Name: armVCM4P2_Huff_Tables_VLC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * File: armVCM4P2_Huff_Tables_VLC.c + * Description: Contains all the Huffman tables used in MPEG4 codec + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armCOMM_Bitstream.h" + +/* +* For Intra +* last = 0 +*/ +const OMX_U8 armVCM4P2_IntraL0RunIdx[11] = +{ + 0, 27, 37, 42, 46, 49, 52, + 55, 58, 60, 62 +}; + +/* Entry defined for all values +* for run = 0 to 14 +* Note: the last entry is to terminate while decoding +*/ +const ARM_VLC32 armVCM4P2_IntraVlcL0[68] = +{ + {2, 2}, + {3, 6}, + {4, 15}, + {5, 13}, + {5, 12}, + {6, 21}, + {6, 19}, + {6, 18}, + {7, 23}, + {8, 31}, + {8, 30}, + {8, 29}, + {9, 37}, + {9, 36}, + {9, 35}, + {9, 33}, + {10, 33}, + {10, 32}, + {10, 15}, + {10, 14}, + {11, 7}, + {11, 6}, + {11, 32}, + {11, 33}, + {12, 80}, + {12, 81}, + {12, 82}, + {4, 14}, + {6, 20}, + {7, 22}, + {8, 28}, + {9, 32}, + {9, 31}, + {10, 13}, + {11, 34}, + {12, 83}, + {12, 85}, + {5, 11}, + {7, 21}, + {9, 30}, + {10, 12}, + {12, 86}, + {6, 17}, + {8, 27}, + {9, 29}, + {10, 11}, + {6, 16}, + {9, 34}, + {10, 10}, + {6, 13}, + {9, 28}, + {10, 8}, + {7, 18}, + {9, 27}, + {12, 84}, + {7, 20}, + {9, 26}, + {12, 87}, + {8, 25}, + {10, 9}, + {8, 24}, + {11, 35}, + {8, 23}, + {9, 25}, + {9, 24}, + {10, 7}, + {12, 88}, + {0, 0} +}; + +/* +* For Intra +* last = 1 +*/ + +const OMX_U8 armVCM4P2_IntraL1RunIdx[8] = +{ + 0, 8, 11, 13, 15, 17, 19, 21 +}; + +/* Entry defined for all values +* for run = 0 to 20 +* * Note: the last entry is to terminate while decoding +*/ +const ARM_VLC32 armVCM4P2_IntraVlcL1[36] = +{ + {4, 7}, + {6, 12}, + {8, 22}, + {9, 23}, + {10, 6}, + {11, 5}, + {11, 4}, + {12, 89}, + {6, 15}, + {9, 22}, + {10, 5}, + {6, 14}, + {10, 4}, + {7, 17}, + {11, 36}, + {7, 16}, + {11, 37}, + {7, 19}, + {12, 90}, + {8, 21}, + {12, 91}, + {8, 20}, + {8, 19}, + {8, 26}, + {9, 21}, + {9, 20}, + {9, 19}, + {9, 18}, + {9, 17}, + {11, 38}, + {11, 39}, + {12, 92}, + {12, 93}, + {12, 94}, + {12, 95}, + {0, 0} +}; + +/* LMAX table for Intra (Last == 
0)*/ +const OMX_U8 armVCM4P2_IntraL0LMAX[15] = +{ + 27, 10, 5, 4, 3, 3, 3, + 3, 2, 2, 1, 1, 1, 1, 1 +}; + +/* LMAX table for Intra (Last == 1)*/ +const OMX_U8 armVCM4P2_IntraL1LMAX[21] = +{ + 8, 3, 2, 2, 2, 2, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 +}; + +/* RMAX table for Intra (Last == 0) + Level - 1 Indexed +*/ +const OMX_U8 armVCM4P2_IntraL0RMAX[27] = +{ + 14, 9, 7, 3, 2, 1, 1, + 1, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 +}; + +/* RMAX table for Intra (Last == 1) + Level - 1 Indexed +*/ +const OMX_U8 armVCM4P2_IntraL1RMAX[8] = +{ + 20, 6, 1, 0, 0, 0, 0, 0 +}; + +/* +* For Inter +* last = 0 +*/ +const OMX_U8 armVCM4P2_InterL0RunIdx[12] = +{ + 0, 12, 18, 22, 25, 28, + 31, 34, 36, 38, 40, 42 +}; + +/* Entry defined for all values +* for run = 0 to 26 +* Note: the last entry is to terminate while decoding +*/ +const ARM_VLC32 armVCM4P2_InterVlcL0[59] = +{ + {2, 2}, + {4, 15}, + {6, 21}, + {7, 23}, + {8, 31}, + {9, 37}, + {9, 36}, + {10, 33}, + {10, 32}, + {11, 7}, + {11, 6}, + {11, 32}, + {3, 6}, + {6, 20}, + {8, 30}, + {10, 15}, + {11, 33}, + {12, 80}, + {4, 14}, + {8, 29}, + {10, 14}, + {12, 81}, + {5, 13}, + {9, 35}, + {10, 13}, + {5, 12}, + {9, 34}, + {12, 82}, + {5, 11}, + {10, 12}, + {12, 83}, + {6, 19}, + {10, 11}, + {12, 84}, + {6, 18}, + {10, 10}, + {6, 17}, + {10, 9}, + {6, 16}, + {10, 8}, + {7, 22}, + {12, 85}, + {7, 21}, + {7, 20}, + {8, 28}, + {8, 27}, + {9, 33}, + {9, 32}, + {9, 31}, + {9, 30}, + {9, 29}, + {9, 28}, + {9, 27}, + {9, 26}, + {11, 34}, + {11, 35}, + {12, 86}, + {12, 87}, + {0, 0} +}; + + +/* +* For Inter +* last = 1 +*/ + +const OMX_U8 armVCM4P2_InterL1RunIdx[3] = +{ + 0, 3, 5 +}; + +/* Entry defined for all values +* for run = 0 to 40 +* Note: the last entry is to terminate while decoding +*/ +const ARM_VLC32 armVCM4P2_InterVlcL1[45] = +{ + {4, 7}, + {9, 25}, + {11, 5}, + {6, 15}, + {11, 4}, + {6, 14}, + {6, 13}, + {6, 12}, + {7, 19}, + {7, 18}, + {7, 17}, + {7, 16}, + {8, 26}, + {8, 25}, + {8, 24}, + {8,
23}, + {8, 22}, + {8, 21}, + {8, 20}, + {8, 19}, + {9, 24}, + {9, 23}, + {9, 22}, + {9, 21}, + {9, 20}, + {9, 19}, + {9, 18}, + {9, 17}, + {10, 7}, + {10, 6}, + {10, 5}, + {10, 4}, + {11, 36}, + {11, 37}, + {11, 38}, + {11, 39}, + {12, 88}, + {12, 89}, + {12, 90}, + {12, 91}, + {12, 92}, + {12, 93}, + {12, 94}, + {12, 95}, + { 0, 0} +}; + +/* LMAX table for Inter (Last == 0)*/ +const OMX_U8 armVCM4P2_InterL0LMAX[27] = +{ + 12, 6, 4, 3, 3, 3, 3, 2, + 2, 2, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, +}; + +/* LMAX table for Inter (Last == 1)*/ +const OMX_U8 armVCM4P2_InterL1LMAX[41] = +{ + 3, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +/* RMAX table for Inter (Last == 0) + Level - 1 Indexed +*/ +const OMX_U8 armVCM4P2_InterL0RMAX[12] = +{ + 26, 10, 6, 2, 1, 1, + 0, 0, 0, 0, 0, 0 +}; + +/* RMAX table for Inter (Last == 1) + Level - 1 Indexed +*/ +const OMX_U8 armVCM4P2_InterL1RMAX[3] = +{ + 40, 1, 0 +}; + +/* +* For Intra - Luminance +*/ + +const ARM_VLC32 armVCM4P2_aIntraDCLumaIndex[14] = +{ + {3, 3}, + {2, 3}, + {2, 2}, + {3, 2}, + {3, 1}, + {4, 1}, + {5, 1}, + {6, 1}, + {7, 1}, + {8, 1}, + {9, 1}, + {10, 1}, + {11, 1}, + {0, 0} +}; + +/* +* For Intra - Chrominance +*/ + +const ARM_VLC32 armVCM4P2_aIntraDCChromaIndex[14] = +{ + {2, 3}, + {2, 2}, + {2, 1}, + {3, 1}, + {4, 1}, + {5, 1}, + {6, 1}, + {7, 1}, + {8, 1}, + {9, 1}, + {10, 1}, + {11, 1}, + {12, 1}, + {0, 0} +}; + +/* + * Motion vector decoding table + */ + +const ARM_VLC32 armVCM4P2_aVlcMVD[66] = +{ + {13, 5}, + {13, 7}, + {12, 5}, + {12, 7}, + {12, 9}, + {12, 11}, + {12, 13}, + {12, 15}, + {11, 9}, + {11, 11}, + {11, 13}, + {11, 15}, + {11, 17}, + {11, 19}, + {11, 21}, + {11, 23}, + {11, 25}, + {11, 27}, + {11, 29}, + {11, 31}, + {11, 33}, + {11, 35}, + {10, 19}, + {10, 21}, + {10, 23}, + {8, 7}, + {8, 9}, + {8, 11}, + {7, 7}, + {5, 3}, + {4, 3}, + {3, 3}, + {1, 1}, + {3, 2}, + {4, 2}, + {5, 2}, + {7,
6}, + {8, 10}, + {8, 8}, + {8, 6}, + {10, 22}, + {10, 20}, + {10, 18}, + {11, 34}, + {11, 32}, + {11, 30}, + {11, 28}, + {11, 26}, + {11, 24}, + {11, 22}, + {11, 20}, + {11, 18}, + {11, 16}, + {11, 14}, + {11, 12}, + {11, 10}, + {11, 8}, + {12, 14}, + {12, 12}, + {12, 10}, + {12, 8}, + {12, 6}, + {12, 4}, + {13, 6}, + {13, 4}, + { 0, 0} +}; + +/* End of file */ + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_PutVLCBits.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_PutVLCBits.c new file mode 100644 index 0000000000000000000000000000000000000000..ca9efec70bb70004b169141c5e09a12612ff83c4 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_PutVLCBits.c @@ -0,0 +1,200 @@ +/** + * + * File Name: armVCM4P2_PutVLCBits.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains module for VLC put bits to bitstream + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armVC.h" +#include "armCOMM.h" +#include "armCOMM_Bitstream.h" +#include "armVCM4P2_ZigZag_Tables.h" +#include "armVCM4P2_Huff_Tables_VLC.h" + + +/** + * Function: armVCM4P2_PutVLCBits + * + * Description: + * Checks the type of Escape Mode and put encoded bits for + * quantized DCT coefficients. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7 + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] start start indicates whether the encoding begins with + * 0th element or 1st. 
+ * [in] maxStoreRunL0 Max store possible (considering last and inter/intra) + * for last = 0 + * [in] maxStoreRunL1 Max store possible (considering last and inter/intra) + * for last = 1 + * [in] maxRunForMultipleEntriesL0 + * The run value after which level + * will be equal to 1: + * (considering last and inter/intra status) for last = 0 + * [in] maxRunForMultipleEntriesL1 + * The run value after which level + * will be equal to 1: + * (considering last and inter/intra status) for last = 1 + * [in] pRunIndexTableL0 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pVlcTableL0 VLC table for last == 0 + * [in] pRunIndexTableL1 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pVlcTableL1 VLC table for last == 1 + * [in] pLMAXTableL0 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pLMAXTableL1 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pRMAXTableL0 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pRMAXTableL1 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [out] pQDctBlkCoef pointer to the quantized DCT coefficient + * [out] ppBitStream *ppBitStream is updated after the block is encoded + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + * Note: each (run, level) event is written one step late, once the next + * non-zero coefficient is found, so that the final event can be written + * with last = 1 after the scan. storeRun / storeLevel hold the pending event. + * + */ + + +OMXResult armVCM4P2_PutVLCBits ( + OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_INT shortVideoHeader, + OMX_U8 start, + OMX_U8 maxStoreRunL0, + OMX_U8 maxStoreRunL1, + OMX_U8 maxRunForMultipleEntriesL0, + OMX_U8 maxRunForMultipleEntriesL1, + const OMX_U8 * pRunIndexTableL0, + const ARM_VLC32 *pVlcTableL0, + const OMX_U8 * pRunIndexTableL1, + const ARM_VLC32 *pVlcTableL1, + const OMX_U8 * pLMAXTableL0, + const OMX_U8 * pLMAXTableL1, + const OMX_U8 * pRMAXTableL0, + const OMX_U8 * pRMAXTableL1, + const OMX_U8 * pZigzagTable +) +{ + + OMX_U32 storeRun = 0, run, storeRunPlus; + OMX_U8 last = 0, first = 1, fMode; + OMX_S16 level, storeLevel = 0, storeLevelPlus; + OMX_INT i; + + /* RLE encoding and packing the bits into the streams */ + for (i = start, run=0; i < 64; i++) + { + level = pQDctBlkCoef[pZigzagTable[i]]; + + /* Counting the run */ + if (level == 0) + { + run++; + } + + /* Found a non-zero coeff */ + else + { + if (first == 0) + { + last = 0; + + /* Check for a valid entry in the VLC table */ + storeLevelPlus = armSignCheck(storeLevel) * + (armAbs(storeLevel) - pLMAXTableL0[storeRun]); + storeRunPlus = storeRun - + (pRMAXTableL0[armAbs(storeLevel) - 1] + 1); + + fMode = armVCM4P2_CheckVLCEscapeMode( + storeRun, + storeRunPlus, + storeLevel, + storeLevelPlus, + maxStoreRunL0, + maxRunForMultipleEntriesL0, + shortVideoHeader, + pRunIndexTableL0); + + armVCM4P2_FillVLCBuffer ( + ppBitStream, + pBitOffset, + storeRun, + storeLevel, + storeRunPlus, + storeLevelPlus, + fMode, + last, + maxRunForMultipleEntriesL0, + pRunIndexTableL0, + pVlcTableL0); + } + storeLevel = level; + storeRun = run; + first = 0; + run = 0; + } + + } /* end of for loop for 64 elements */ + + /* writing the last element */ + last = 1; + + /* Check for a valid entry in the VLC table */ + /* LMAX is indexed by the run of the event being written (storeRun), */ + /* not by the count of trailing zeros left in run */ + storeLevelPlus = armSignCheck(storeLevel) * + (armAbs(storeLevel) - pLMAXTableL1[storeRun]); + storeRunPlus = storeRun - + (pRMAXTableL1[armAbs(storeLevel) - 1] +
1); + fMode = armVCM4P2_CheckVLCEscapeMode( + storeRun, + storeRunPlus, + storeLevel, + storeLevelPlus, + maxStoreRunL1, + maxRunForMultipleEntriesL1, + shortVideoHeader, + pRunIndexTableL1); + + armVCM4P2_FillVLCBuffer ( + ppBitStream, + pBitOffset, + storeRun, + storeLevel, + storeRunPlus, + storeLevelPlus, + fMode, + last, + maxRunForMultipleEntriesL1, + pRunIndexTableL1, + pVlcTableL1); + return OMX_Sts_NoErr; +} + +/* End of File */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_SetPredDir.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_SetPredDir.c new file mode 100644 index 0000000000000000000000000000000000000000..a9cd0089029dbfb8a312b0ebc338f6ab3a09a096 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_SetPredDir.c @@ -0,0 +1,89 @@ +/** + * + * File Name: armVCM4P2_SetPredDir.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains module for detecting the prediction direction + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: armVCM4P2_SetPredDir + * + * Description: + * Performs detecting the prediction direction + * + * Remarks: + * + * Parameters: + * [in] blockIndex block index indicating the component type and + * position as defined in subclause 6.1.3.8, of ISO/IEC + * 14496-2. Furthermore, indexes 6 to 9 indicate the + * alpha blocks spatially corresponding to luminance + * blocks 0 to 3 in the same macroblock.
+ * [in] pCoefBufRow pointer to the coefficient row buffer + * [in] pQpBuf pointer to the quantization parameter buffer + * [out] predQP quantization parameter of the predictor block + * [out] predDir indicates the prediction direction which takes one + * of the following values: + * OMX_VC_HORIZONTAL predict horizontally + * OMX_VC_VERTICAL predict vertically + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_SetPredDir( + OMX_INT blockIndex, + OMX_S16 *pCoefBufRow, + OMX_S16 *pCoefBufCol, + OMX_INT *predDir, + OMX_INT *predQP, + const OMX_U8 *pQpBuf +) +{ + OMX_U8 blockDCLeft; + OMX_U8 blockDCTop; + OMX_U8 blockDCTopLeft; + + if (blockIndex == 3) + { + blockDCTop = *(pCoefBufCol - 8); + } + else + { + blockDCTop = *pCoefBufRow; + } + blockDCLeft = *pCoefBufCol; + blockDCTopLeft = *(pCoefBufRow - 8); + + if (armAbs(blockDCLeft - blockDCTopLeft) < armAbs(blockDCTopLeft \ + - blockDCTop)) + { + *predDir = OMX_VC_VERTICAL; + *predQP = pQpBuf[1]; + } + else + { + *predDir = OMX_VC_HORIZONTAL; + *predQP = pQpBuf[0]; + } + return OMX_Sts_NoErr; +} + + +/*End of File*/ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c new file mode 100644 index 0000000000000000000000000000000000000000..a247c690ef95717b79e2156eb160f86148810986 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c @@ -0,0 +1,58 @@ + /** + * + * File Name: armVCM4P2_Zigzag_Tables.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * File: armVCM4P2_ZigZag_Tables.c + * Description: Contains the zigzag tables + * + */ + +#include "omxtypes.h" + +const OMX_U8 armVCM4P2_aClassicalZigzagScan [64] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; + +const OMX_U8 armVCM4P2_aHorizontalZigzagScan [64] = +{ + 0, 1, 2, 3, 8, 9, 16, 17, + 10, 11, 4, 5, 6, 7, 15, 14, + 13, 12, 19, 18, 24, 25, 32, 33, + 26, 27, 20, 21, 22, 23, 28, 29, + 30, 31, 34, 35, 40, 41, 48, 49, + 42, 43, 36, 37, 38, 39, 44, 45, + 46, 47, 50, 51, 56, 57, 58, 59, + 52, 53, 54, 55, 60, 61, 62, 63 +}; + +const OMX_U8 armVCM4P2_aVerticalZigzagScan [64] = +{ + 0, 8, 16, 24, 1, 9, 2, 10, + 17, 25, 32, 40, 48, 56, 57, 49, + 41, 33, 26, 18, 3, 11, 4, 12, + 19, 27, 34, 42, 50, 58, 35, 43, + 51, 59, 20, 28, 5, 13, 6, 14, + 21, 29, 36, 44, 52, 60, 37, 45, + 53, 61, 22, 30, 7, 15, 23, 31, + 38, 46, 54, 62, 39, 47, 55, 63 +}; + + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c new file mode 100644 index 0000000000000000000000000000000000000000..dcd3ce1f5f08579d61dc1dc4663f9f8edb120ca3 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c @@ -0,0 +1,111 @@ +/** + * + * File Name: omxVCM4P2_BlockMatch_Half_16x16.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Description: + * Contains modules for Block matching, a full search algorithm + * is implemented + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: omxVCM4P2_BlockMatch_Half_16x16 (6.2.4.2.3) + * + * Description: + * Performs a 16x16 block match with half-pixel resolution. Returns the + * estimated motion vector and associated minimum SAD. This function + * estimates the half-pixel motion vector by interpolating the integer + * resolution motion vector referenced by the input parameter pSrcDstMV, i.e., + * the initial integer MV is generated externally. The input parameters + * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of + * 16x16 integer search prior to calling BlockMatch_Half_16x16. The function + * BlockMatch_Integer_16x16 may be used for integer motion estimation. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * macroblock that corresponds to the location of the current + * macroblock in the current plane. + * refWidth - width of the reference plane + * pRefRect - reference plane valid region rectangle + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 256 + * entries); must be aligned on a 16-byte boundary. The number of + * bytes between lines (step) is 16. + * pSearchPointRefPos - position of the starting point for half pixel + * search (specified in terms of integer pixel units) in the + * reference plane, i.e., the reference position pointed to by the + * predicted motion vector. + * rndVal - rounding control parameter: 0 - disabled; 1 - enabled. + * pSrcDstMV - pointer to the initial MV estimate; typically generated + * during a prior 16X16 integer search; specified in terms of + * half-pixel units. 
+ * + * Output Arguments: + * + * pSrcDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: pSrcRefBuf, + * pRefRect, pSrcCurrBuff, pSearchPointRefPos, pSrcDstMV. + * - pSrcCurrBuf is not 16-byte aligned, or + * + */ + +OMXResult omxVCM4P2_BlockMatch_Half_16x16( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pSearchPointRefPos, + OMX_INT rndVal, + OMXVCMotionVector *pSrcDstMV, + OMX_INT *pDstSAD +) +{ + + /* For a blocksize of 16x16 */ + OMX_U8 BlockSize = 16; + + /* Argument error checks */ + armRetArgErrIf(pSrcRefBuf == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSearchPointRefPos == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcDstMV == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs16ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr); + + return (armVCM4P2_BlockMatch_Half( + pSrcRefBuf, + refWidth, + pRefRect, + pSrcCurrBuf, + pSearchPointRefPos, + rndVal, + pSrcDstMV, + pDstSAD, + BlockSize)); + + +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c new file mode 100644 index 0000000000000000000000000000000000000000..6996e6da612339ae50c31ef768ea6632abb2fd63 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c @@ -0,0 +1,109 @@ +/** + * + * File Name: omxVCM4P2_BlockMatch_Half_8x8.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. 
All Rights Reserved. + * + * + * + * Description: + * Contains modules for Block matching, a full search algorithm + * is implemented + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + + +/** + * Function: omxVCM4P2_BlockMatch_Half_8x8 (6.2.4.2.4) + * + * Description: + * Performs an 8x8 block match with half-pixel resolution. Returns the + * estimated motion vector and associated minimum SAD. This function + * estimates the half-pixel motion vector by interpolating the integer + * resolution motion vector referenced by the input parameter pSrcDstMV, i.e., + * the initial integer MV is generated externally. The input parameters + * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of + * 8x8 integer search prior to calling BlockMatch_Half_8x8. The function + * BlockMatch_Integer_8x8 may be used for integer motion estimation. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * block that corresponds to the location of the current 8x8 block + * in the current plane. + * refWidth - width of the reference plane + * pRefRect - reference plane valid region rectangle + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 128 + * entries); must be aligned on a 8-byte boundary. The number of + * bytes between lines (step) is 16. + * pSearchPointRefPos - position of the starting point for half pixel + * search (specified in terms of integer pixel units) in the + * reference plane. + * rndVal - rounding control parameter: 0 - disabled; 1 - enabled. + * pSrcDstMV - pointer to the initial MV estimate; typically generated + * during a prior 8x8 integer search, specified in terms of + * half-pixel units. 
+ * + * Output Arguments: + * + * pSrcDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: + * pSrcRefBuf, pRefRect, pSrcCurrBuff, pSearchPointRefPos, pSrcDstMV + * - pSrcCurrBuf is not 8-byte aligned + * + */ + +OMXResult omxVCM4P2_BlockMatch_Half_8x8( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pSearchPointRefPos, + OMX_INT rndVal, + OMXVCMotionVector *pSrcDstMV, + OMX_INT *pDstSAD +) +{ + /* For a blocksize of 8x8 */ + OMX_U8 BlockSize = 8; + + /* Argument error checks */ + armRetArgErrIf(pSrcRefBuf == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSearchPointRefPos == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcDstMV == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs8ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr); + + return (armVCM4P2_BlockMatch_Half( + pSrcRefBuf, + refWidth, + pRefRect, + pSrcCurrBuf, + pSearchPointRefPos, + rndVal, + pSrcDstMV, + pDstSAD, + BlockSize)); + +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c new file mode 100644 index 0000000000000000000000000000000000000000..e714ef1313bd3bba6fc80c513cc1e1c9b4c2f3ce --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c @@ -0,0 +1,114 @@ +/** + * + * File Name: omxVCM4P2_BlockMatch_Integer_16x16.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM 
Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for Block matching, a full search algorithm + * is implemented + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: omxVCM4P2_BlockMatch_Integer_16x16 (6.2.4.2.1) + * + * Description: + * Performs a 16x16 block search; estimates motion vector and associated + * minimum SAD. Both the input and output motion vectors are represented using + * half-pixel units, and therefore a shift left or right by 1 bit may be + * required, respectively, to match the input or output MVs with other + * functions that either generate output MVs or expect input MVs represented + * using integer pixel units. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * MB that corresponds to the location of the current macroblock in + * the current plane. + * refWidth - width of the reference plane + * pRefRect - pointer to the valid reference plane rectangle; coordinates + * are specified relative to the image origin. Rectangle + * boundaries may extend beyond image boundaries if the image has + * been padded. For example, if padding extends 4 pixels beyond + * frame border, then the value for the left border could be set to + * -4. + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 256 + * entries); must be aligned on a 16-byte boundary. The number of + * bytes between lines (step) is 16. + * pCurrPointPos - position of the current macroblock in the current plane + * pSrcPreMV - pointer to predicted motion vector; NULL indicates no + * predicted MV + * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced + * by pSrcPreMV); may be set to NULL if unavailable. 
+ * pMESpec - vendor-specific motion estimation specification structure; + * must have been allocated and then initialized using + * omxVCM4P2_MEInit prior to calling the block matching function. + * + * Output Arguments: + * + * pDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: pSrcRefBuf, + * pRefRect, pSrcCurrBuff, pCurrPointPos, pDstMV, pDstSAD or + * pMESpec, or + * - pSrcCurrBuf is not 16-byte aligned + * + */ + +OMXResult omxVCM4P2_BlockMatch_Integer_16x16( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCMotionVector *pSrcPreMV, + const OMX_INT *pSrcPreSAD, + void *pMESpec, + OMXVCMotionVector *pDstMV, + OMX_INT *pDstSAD +) +{ + + OMX_U8 BlockSize = 16; + + /* Argument error checks */ + armRetArgErrIf(!armIs16ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr); + + return ( armVCM4P2_BlockMatch_Integer( + pSrcRefBuf, + refWidth, + pRefRect, + pSrcCurrBuf, + pCurrPointPos, + pSrcPreMV, + pSrcPreSAD, + pMESpec, + pDstMV, + pDstSAD, + BlockSize) + ); + + +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c new file mode 100644 index 0000000000000000000000000000000000000000..607e64cbff5787e5d8e83770f950a3fab268a296 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c @@ -0,0 +1,110 @@ +/** + * + * File Name: omxVCM4P2_BlockMatch_Integer_8x8.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. 
All Rights Reserved. + * + * + * + * Description: + * Contains modules for Block matching, a full search algorithm + * is implemented + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: omxVCM4P2_BlockMatch_Integer_8x8 (6.2.4.2.2) + * + * Description: + * Performs an 8x8 block search; estimates motion vector and associated + * minimum SAD. Both the input and output motion vectors are represented + * using half-pixel units, and therefore a shift left or right by 1 bit may be + * required, respectively, to match the input or output MVs with other + * functions that either generate output MVs or expect input MVs represented + * using integer pixel units. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * block that corresponds to the location of the current 8x8 block + * in the current plane. + * refWidth - width of the reference plane + * pRefRect - pointer to the valid reference plane rectangle; coordinates + * are specified relative to the image origin. Rectangle + * boundaries may extend beyond image boundaries if the image has + * been padded. + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 128 + * entries); must be aligned on an 8-byte boundary. The number of + * bytes between lines (step) is 16 bytes. + * pCurrPointPos - position of the current block in the current plane + * pSrcPreMV - pointer to predicted motion vector; NULL indicates no + * predicted MV + * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced + * by pSrcPreMV); may be set to NULL if unavailable. + * pMESpec - vendor-specific motion estimation specification structure; + * must have been allocated and then initialized using + * omxVCM4P2_MEInit prior to calling the block matching function. 
+ * + * Output Arguments: + * + * pDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: pSrcRefBuf, + * pRefRect, pSrcCurrBuff, pCurrPointPos, pDstMV, pDstSAD or + * pMESpec, or + * - pSrcCurrBuf is not 8-byte aligned + * + */ + +OMXResult omxVCM4P2_BlockMatch_Integer_8x8( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCMotionVector *pSrcPreMV, + const OMX_INT *pSrcPreSAD, + void *pMESpec, + OMXVCMotionVector *pDstMV, + OMX_INT *pDstSAD +) +{ + OMX_U8 BlockSize = 8; + + /* Argument error checks */ + armRetArgErrIf(!armIs8ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr); + + return ( armVCM4P2_BlockMatch_Integer( + pSrcRefBuf, + refWidth, + pRefRect, + pSrcCurrBuf, + pCurrPointPos, + pSrcPreMV, + pSrcPreSAD, + pMESpec, + pDstMV, + pDstSAD, + BlockSize) + ); + +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DCT8x8blk.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DCT8x8blk.c new file mode 100644 index 0000000000000000000000000000000000000000..a077ac86b80bb33fec2316af27cf2a0925fbad42 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DCT8x8blk.c @@ -0,0 +1,87 @@ +/** + * + * File Name: omxVCM4P2_DCT8x8blk.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Description: + * Contains modules for 8x8 block DCT + * + */ + +#include +#include "omxtypes.h" +#include "armOMX.h" + +#include "armCOMM.h" +#include "armVCM4P2_DCT_Table.h" + +/** + * Function: omxVCM4P2_DCT8x8blk (6.2.4.4.1) + * + * Description: + * Computes a 2D forward DCT for a single 8x8 block, as defined in + * [ISO14496-2]. + * + * Input Arguments: + * + * pSrc - pointer to the start of the linearly arranged input buffer; must + * be aligned on a 16-byte boundary. Input values (pixel + * intensities) are valid in the range [-255,255]. + * + * Output Arguments: + * + * pDst - pointer to the start of the linearly arranged output buffer; must + * be aligned on a 16-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, returned if: + * - pSrc or pDst is NULL. + * - pSrc or pDst is not 16-byte aligned. + * + */ + +OMXResult omxVCM4P2_DCT8x8blk (const OMX_S16 *pSrc, OMX_S16 *pDst) +{ + OMX_INT x, y, u, v; + + /* Argument error checks */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs16ByteAligned(pSrc), OMX_Sts_BadArgErr); + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr); + + + for (u = 0; u < 8; u++) + { + for (v = 0; v < 8; v++) + { + OMX_F64 sum = 0.0; + for (x = 0; x < 8; x++) + { + for (y = 0; y < 8; y++) + { + sum += pSrc[(x * 8) + y] * + armVCM4P2_preCalcDCTCos[x][u] * + armVCM4P2_preCalcDCTCos[y][v]; + } + } + pDst[(u * 8) + v]= armRoundFloatToS16 (sum); + } + } + + return OMX_Sts_NoErr; +} + + + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c new file mode 100644 index 0000000000000000000000000000000000000000..51f7babbd629dd779a69c14f69f2469e55ce254c --- /dev/null +++ 
b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c @@ -0,0 +1,115 @@ +/** + * + * File Name: omxVCM4P2_DecodeBlockCoef_Inter.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for inter reconstruction + * + */ + + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Inter (6.2.5.4.2) + * + * Description: + * Decodes the INTER block coefficients. This function performs inverse + * quantization, inverse zigzag positioning, and IDCT (with appropriate + * clipping on each step) on the coefficients. The results (residuals) are + * placed in a contiguous array of 64 elements. For INTER block, the output + * buffer holds the residuals for further reconstruction. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream buffer. There is no boundary check for the bit stream + * buffer. + * pBitOffset - pointer to the bit position in the byte pointed to by + * *ppBitStream. *pBitOffset is valid within [0-7] + * QP - quantization parameter + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded, so + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream + * pDst - pointer to the decoded residual buffer (a contiguous array of 64 + * elements of OMX_S16 data type); must be aligned on a 16-byte + * boundary. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is Null: + * ppBitStream, *ppBitStream, pBitOffset , pDst + * - *pBitOffset exceeds [0,7] + * - QP <= 0. + * - pDst is not 16-byte aligned + * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Inter . + * + */ +OMXResult omxVCM4P2_DecodeBlockCoef_Inter( + const OMX_U8 ** ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_INT QP, + OMX_INT shortVideoHeader +) +{ + /* 64 elements are needed but to align it to 16 bytes need + 15 more elements of padding */ + OMX_S16 tempBuf[79]; + OMX_S16 *pTempBuf1; + OMXResult errorCode; + /* Aligning the local buffers */ + pTempBuf1 = armAlignTo16Bytes(tempBuf); + + /* Argument error checks */ + armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr); + armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr); + armRetArgErrIf(((*pBitOffset < 0) || (*pBitOffset > 7)), OMX_Sts_BadArgErr); + + + /* VLD and zigzag */ + errorCode = omxVCM4P2_DecodeVLCZigzag_Inter(ppBitStream, pBitOffset, + pTempBuf1,shortVideoHeader); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Dequantization */ + errorCode = omxVCM4P2_QuantInvInter_I( + pTempBuf1, + QP); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Inverse transform */ + errorCode = omxVCM4P2_IDCT8x8blk(pTempBuf1, pDst); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + return OMX_Sts_NoErr; +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c new file 
mode 100644 index 0000000000000000000000000000000000000000..a0b237673f0b72711be02f0c6a13ab24dbb53730 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c @@ -0,0 +1,225 @@ +/** + * + * File Name: omxVCM4P2_DecodeBlockCoef_Intra.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for intra reconstruction + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Intra (6.2.5.4.1) + * + * Description: + * Decodes the INTRA block coefficients. Inverse quantization, inversely + * zigzag positioning, and IDCT, with appropriate clipping on each step, are + * performed on the coefficients. The results are then placed in the output + * frame/plane on a pixel basis. Note: This function will be used only when + * at least one non-zero AC coefficient of current block exists in the bit + * stream. The DC only condition will be handled in another function. + * + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream buffer. There is no boundary check for the bit stream + * buffer. + * pBitOffset - pointer to the bit position in the byte pointed to by + * *ppBitStream. *pBitOffset is valid within [0-7]. + * step - width of the destination plane + * pCoefBufRow - pointer to the coefficient row buffer; must be aligned on + * an 8-byte boundary. + * pCoefBufCol - pointer to the coefficient column buffer; must be aligned + * on an 8-byte boundary. 
+ * curQP - quantization parameter of the macroblock which the current block + * belongs to + * pQPBuf - pointer to the quantization parameter buffer + * blockIndex - block index indicating the component type and position as + * defined in [ISO14496-2], subclause 6.1.3.8, Figure 6-5. + * intraDCVLC - a code determined by intra_dc_vlc_thr and QP. This allows a + * mechanism to switch between two VLC for coding of Intra DC + * coefficients as per [ISO14496-2], Table 6-21. + * ACPredFlag - a flag equal to ac_pred_flag (of luminance) indicating if + * the ac coefficients of the first row or first column are + * differentially coded for intra coded macroblock. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded, so + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream + * pDst - pointer to the block in the destination plane; must be aligned on + * an 8-byte boundary. + * pCoefBufRow - pointer to the updated coefficient row buffer. + * pCoefBufCol - pointer to the updated coefficient column buffer Note: + * The coefficient buffers must be updated in accordance with the + * update procedure defined in section 6.2.2. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pCoefBufRow, pCoefBufCol, + * pQPBuf, pDst. + * - *pBitOffset exceeds [0,7] + * - curQP exceeds (1, 31) + * - blockIndex exceeds [0,5] + * - step is not the multiple of 8 + * - a pointer alignment requirement was violated. + * OMX_Sts_Err - status error. 
Refer to OMX_Sts_Err of DecodeVLCZigzag_Intra. + * + */ + +OMXResult omxVCM4P2_DecodeBlockCoef_Intra( + const OMX_U8 ** ppBitStream, + OMX_INT *pBitOffset, + OMX_U8 *pDst, + OMX_INT step, + OMX_S16 *pCoefBufRow, + OMX_S16 *pCoefBufCol, + OMX_U8 curQP, + const OMX_U8 *pQPBuf, + OMX_INT blockIndex, + OMX_INT intraDCVLC, + OMX_INT ACPredFlag, + OMX_INT shortVideoHeader + ) +{ + OMX_S16 tempBuf1[79], tempBuf2[79]; + OMX_S16 *pTempBuf1, *pTempBuf2; + OMX_INT predDir, predACDir, i, j, count; + OMX_INT predQP; + OMXVCM4P2VideoComponent videoComp; + OMXResult errorCode; + + /* Argument error checks */ + armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pCoefBufRow == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pCoefBufCol == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pQPBuf == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs8ByteAligned(pDst), OMX_Sts_BadArgErr); + armRetArgErrIf(((curQP <= 0) || (curQP >= 32)), OMX_Sts_BadArgErr); + armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset >7), OMX_Sts_BadArgErr); + armRetArgErrIf((blockIndex < 0) || (blockIndex > 5), OMX_Sts_BadArgErr); + armRetArgErrIf((step % 8) != 0, OMX_Sts_BadArgErr); + + + /* Aligning the local buffers */ + pTempBuf1 = armAlignTo16Bytes(tempBuf1); + pTempBuf2 = armAlignTo16Bytes(tempBuf2); + + /* Setting the AC prediction direction and prediction direction */ + armVCM4P2_SetPredDir( + blockIndex, + pCoefBufRow, + pCoefBufCol, + &predDir, + &predQP, + pQPBuf); + + predACDir = predDir; + + armRetArgErrIf(((predQP <= 0) || (predQP >= 32)), OMX_Sts_BadArgErr); + + if (ACPredFlag == 0) + { + predACDir = OMX_VC_NONE; + } + + /* Setting the videoComp */ + if (blockIndex <= 3) + { + videoComp = OMX_VC_LUMINANCE; + } + else + { + videoComp = OMX_VC_CHROMINANCE; + } + + + /* VLD and zigzag */ + if (intraDCVLC == 1) + { + errorCode 
= omxVCM4P2_DecodeVLCZigzag_IntraDCVLC( + ppBitStream, + pBitOffset, + pTempBuf1, + predACDir, + shortVideoHeader, + videoComp); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + } + else + { + errorCode = omxVCM4P2_DecodeVLCZigzag_IntraACVLC( + ppBitStream, + pBitOffset, + pTempBuf1, + predACDir, + shortVideoHeader); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + } + + /* AC DC prediction */ + errorCode = omxVCM4P2_PredictReconCoefIntra( + pTempBuf1, + pCoefBufRow, + pCoefBufCol, + curQP, + predQP, + predDir, + ACPredFlag, + videoComp); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Dequantization */ + errorCode = omxVCM4P2_QuantInvIntra_I( + pTempBuf1, + curQP, + videoComp, + shortVideoHeader); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Inverse transform */ + errorCode = omxVCM4P2_IDCT8x8blk (pTempBuf1, pTempBuf2); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Placing the linear array into the destination plane and clipping + it to 0 to 255 */ + for (j = 0, count = 0; j < 8; j++) + { + for(i = 0; i < 8; i++, count++) + { + pDst[i] = armClip (0, 255, pTempBuf2[count]); + } + pDst += step; + } + + return OMX_Sts_NoErr; +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c new file mode 100644 index 0000000000000000000000000000000000000000..7e159b7a7e02f2c079847033eccc1abeb3a63675 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c @@ -0,0 +1,243 @@ +/** + * + * File Name: omxVCM4P2_DecodePadMV_PVOP.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Description: + * Contains module for decoding MV and padding the same + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM_Bitstream.h" +#include "armCOMM.h" +#include "armVCM4P2_Huff_Tables_VLC.h" + + + +/** + * Function: omxVCM4P2_DecodePadMV_PVOP (6.2.5.1.1) + * + * Description: + * Decodes and pads the four motion vectors associated with a non-intra P-VOP + * macroblock. For macroblocks of type OMX_VC_INTER4V, the output MV is + * padded as specified in [ISO14496-2], subclause 7.6.1.6. Otherwise, for + * macroblocks of types other than OMX_VC_INTER4V, the decoded MV is copied to + * all four output MV buffer entries. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream buffer + * pBitOffset - pointer to the bit position in the byte pointed to by + * *ppBitStream. *pBitOffset is valid within [0-7]. + * pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB - pointers to the + * motion vector buffers of the macroblocks specially at the left, + * upper, and upper-right side of the current macroblock, + * respectively; a value of NULL indicates unavailability. Note: + * Any neighborhood macroblock outside the current VOP or video + * packet or outside the current GOB (when short_video_header is + * 1 ) for which gob_header_empty is 0 is treated as + * transparent, according to [ISO14496-2], subclause 7.6.5. + * fcodeForward - a code equal to vop_fcode_forward in MPEG-4 bit stream + * syntax + * MBType - the type of the current macroblock. If MBType is not equal to + * OMX_VC_INTER4V, the destination motion vector buffer is still + * filled with the same decoded vector. 
+ * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded, so + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream + * pDstMVCurMB - pointer to the motion vector buffer for the current + * macroblock; contains four decoded motion vectors + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDstMVCurMB + * - *pBitOffset exceeds [0,7] + * - fcodeForward exceeds (0,7] + * - MBType less than zero + * - motion vector buffer is not 4-byte aligned. + * OMX_Sts_Err - status error + * + */ + +OMXResult omxVCM4P2_DecodePadMV_PVOP( + const OMX_U8 ** ppBitStream, + OMX_INT * pBitOffset, + OMXVCMotionVector * pSrcMVLeftMB, + OMXVCMotionVector *pSrcMVUpperMB, + OMXVCMotionVector * pSrcMVUpperRightMB, + OMXVCMotionVector * pDstMVCurMB, + OMX_INT fcodeForward, + OMXVCM4P2MacroblockType MBType + ) +{ + OMXVCMotionVector diffMV; + OMXVCMotionVector dstMVPredME[12]; + OMX_INT iBlk, i, count = 1; + OMX_S32 mvHorResidual = 1, mvVerResidual = 1, mvHorData, mvVerData; + OMX_S8 scaleFactor, index; + OMX_S16 high, low, range; + + + /* Argument error checks */ + armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDstMVCurMB == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(((*pBitOffset < 0) || (*pBitOffset > 7)), OMX_Sts_BadArgErr); + armRetArgErrIf(((fcodeForward < 1) || (fcodeForward > 7)), \ + OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs4ByteAligned(pDstMVCurMB), OMX_Sts_BadArgErr); + + if ((MBType == OMX_VC_INTRA) || + (MBType == OMX_VC_INTRA_Q) + ) + { + /* All MV's are zero */ + for (i = 0; i < 4; i++) + { + pDstMVCurMB[i].dx = 0; + 
pDstMVCurMB[i].dy = 0; + } + + return OMX_Sts_NoErr; + } + + if ((MBType == OMX_VC_INTER4V) || (MBType == OMX_VC_INTER4V_Q)) + { + count = 4; + } + else if ((MBType == OMX_VC_INTER) || (MBType == OMX_VC_INTER_Q)) + { + count = 1; + } + + /* Calculating the scale factor */ + scaleFactor = 1 << (fcodeForward -1); + high = ( 32 * scaleFactor) - 1; + low = ( (-32) * scaleFactor); + range = ( 64 * scaleFactor); + + /* Huffman decoding and MV reconstruction */ + for (iBlk = 0; iBlk < count; iBlk++) + { + + /* Huffman decoding to get Horizontal data and residual */ + index = armUnPackVLC32(ppBitStream, pBitOffset, + armVCM4P2_aVlcMVD); + armRetDataErrIf(index == -1, OMX_Sts_Err); + + mvHorData = index - 32; + + if ((fcodeForward > 1) && (mvHorData != 0)) + { + mvHorResidual = (OMX_S32) armGetBits(ppBitStream, + pBitOffset, (fcodeForward -1)); + } + + /* Huffman decoding to get Vertical data and residual */ + index = armUnPackVLC32(ppBitStream, pBitOffset, armVCM4P2_aVlcMVD); + armRetDataErrIf(index == -1, OMX_Sts_Err); + + mvVerData = index - 32; + + if ((fcodeForward > 1) && (mvVerData != 0)) + { + mvVerResidual = (OMX_S32) armGetBits(ppBitStream, + pBitOffset, (fcodeForward -1)); + } + + /* Calculating the differtial MV */ + if ( (scaleFactor == 1) || (mvHorData == 0) ) + { + diffMV.dx = mvHorData; + } + else + { + diffMV.dx = ((armAbs(mvHorData) - 1) * fcodeForward) + + mvHorResidual + 1; + if (mvHorData < 0) + { + diffMV.dx = -diffMV.dx; + } + } + + if ( (scaleFactor == 1) || (mvVerData == 0) ) + { + diffMV.dy = mvVerData; + } + else + { + diffMV.dy = ((armAbs(mvVerData) - 1) * fcodeForward) + + mvVerResidual + 1; + if (mvVerData < 0) + { + diffMV.dy = -diffMV.dy; + } + } + + /* Find the predicted vector */ + omxVCM4P2_FindMVpred ( + pDstMVCurMB, + pSrcMVLeftMB, + pSrcMVUpperMB, + pSrcMVUpperRightMB, + &pDstMVCurMB[iBlk], + dstMVPredME, + iBlk); + + /* Adding the difference to the predicted MV to reconstruct MV */ + pDstMVCurMB[iBlk].dx += diffMV.dx; + 
pDstMVCurMB[iBlk].dy += diffMV.dy; + + /* Checking the range and keeping it within the limits */ + if ( pDstMVCurMB[iBlk].dx < low ) + { + pDstMVCurMB[iBlk].dx += range; + } + if (pDstMVCurMB[iBlk].dx > high) + { + pDstMVCurMB[iBlk].dx -= range; + } + + if ( pDstMVCurMB[iBlk].dy < low ) + { + pDstMVCurMB[iBlk].dy += range; + } + if (pDstMVCurMB[iBlk].dy > high) + { + pDstMVCurMB[iBlk].dy -= range; + } + } + + if ((MBType == OMX_VC_INTER) || (MBType == OMX_VC_INTER_Q)) + { + pDstMVCurMB[1] = pDstMVCurMB[0]; + pDstMVCurMB[2] = pDstMVCurMB[0]; + pDstMVCurMB[3] = pDstMVCurMB[0]; + } + + return OMX_Sts_NoErr; +} + + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c new file mode 100644 index 0000000000000000000000000000000000000000..88a8d0452383b1aaf18201f2008fe151ae72fdab --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c @@ -0,0 +1,120 @@ +/** + * + * File Name: omxVCM4P2_DecodeVLCZigzag_Inter.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for zigzag scanning and VLC decoding + * for inter block. + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM_Bitstream.h" +#include "armCOMM.h" +#include "armVCM4P2_Huff_Tables_VLC.h" +#include "armVCM4P2_ZigZag_Tables.h" + + + +/** + * Function: omxVCM4P2_DecodeVLCZigzag_Inter (6.2.5.2.3) + * + * Description: + * Performs VLC decoding and inverse zigzag scan for one inter-coded block. 
+ * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the stream buffer + * pBitOffset - pointer to the next available bit in the current stream + * byte referenced by *ppBitStream. The parameter *pBitOffset is + * valid within the range [0-7]. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded such + * that it points to the current byte in the stream buffer + * pBitOffset - *pBitOffset is updated after decoding such that it points + * to the next available bit in the stream byte referenced by + * *ppBitStream + * pDst - pointer to the coefficient buffer of current block; must be + * 4-byte aligned. + * + * Return Value: + * + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDst + * - pDst is not 4-byte aligned + * - *pBitOffset exceeds [0,7] + * OMX_Sts_Err - status error, if: + * - At least one mark bit is equal to zero + * - Encountered an illegal stream code that cannot be found in the VLC table + * - Encountered an illegal code in the VLC FLC table + * - The number of coefficients is greater than 64 + * + */ + +OMXResult omxVCM4P2_DecodeVLCZigzag_Inter( + const OMX_U8 ** ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_INT shortVideoHeader +) +{ + OMX_U8 last,start = 0; + const OMX_U8 *pZigzagTable = armVCM4P2_aClassicalZigzagScan; + OMXResult errorCode; + + /* Argument error checks */ + armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs4ByteAligned(pDst), OMX_Sts_BadArgErr); + + errorCode = 
armVCM4P2_GetVLCBits ( + ppBitStream, + pBitOffset, + pDst, + shortVideoHeader, + start, + &last, + 11, + 42, + 2, + 5, + armVCM4P2_InterL0RunIdx, + armVCM4P2_InterVlcL0, + armVCM4P2_InterL1RunIdx, + armVCM4P2_InterVlcL1, + armVCM4P2_InterL0LMAX, + armVCM4P2_InterL1LMAX, + armVCM4P2_InterL0RMAX, + armVCM4P2_InterL1RMAX, + pZigzagTable ); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + if (last == 0) + { + return OMX_Sts_Err; + } + return OMX_Sts_NoErr; +} + +/* End of file */ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c new file mode 100644 index 0000000000000000000000000000000000000000..96593d1d3e54bc372c0aabce4761bb5fea1061b6 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c @@ -0,0 +1,103 @@ +/** + * + * File Name: omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for zigzag scanning and VLC decoding + * for intra block. + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + + + +/** + * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC (6.2.5.2.2) + * + * Description: + * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients + * for one intra block. Two versions of the function (DCVLC and ACVLC) are + * provided in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding. 
+ * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the + * bitstream buffer + * pBitOffset - pointer to the bit position in the current byte referenced + * by *ppBitStream. The parameter *pBitOffset is valid in the + * range [0-7]. Bit Position in one byte: |Most Least| *pBitOffset + * |0 1 2 3 4 5 6 7| + * predDir - AC prediction direction; used to select the zigzag scan + * pattern; takes one of the following values: OMX_VC_NONE - AC + * prediction not used; performs classical zigzag scan. + * OMX_VC_HORIZONTAL - Horizontal prediction; performs + * alternate-vertical zigzag scan; OMX_VC_VERTICAL - Vertical + * prediction; performs alternate-horizontal zigzag scan. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * videoComp - video component type (luminance or chrominance) of the + * current block + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded such + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated such that it points to the current + * bit position in the byte pointed by *ppBitStream + * pDst - pointer to the coefficient buffer of current block; must be + * 4-byte aligned. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments At least one of the following + * pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, + * or At least one of the following conditions is true: + * *pBitOffset exceeds [0,7], preDir exceeds [0,2], or pDst is + * not 4-byte aligned + * OMX_Sts_Err In DecodeVLCZigzag_IntraDCVLC, dc_size > 12 At least one of + * mark bits equals zero Illegal stream encountered; code cannot + * be located in VLC table Forbidden code encountered in the VLC + * FLC table The number of coefficients is greater than 64 + * + */ + + +OMXResult omxVCM4P2_DecodeVLCZigzag_IntraACVLC( + const OMX_U8 ** ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_U8 predDir, + OMX_INT shortVideoHeader +) +{ + OMX_U8 start = 0; + + return armVCM4P2_DecodeVLCZigzag_Intra( + ppBitStream, + pBitOffset, + pDst, + predDir, + shortVideoHeader, + start); +} + +/* End of file */ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c new file mode 100644 index 0000000000000000000000000000000000000000..95e00d79e119179099920b6ec554e7482c07f579 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c @@ -0,0 +1,170 @@ +/** + * + * File Name: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for zigzag scanning and VLC decoding + * for intra block. 
+ * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM_Bitstream.h" +#include "armCOMM.h" +#include "armVCM4P2_Huff_Tables_VLC.h" +#include "armVCM4P2_ZigZag_Tables.h" + + + + +/** + * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (6.2.5.2.2) + * + * Description: + * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients + * for one intra block. Two versions of the function (DCVLC and ACVLC) are + * provided in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the + * bitstream buffer + * pBitOffset - pointer to the bit position in the current byte referenced + * by *ppBitStream. The parameter *pBitOffset is valid in the + * range [0-7]. + * Bit Position in one byte: |Most Least| + * *pBitOffset |0 1 2 3 4 5 6 7| + * predDir - AC prediction direction; used to select the zigzag scan + * pattern; takes one of the following values: + * - OMX_VC_NONE - AC prediction not used; + * performs classical zigzag scan. + * - OMX_VC_HORIZONTAL - Horizontal prediction; + * performs alternate-vertical zigzag scan; + * - OMX_VC_VERTICAL - Vertical prediction; + * performs alternate-horizontal zigzag scan. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. 
+ * videoComp - video component type (luminance or chrominance) of the + * current block + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded such + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated such that it points to the current + * bit position in the byte pointed by *ppBitStream + * pDst - pointer to the coefficient buffer of current block; must be + * 4-byte aligned. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDst + * - *pBitOffset exceeds [0,7] + * - preDir exceeds [0,2] + * - pDst is not 4-byte aligned + * OMX_Sts_Err - if: + * - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12 + * - At least one of mark bits equals zero + * - Illegal stream encountered; code cannot be located in VLC table + * - Forbidden code encountered in the VLC FLC table. 
+ * - The number of coefficients is greater than 64 + * + */ + +OMXResult omxVCM4P2_DecodeVLCZigzag_IntraDCVLC( + const OMX_U8 ** ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_U8 predDir, + OMX_INT shortVideoHeader, + OMXVCM4P2VideoComponent videoComp +) +{ + /* Dummy initilaization to remove compilation error */ + OMX_S8 DCValueSize = 0; + OMX_U16 powOfSize, fetchDCbits; + OMX_U8 start = 1; + + /* Argument error checks */ + armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs4ByteAligned(pDst), OMX_Sts_BadArgErr); + armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset > 7), OMX_Sts_BadArgErr); + armRetArgErrIf((predDir > 2), OMX_Sts_BadArgErr); + + /* Insert the code into the bitstream */ + if (videoComp == OMX_VC_LUMINANCE) + { + DCValueSize = armUnPackVLC32(ppBitStream, + pBitOffset, armVCM4P2_aIntraDCLumaIndex); + } + else if (videoComp == OMX_VC_CHROMINANCE) + { + DCValueSize = armUnPackVLC32(ppBitStream, + pBitOffset, armVCM4P2_aIntraDCChromaIndex); + } + armRetDataErrIf(DCValueSize == -1, OMX_Sts_Err); + armRetDataErrIf(DCValueSize > 12, OMX_Sts_Err); + + + if (DCValueSize == 0) + { + pDst[0] = 0; + } + else + { + fetchDCbits = (OMX_U16) armGetBits(ppBitStream, pBitOffset, \ + DCValueSize); + + if ( (fetchDCbits >> (DCValueSize - 1)) == 0) + { + /* calulate pow */ + powOfSize = (1 << DCValueSize); + + pDst[0] = (OMX_S16) (fetchDCbits ^ (powOfSize - 1)); + pDst[0] = -pDst[0]; + } + else + { + pDst[0] = fetchDCbits; + } + + if (DCValueSize > 8) + { + /* reading and checking the marker bit*/ + armRetDataErrIf (armGetBits(ppBitStream, pBitOffset, 1) == 0, \ + OMX_Sts_Err); + } + } + + return armVCM4P2_DecodeVLCZigzag_Intra( + ppBitStream, + pBitOffset, + pDst, + predDir, + shortVideoHeader, + start); +} + +/* End of file */ + diff --git 
a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeMV.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeMV.c new file mode 100644 index 0000000000000000000000000000000000000000..def2b6d35172bd5ff93b7987a7488a25fa3bf991 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeMV.c @@ -0,0 +1,212 @@ +/** + * + * File Name: omxVCM4P2_EncodeMV.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains module for predicting MV of MB + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armCOMM_Bitstream.h" +#include "armVCM4P2_Huff_Tables_VLC.h" + + + +/** + * Function: omxVCM4P2_EncodeMV (6.2.4.5.4) + * + * Description: + * Predicts a motion vector for the current macroblock, encodes the + * difference, and writes the output to the stream buffer. The input MVs + * pMVCurMB, pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB should lie + * within the ranges associated with the input parameter fcodeForward, as + * described in [ISO14496-2], subclause 7.6.3. This function provides a + * superset of the functionality associated with the function + * omxVCM4P2_FindMVpred. + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the bitstream buffer + * pBitOffset - index of the first free (next available) bit in the stream + * buffer referenced by *ppBitStream, valid in the range 0 to 7. + * pMVCurMB - pointer to the current macroblock motion vector; a value of + * NULL indicates unavailability. + * pSrcMVLeftMB - pointer to the source left macroblock motion vector; a + * value of NULLindicates unavailability. + * pSrcMVUpperMB - pointer to source upper macroblock motion vector; a + * value of NULL indicates unavailability. 
+ * pSrcMVUpperRightMB - pointer to source upper right MB motion vector; a + * value of NULL indicates unavailability. + * fcodeForward - an integer with values from 1 to 7; used in encoding + * motion vectors related to search range, as described in + * [ISO14496-2], subclause 7.6.3. + * MBType - macro block type, valid in the range 0 to 5 + * + * Output Arguments: + * + * ppBitStream - updated pointer to the current byte in the bit stream + * buffer + * pBitOffset - updated index of the next available bit position in stream + * buffer referenced by *ppBitStream + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following pointers is NULL: ppBitStream, + * *ppBitStream, pBitOffset, pMVCurMB + * - *pBitOffset < 0, or *pBitOffset >7. + * - fcodeForward <= 0, or fcodeForward > 7, or MBType < 0. + * + */ + +OMXResult omxVCM4P2_EncodeMV( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMXVCMotionVector * pMVCurMB, + const OMXVCMotionVector * pSrcMVLeftMB, + const OMXVCMotionVector * pSrcMVUpperMB, + const OMXVCMotionVector * pSrcMVUpperRightMB, + OMX_INT fcodeForward, + OMXVCM4P2MacroblockType MBType +) +{ + OMXVCMotionVector dstMVPred, diffMV; + OMXVCMotionVector dstMVPredME[12]; + /* Initialized to remove compilation warning */ + OMX_INT iBlk, i, count = 1; + OMX_S32 mvHorResidual, mvVerResidual, mvHorData, mvVerData; + OMX_U8 scaleFactor, index; + + /* Argument error checks */ + armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pMVCurMB == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(((*pBitOffset < 0) || (*pBitOffset > 7)), OMX_Sts_BadArgErr); + armRetArgErrIf(((fcodeForward < 1) || (fcodeForward > 7)), \ + OMX_Sts_BadArgErr); + + if ((MBType == OMX_VC_INTRA) || + (MBType == OMX_VC_INTRA_Q) + ) + { + /* No candidate vectors hence make them zero */ + for (i 
= 0; i < 12; i++) + { + dstMVPredME[i].dx = 0; + dstMVPredME[i].dy = 0; + } + + return OMX_Sts_NoErr; + } + + if ((MBType == OMX_VC_INTER4V) || (MBType == OMX_VC_INTER4V_Q)) + { + count = 4; + } + else if ((MBType == OMX_VC_INTER) || (MBType == OMX_VC_INTER_Q)) + { + count = 1; + } + + /* Calculating the scale factor */ + scaleFactor = 1 << (fcodeForward -1); + + for (iBlk = 0; iBlk < count; iBlk++) + { + + /* Find the predicted vector */ + omxVCM4P2_FindMVpred ( + pMVCurMB, + pSrcMVLeftMB, + pSrcMVUpperMB, + pSrcMVUpperRightMB, + &dstMVPred, + dstMVPredME, + iBlk ); + + /* Calculating the differential motion vector (diffMV) */ + diffMV.dx = pMVCurMB[iBlk].dx - dstMVPred.dx; + diffMV.dy = pMVCurMB[iBlk].dy - dstMVPred.dy; + + /* Calculating the mv_data and mv_residual for Horizantal MV */ + if (diffMV.dx == 0) + { + mvHorResidual = 0; + mvHorData = 0; + } + else + { + mvHorResidual = ( armAbs(diffMV.dx) - 1) % scaleFactor; + mvHorData = (armAbs(diffMV.dx) - mvHorResidual + (scaleFactor - 1)) + / scaleFactor; + if (diffMV.dx < 0) + { + mvHorData = -mvHorData; + } + } + + /* Calculating the mv_data and mv_residual for Vertical MV */ + if (diffMV.dy == 0) + { + mvVerResidual = 0; + mvVerData = 0; + } + else + { + mvVerResidual = ( armAbs(diffMV.dy) - 1) % scaleFactor; + mvVerData = (armAbs(diffMV.dy) - mvVerResidual + (scaleFactor - 1)) + / scaleFactor; + if (diffMV.dy < 0) + { + mvVerData = -mvVerData; + } + } + + /* Huffman encoding */ + + /* The index is actually calculate as + index = ((float) (mvHorData/2) + 16) * 2, + meaning the MV data is halfed and then normalized + to begin with zero and then doubled to take care of indexing + the fractional part included */ + index = mvHorData + 32; + armPackVLC32 (ppBitStream, pBitOffset, armVCM4P2_aVlcMVD[index]); + if ((fcodeForward > 1) && (diffMV.dx != 0)) + { + armPackBits (ppBitStream, pBitOffset, mvHorResidual, (fcodeForward -1)); + } + + /* The index is actually calculate as + index = ((float) (mvVerData/2) + 16) * 
2, + meaning the MV data is halfed and then normalized + to begin with zero and then doubled to take care of indexing + the fractional part included */ + index = mvVerData + 32; + armPackVLC32 (ppBitStream, pBitOffset, armVCM4P2_aVlcMVD[index]); + if ((fcodeForward > 1) && (diffMV.dy != 0)) + { + armPackBits (ppBitStream, pBitOffset, mvVerResidual, (fcodeForward -1)); + } + } + + return OMX_Sts_NoErr; +} + + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c new file mode 100644 index 0000000000000000000000000000000000000000..b6c73ea4abe163c9279361faf0f700bd4421a836 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c @@ -0,0 +1,112 @@ +/** + * + * File Name: omxVCM4P2_EncodeVLCZigzag_Inter.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for zigzag scanning and VLC encoding + * for inter block. + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM_Bitstream.h" +#include "armCOMM.h" +#include "armVCM4P2_Huff_Tables_VLC.h" +#include "armVCM4P2_ZigZag_Tables.h" + + + +/** + * Function: omxVCM4P2_EncodeVLCZigzag_Inter (6.2.4.5.3) + * + * Description: + * Performs classical zigzag scanning and VLC encoding for one inter block. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream + * pBitOffset - pointer to the bit position in the byte pointed by + * *ppBitStream. 
Valid within 0 to 7 + * pQDctBlkCoef - pointer to the quantized DCT coefficient + * pattern - block pattern which is used to decide whether this block is + * encoded + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is encoded so that + * it points to the current byte in the bit stream buffer. + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments + * - At least one of the pointers: is NULL: ppBitStream, *ppBitStream, + * pBitOffset, pQDctBlkCoef + * - *pBitOffset < 0, or *pBitOffset >7. + * + */ +OMXResult omxVCM4P2_EncodeVLCZigzag_Inter( + OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 pattern, + OMX_INT shortVideoHeader +) +{ + OMX_U8 start = 0; + const OMX_U8 *pZigzagTable = armVCM4P2_aClassicalZigzagScan; + + /* Argument error checks */ + armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pQDctBlkCoef == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset >7), OMX_Sts_BadArgErr); + + if (pattern) + { + armVCM4P2_PutVLCBits ( + ppBitStream, + pBitOffset, + pQDctBlkCoef, + shortVideoHeader, + start, + 26, + 40, + 10, + 1, + armVCM4P2_InterL0RunIdx, + armVCM4P2_InterVlcL0, + armVCM4P2_InterL1RunIdx, + armVCM4P2_InterVlcL1, + armVCM4P2_InterL0LMAX, + armVCM4P2_InterL1LMAX, + armVCM4P2_InterL0RMAX, + armVCM4P2_InterL1RMAX, + pZigzagTable + ); + } /* Pattern check ends*/ + + return OMX_Sts_NoErr; + +} + +/* End of file */ diff --git 
a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c new file mode 100644 index 0000000000000000000000000000000000000000..d047942cb5a1686f5702528cbd88da8a0ac100ec --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c @@ -0,0 +1,97 @@ +/** + * + * File Name: omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for zigzag scanning and VLC encoding + * for intra block. + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" + + +/** + * Function: omxVCM4P2_EncodeVLCZigzag_IntraACVLC (6.2.4.5.2) + * + * Description: + * Performs zigzag scan and VLC encoding of AC and DC coefficients for one + * intra block. Two versions of the function (DCVLC and ACVLC) are provided + * in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding. + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the bitstream + * pBitOffset - pointer to the bit position in the byte pointed by + * *ppBitStream. Valid within 0 to 7. + * pQDctBlkCoef - pointer to the quantized DCT coefficient + * predDir - AC prediction direction, which is used to decide the zigzag + * scan pattern; takes one of the following values: + * - OMX_VC_NONE - AC prediction not used. + * Performs classical zigzag scan. + * - OMX_VC_HORIZONTAL - Horizontal prediction. + * Performs alternate-vertical zigzag scan. + * - OMX_VC_VERTICAL - Vertical prediction. + * Performs alternate-horizontal zigzag scan. 
+ * pattern - block pattern which is used to decide whether this block is + * encoded + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is encoded, so + * that it points to the current byte in the bit stream buffer. + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments: + * - At least one of the following pointers is NULL: ppBitStream, + * *ppBitStream, pBitOffset, pQDctBlkCoef. + * - *pBitOffset < 0, or *pBitOffset >7. + * - PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or + * OMX_VC_VERTICAL. + * - VideoComp is not one component of enum OMXVCM4P2VideoComponent. + * + */ + +OMXResult omxVCM4P2_EncodeVLCZigzag_IntraACVLC( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 predDir, + OMX_U8 pattern, + OMX_INT shortVideoHeader +) +{ + OMX_U8 start = 0; + + return armVCM4P2_EncodeVLCZigzag_Intra( + ppBitStream, + pBitOffset, + pQDctBlkCoef, + predDir, + pattern, + shortVideoHeader, + start); +} + +/* End of file */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c new file mode 100644 index 0000000000000000000000000000000000000000..c57acd2fdfc01654da2fe330b792b9e585d8262a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c @@ -0,0 +1,160 @@ +/** + * + * File Name: omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + 
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for zigzag scanning and VLC encoding + * for intra block. + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM_Bitstream.h" +#include "armCOMM.h" +#include "armVCM4P2_Huff_Tables_VLC.h" +#include "armVCM4P2_ZigZag_Tables.h" + + + +/** + * Function: omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (6.2.4.5.2) + * + * Description: + * Performs zigzag scan and VLC encoding of AC and DC coefficients for one + * intra block. Two versions of the function (DCVLC and ACVLC) are provided + * in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding". + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the bitstream + * pBitOffset - pointer to the bit position in the byte pointed by + * *ppBitStream. Valid within 0 to 7. + * pQDctBlkCoef - pointer to the quantized DCT coefficient + * predDir - AC prediction direction, which is used to decide the zigzag + * scan pattern; takes one of the following values: + * - OMX_VC_NONE - AC prediction not used. + * Performs classical zigzag scan. + * - OMX_VC_HORIZONTAL - Horizontal prediction. + * Performs alternate-vertical zigzag scan. + * - OMX_VC_VERTICAL - Vertical prediction. + * Performs alternate-horizontal zigzag scan. + * pattern - block pattern which is used to decide whether this block is + * encoded + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. 
+ *   videoComp - video component type (luminance, chrominance) of the current
+ *            block
+ *
+ * Output Arguments:
+ *
+ *   ppBitStream - *ppBitStream is updated after the block is encoded, so
+ *            that it points to the current byte in the bit stream buffer.
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit
+ *            position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream,
+ *              *ppBitStream, pBitOffset, pQDctBlkCoef.
+ *    -   *pBitOffset < 0, or *pBitOffset >7.
+ *    -    PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or
+ *         OMX_VC_VERTICAL.
+ *    -    VideoComp is not one component of enum OMXVCM4P2VideoComponent.
+ *
+ */
+
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraDCVLC(
+     OMX_U8 **ppBitStream,
+     OMX_INT *pBitOffset,
+     const OMX_S16 *pQDctBlkCoef,
+     OMX_U8 predDir,
+     OMX_U8 pattern,
+     OMX_INT shortVideoHeader,
+     OMXVCM4P2VideoComponent videoComp
+)
+{
+    /* The DC term is written by this function, so the shared intra encoder
+       is handed a start value of 1. */
+    const OMX_U8 firstAcCoef = 1;
+    OMX_S16 dc;
+    OMX_S16 twoPowSize;
+    OMX_U16 dcMagnitude;
+    OMX_U8 dcSize;
+
+    /* Reject NULL pointers first; the later checks dereference them. */
+    armRetArgErrIf((ppBitStream == NULL) || (*ppBitStream == NULL) ||
+                   (pBitOffset == NULL) || (pQDctBlkCoef == NULL),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((*pBitOffset >= 0) && (*pBitOffset <= 7)),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((videoComp == OMX_VC_LUMINANCE) ||
+                     (videoComp == OMX_VC_CHROMINANCE)),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((predDir == OMX_VC_NONE) ||
+                     (predDir == OMX_VC_HORIZONTAL) ||
+                     (predDir == OMX_VC_VERTICAL)),
+                   OMX_Sts_BadArgErr);
+
+    if (pattern)
+    {
+        dc = pQDctBlkCoef[0];
+
+        /* Magnitude of the DC term and the size derived from it */
+        dcMagnitude = armAbs(dc);
+        dcSize = armLogSize(dcMagnitude);
+        dcMagnitude = armAbs(dc);   /* re-derived, exactly as the original does */
+
+        /* Size code: one table per video component */
+        if (videoComp == OMX_VC_LUMINANCE)
+        {
+            armPackVLC32(ppBitStream, pBitOffset,
+                         armVCM4P2_aIntraDCLumaIndex[dcSize]);
+        }
+        else if (videoComp == OMX_VC_CHROMINANCE)
+        {
+            armPackVLC32(ppBitStream, pBitOffset,
+                         armVCM4P2_aIntraDCChromaIndex[dcSize]);
+        }
+
+        /* The value bits follow only when the size is non-zero */
+        if (dcSize > 0)
+        {
+            if (dc < 0)
+            {
+                /* Negative value: invert the low dcSize bits of the magnitude */
+                twoPowSize = (1 << dcSize);
+                dcMagnitude ^= (twoPowSize - 1);
+            }
+            armPackBits(ppBitStream, pBitOffset, (OMX_U32)dcMagnitude, dcSize);
+
+            /* A single 1 bit is appended whenever the size exceeds 8 */
+            if (dcSize > 8)
+            {
+                armPackBits(ppBitStream, pBitOffset, 1, 1);
+            }
+        }
+    }
+
+    return armVCM4P2_EncodeVLCZigzag_Intra(ppBitStream,
+                                           pBitOffset,
+                                           pQDctBlkCoef,
+                                           predDir,
+                                           pattern,
+                                           shortVideoHeader,
+                                           firstAcCoef);
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_FindMVpred.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_FindMVpred.c
new file mode 100644
index 0000000000000000000000000000000000000000..a0cff481c5882dc32f91b60c84fdfe24ae7733c7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_FindMVpred.c
@@ -0,0 +1,188 @@
+/**
+ *
+ * File Name:  omxVCM4P2_FindMVpred.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module for predicting MV of MB
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P2_FindMVpred   (6.2.3.1.1)
+ *
+ * Description:
+ * Predicts a motion vector for the current block using the procedure
+ * specified in [ISO14496-2], subclause 7.6.5.  The resulting predicted MV is
+ * returned in pDstMVPred. If the parameter pDstMVPredME is not NULL then
+ * the set of three MV candidates used for prediction is also returned,
+ * otherwise pDstMVPredME is NULL upon return.
+ * + * Input Arguments: + * + * pSrcMVCurMB - pointer to the MV buffer associated with the current Y + * macroblock; a value of NULL indicates unavailability. + * pSrcCandMV1 - pointer to the MV buffer containing the 4 MVs associated + * with the MB located to the left of the current MB; set to NULL + * if there is no MB to the left. + * pSrcCandMV2 - pointer to the MV buffer containing the 4 MVs associated + * with the MB located above the current MB; set to NULL if there + * is no MB located above the current MB. + * pSrcCandMV3 - pointer to the MV buffer containing the 4 MVs associated + * with the MB located to the right and above the current MB; set + * to NULL if there is no MB located to the above-right. + * iBlk - the index of block in the current macroblock + * pDstMVPredME - MV candidate return buffer; if set to NULL then + * prediction candidate MVs are not returned and pDstMVPredME will + * be NULL upon function return; if pDstMVPredME is non-NULL then it + * must point to a buffer containing sufficient space for three + * return MVs. + * + * Output Arguments: + * + * pDstMVPred - pointer to the predicted motion vector + * pDstMVPredME - if non-NULL upon input then pDstMVPredME points upon + * return to a buffer containing the three motion vector candidates + * used for prediction as specified in [ISO14496-2], subclause + * 7.6.5, otherwise if NULL upon input then pDstMVPredME is NULL + * upon output. 
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ *              conditions:
+ *    -    the pointer pDstMVPred is NULL
+ *    -    the parameter iBlk does not fall into the range 0 <= iBlk<=3
+ *    -    pSrcMVCurMB is NULL while iBlk is not 0 (blocks 1-3 take
+ *         candidates from the current macroblock)
+ *
+ */
+
+OMXResult omxVCM4P2_FindMVpred(
+     const OMXVCMotionVector* pSrcMVCurMB,
+     const OMXVCMotionVector* pSrcCandMV1,
+     const OMXVCMotionVector* pSrcCandMV2,
+     const OMXVCMotionVector* pSrcCandMV3,
+     OMXVCMotionVector* pDstMVPred,
+     OMXVCMotionVector* pDstMVPredME,
+     OMX_INT iBlk
+ )
+{
+    OMXVCMotionVector CandMV;
+    const OMXVCMotionVector *pCandMV1;
+    const OMXVCMotionVector *pCandMV2;
+    const OMXVCMotionVector *pCandMV3;
+
+    /* Argument error checks */
+    armRetArgErrIf(iBlk!=0 && pSrcMVCurMB == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstMVPred == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlk < 0) || (iBlk > 3), OMX_Sts_BadArgErr);
+
+    /* Zero vector that stands in for any neighbour that is unavailable */
+    CandMV.dx = CandMV.dy = 0;
+
+    /* Default for every candidate, used if pSrcCandMV[1|2|3] == NULL */
+    pCandMV1 = pCandMV2 = pCandMV3 = &CandMV;
+
+    /* Pick the three candidates according to the block position */
+    switch (iBlk)
+    {
+        case 0:
+        {
+            /* All three candidates come from neighbouring macroblocks */
+            if(pSrcCandMV1 != NULL)
+            {
+                pCandMV1 = &pSrcCandMV1[1];
+            }
+            if(pSrcCandMV2 != NULL)
+            {
+                pCandMV2 = &pSrcCandMV2[2];
+            }
+            if(pSrcCandMV3 != NULL)
+            {
+                pCandMV3 = &pSrcCandMV3[2];
+            }
+            /* When two neighbours are missing, both take the value of the
+               remaining candidate (itself the zero vector if absent too). */
+            if ((pSrcCandMV1 == NULL) && (pSrcCandMV2 == NULL))
+            {
+                pCandMV1 = pCandMV2 = pCandMV3;
+            }
+            else if((pSrcCandMV1 == NULL) && (pSrcCandMV3 == NULL))
+            {
+                pCandMV1 = pCandMV3 = pCandMV2;
+            }
+            else if((pSrcCandMV2 == NULL) && (pSrcCandMV3 == NULL))
+            {
+                pCandMV2 = pCandMV3 = pCandMV1;
+            }
+            break;
+        }
+        case 1:
+        {
+            /* First candidate is block 0 of the current macroblock */
+            pCandMV1 = &pSrcMVCurMB[0];
+            if(pSrcCandMV2 != NULL)
+            {
+                pCandMV2 = &pSrcCandMV2[3];
+            }
+            if(pSrcCandMV3 != NULL)
+            {
+                pCandMV3 = &pSrcCandMV3[2];
+            }
+            if((pSrcCandMV2 == NULL) && (pSrcCandMV3 == NULL))
+            {
+                pCandMV2 = pCandMV3 = pCandMV1;
+            }
+            break;
+        }
+        case 2:
+        {
+            if(pSrcCandMV1 != NULL)
+            {
+                pCandMV1 = &pSrcCandMV1[3];
+            }
+            pCandMV2 = &pSrcMVCurMB[0];
+            pCandMV3 = &pSrcMVCurMB[1];
+            break;
+        }
+        case 3:
+        {
+            /* All three candidates come from the current macroblock */
+            pCandMV1 = &pSrcMVCurMB[2];
+            pCandMV2 = &pSrcMVCurMB[0];
+            pCandMV3 = &pSrcMVCurMB[1];
+            break;
+        }
+        default:
+        {
+            /* Only reachable if the range check above is not enforced;
+               the zero-vector defaults are then used for all candidates. */
+            break;
+        }
+    }
+
+    /* Find the median of the 3 candidate MV's */
+    pDstMVPred->dx = armMedianOf3 (pCandMV1->dx, pCandMV2->dx, pCandMV3->dx);
+    pDstMVPred->dy = armMedianOf3 (pCandMV1->dy, pCandMV2->dy, pCandMV3->dy);
+
+    if (pDstMVPredME != NULL)
+    {
+        /* Store the candidate MV's into the pDstMVPredME, these can be used
+           in the fast algorithm if implemented */
+        pDstMVPredME[0].dx = pCandMV1->dx;
+        pDstMVPredME[0].dy = pCandMV1->dy;
+        pDstMVPredME[1].dx = pCandMV2->dx;
+        pDstMVPredME[1].dy = pCandMV2->dy;
+        pDstMVPredME[2].dx = pCandMV3->dx;
+        pDstMVPredME[2].dy = pCandMV3->dy;
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+
+/* End of file */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c
new file mode 100644
index 0000000000000000000000000000000000000000..1886d92f115f98b8fbd334ff6a97a11ea98d99ec
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c
@@ -0,0 +1,92 @@
+/**
+ *
+ * File Name:  omxVCM4P2_IDCT8x8blk.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for 8x8 block IDCT
+ *
+ */
+
+
+#include <math.h>
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVCM4P2_DCT_Table.h"
+
+/**
+ * Function:  omxVCM4P2_IDCT8x8blk   (6.2.3.2.1)
+ *
+ * Description:
+ * Computes a 2D inverse DCT for a single 8x8 block, as defined in
+ * [ISO14496-2].
+ *
+ * Input Arguments:
+ *
+ *   pSrc - pointer to the start of the linearly arranged IDCT input buffer;
+ *            must be aligned on a 16-byte boundary.  According to
+ *            [ISO14496-2], the input coefficient values should lie within the
+ *            range [-2048, 2047].
+ *
+ * Output Arguments:
+ *
+ *   pDst - pointer to the start of the linearly arranged IDCT output buffer;
+ *            must be aligned on a 16-byte boundary.  Every output sample is
+ *            rounded to the nearest integer and saturated to [-256, 255].
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    pSrc or pDst is NULL.
+ *    -    pSrc or pDst is not 16-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_IDCT8x8blk (const OMX_S16 *pSrc, OMX_S16 *pDst)
+{
+    OMX_INT x, y, u, v;
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
+
+    for (x = 0; x < 8; x++)
+    {
+        for (y = 0; y < 8; y++)
+        {
+            OMX_F64 sum = 0.0;
+            OMX_F64 rounded;
+
+            /* Accumulate all 64 coefficients weighted by the two
+               precomputed cosine factors for this output position. */
+            for (u = 0; u < 8; u++)
+            {
+                for (v = 0; v < 8; v++)
+                {
+                    sum += pSrc[(u * 8) + v] *
+                        armVCM4P2_preCalcDCTCos[x][u] *
+                        armVCM4P2_preCalcDCTCos[y][v];
+                }
+            }
+
+            /* Saturate to [-256, 255] while the value is still a double.
+               The input range is not validated here, so narrowing to
+               OMX_S16 before the clamp could convert an unrepresentable
+               value, which is undefined behaviour in C. */
+            rounded = floor(sum + 0.5);
+            if (rounded > 255.0)
+            {
+                rounded = 255.0;
+            }
+            else if (rounded < -256.0)
+            {
+                rounded = -256.0;
+            }
+            pDst[(x * 8) + y] = (OMX_S16) rounded;
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MCReconBlock.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MCReconBlock.c
new file mode 100644
index 0000000000000000000000000000000000000000..7b3faeeb4973cd08ce0d1e6d2d07cf6591064454
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MCReconBlock.c
@@ -0,0 +1,357 @@
+/**
+ *
+ * File Name:  omxVCM4P2_MCReconBlock.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008
ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * MPEG4 motion compensation prediction for an 8x8 block using
+ * interpolation
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_HalfPelVer
+ *
+ * Description:
+ * Performs half pel motion compensation for an 8x8 block using vertical
+ * interpolation described in ISO/IEC 14496-2, subclause 7.6.2.
+ *
+ * Remarks:
+ * Each output sample is (upper + lower + 1 - rndVal) >> 1, where "lower"
+ * is the sample one source row (srcStep bytes) below "upper".
+ *
+ * Parameters:
+ * [in] pSrc        pointer to the block in the reference plane.
+ * [in] srcStep     distance between the start of consecutive lines
+ *                  in the reference plane, in bytes; must be a multiple
+ *                  of 8.
+ * [in] rndVal      rounding control parameter: 0 - disabled; 1 - enabled.
+ * [out] pDst       pointer to the linear 8x8 destination buffer;
+ *
+ */
+static OMXVoid armVCM4P2_HalfPelVer(
+      const OMX_U8 *pSrc,
+      OMX_INT srcStep,
+      OMX_U8 *pDst,
+      OMX_INT rndVal)
+{
+    const OMX_U8 *pUpper = pSrc;
+    const OMX_U8 *pLower = pSrc + srcStep;
+    OMX_INT row, col;
+
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+        {
+            pDst[col] = (pUpper[col] + pLower[col] + 1 - rndVal) >> 1;
+        }
+        /* The destination is packed (8 bytes per row); the source uses srcStep */
+        pDst += 8;
+        pUpper += srcStep;
+        pLower += srcStep;
+    }
+}
+
+/**
+ * Function: armVCM4P2_HalfPelHor
+ *
+ * Description:
+ * Performs half pel motion compensation for an 8x8 block using horizontal
+ * interpolation described in ISO/IEC 14496-2, subclause 7.6.2.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrc        pointer to the block in the reference plane.
+ * [in] srcStep     distance between the start of consecutive lines
+ *                  in the reference plane, in bytes; must be a multiple
+ *                  of 8.
+ * [in] rndVal      rounding control parameter: 0 - disabled; 1 - enabled.
+ * [out] pDst       pointer to the linear 8x8 destination buffer;
+ *
+ */
+static OMXVoid armVCM4P2_HalfPelHor(
+      const OMX_U8 *pSrc,
+      OMX_INT srcStep,
+      OMX_U8 *pDst,
+      OMX_INT rndVal)
+{
+    const OMX_U8 *pRow = pSrc;
+    OMX_INT row, col;
+
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+        {
+            /* Average each sample with its right-hand neighbour */
+            pDst[col] = (pRow[col] + pRow[col + 1] + 1 - rndVal) >> 1;
+        }
+        pDst += 8;
+        pRow += srcStep;
+    }
+}
+
+
+/**
+ * Function: armVCM4P2_HalfPelVerHor
+ *
+ * Description:
+ * Performs half pel motion compensation for an 8x8 block using both
+ * horizontal and vertical interpolation described in ISO/IEC 14496-2,
+ * subclause 7.6.2.
+ *
+ * Remarks:
+ * Each output sample is the sum of a 2x2 neighbourhood plus (2 - rndVal),
+ * shifted right by two.
+ *
+ * Parameters:
+ * [in] pSrc        pointer to the block in the reference plane.
+ * [in] srcStep     distance between the start of consecutive lines
+ *                  in the reference plane, in bytes; must be a multiple
+ *                  of 8.
+ * [in] rndVal      rounding control parameter: 0 - disabled; 1 - enabled.
+ * [out] pDst       pointer to the linear 8x8 destination buffer;
+ *
+ */
+static OMXVoid armVCM4P2_HalfPelVerHor(
+      const OMX_U8 *pSrc,
+      OMX_INT srcStep,
+      OMX_U8 *pDst,
+      OMX_INT rndVal)
+{
+    const OMX_U8 *pUpper = pSrc;
+    const OMX_U8 *pLower = pSrc + srcStep;
+    OMX_INT row, col;
+
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+        {
+            pDst[col] = (pUpper[col] + pLower[col] +
+                         pUpper[col + 1] + pLower[col + 1] +
+                         2 - rndVal) >> 2;
+        }
+        pDst += 8;
+        pUpper += srcStep;
+        pLower += srcStep;
+    }
+}
+
+/**
+ * Function: armVCM4P2_MCReconBlock_NoRes
+ *
+ * Description:
+ * Do motion compensation and copy the result to the current block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrc        pointer to the block in the reference plane.
+ * [in] srcStep     distance between the start of consecutive lines
+ *                  in the source buffer, in bytes.
+ * [in] dstStep     distance between the start of consecutive lines in the
+ *                  destination buffer, in bytes.
+ * [out] pDst       pointer to the destination buffer; receives an
+ *                  unmodified copy of the 8x8 source block.
+ *
+ */
+static OMXVoid armVCM4P2_MCReconBlock_NoRes(
+      const OMX_U8 *pSrc,
+      OMX_INT srcStep,
+      OMX_U8 *pDst,
+      OMX_INT dstStep)
+{
+    OMX_INT x, y;
+
+    /* Copy the 8x8 block one row at a time. Rows are reached by advancing
+       the pointers by the full step; an 8-bit running offset would wrap
+       for any step of 40 bytes or more and read/write the wrong pixels. */
+    for (y = 0; y < 8; y++)
+    {
+        for (x = 0; x < 8; x++)
+        {
+            pDst[x] = pSrc[x];
+        }
+        pSrc += srcStep;
+        pDst += dstStep;
+    }
+}
+
+/**
+ * Function: armVCM4P2_MCReconBlock_Res
+ *
+ * Description:
+ * Reconstructs INTER block by summing the motion compensation results
+ * and the results of the inverse transformation (prediction residuals).
+ * Output intensities are clipped to the range [0,255].
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrc        pointer to the block in the reference plane.
+ * [in] pSrcResidue pointer to a buffer containing the 16-bit prediction
+ *                  residuals. If the pointer is NULL,then no prediction
+ *                  is done, only motion compensation, i.e., the block is
+ *                  moved with interpolation.
+ * [in] dstStep     distance between the start of consecutive lines in the
+ *                  destination plane, in bytes; must be a multiple of 8.
+ * [out] pDst       pointer to the destination buffer; must be 8-byte aligned.
+ *                  If prediction residuals are added then output intensities
+ *                  are clipped to the range [0,255].
+ *
+ */
+static OMXVoid armVCM4P2_MCReconBlock_Res(
+      const OMX_U8 *pSrc,
+      const OMX_S16 *pSrcResidue,
+      OMX_U8 *pDst,
+      OMX_INT dstStep)
+{
+    OMX_INT row, col;
+    OMX_INT sum;
+
+    /* pSrc and pSrcResidue are both read as packed 8x8 arrays (8 entries
+       per row); only the destination is strided by dstStep. */
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+        {
+            sum = pSrc[(row * 8) + col] + pSrcResidue[(row * 8) + col];
+            pDst[(row * dstStep) + col] = armClip(0,255,sum);
+        }
+    }
+}
+
+/**
+ * Function:  omxVCM4P2_MCReconBlock   (6.2.5.5.1)
+ *
+ * Description:
+ * Performs motion compensation prediction for an 8x8 block using
+ * interpolation described in [ISO14496-2], subclause 7.6.2.
+ *
+ * Input Arguments:
+ *
+ *   pSrc - pointer to the block in the reference plane.
+ *   srcStep - distance between the start of consecutive lines in the
+ *            reference plane, in bytes; must be a multiple of 8.
+ *   dstStep - distance between the start of consecutive lines in the
+ *            destination plane, in bytes; must be a multiple of 8.
+ *   pSrcResidue - pointer to a buffer containing the 16-bit prediction
+ *            residuals; must be 16-byte aligned. If the pointer is NULL, then
+ *            no prediction is done, only motion compensation, i.e., the block
+ *            is moved with interpolation.
+ *   predictType - bilinear interpolation type, as defined in section
+ *            6.2.1.2.
+ *   rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ *
+ * Output Arguments:
+ *
+ *   pDst - pointer to the destination buffer; must be 8-byte aligned.  If
+ *            prediction residuals are added then output intensities are
+ *            clipped to the range [0,255].
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ *              conditions:
+ *    -    pDst is not 8-byte aligned.
+ *    -    pSrcResidue is not 16-byte aligned.
+ *    -    one or more of the following pointers is NULL: pSrc or pDst.
+ *    -    either srcStep or dstStep is not a multiple of 8.
+ *    -    invalid type specified for the parameter predictType.
+ *    -    the parameter rndVal is not equal either to 0 or 1.
+ *
+ */
+OMXResult omxVCM4P2_MCReconBlock(
+        const OMX_U8 *pSrc,
+        OMX_INT srcStep,
+        const OMX_S16 *pSrcResidue,
+        OMX_U8 *pDst,
+        OMX_INT dstStep,
+        OMX_INT predictType,
+        OMX_INT rndVal)
+{
+    /* Packed 8x8 scratch block that receives the prediction */
+    OMX_U8 aPrediction[64];
+
+    /* Argument error checks */
+    armRetArgErrIf((pSrc == NULL) || (pDst == NULL), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pSrcResidue), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((dstStep % 8) || (srcStep % 8)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((predictType == OMX_VC_INTEGER_PIXEL) ||
+                     (predictType == OMX_VC_HALF_PIXEL_X) ||
+                     (predictType == OMX_VC_HALF_PIXEL_Y) ||
+                     (predictType == OMX_VC_HALF_PIXEL_XY)),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((rndVal == 0) || (rndVal == 1)), OMX_Sts_BadArgErr);
+
+    /* Stage 1: build the prediction in the scratch block */
+    if (predictType == OMX_VC_INTEGER_PIXEL)
+    {
+        armVCM4P2_MCReconBlock_NoRes(pSrc, srcStep, aPrediction, 8);
+    }
+    else if (predictType == OMX_VC_HALF_PIXEL_X)
+    {
+        armVCM4P2_HalfPelHor(pSrc, srcStep, aPrediction, rndVal);
+    }
+    else if (predictType == OMX_VC_HALF_PIXEL_Y)
+    {
+        armVCM4P2_HalfPelVer(pSrc, srcStep, aPrediction, rndVal);
+    }
+    else if (predictType == OMX_VC_HALF_PIXEL_XY)
+    {
+        armVCM4P2_HalfPelVerHor(pSrc, srcStep, aPrediction, rndVal);
+    }
+
+    /* Stage 2: write to the destination, adding residuals when supplied */
+    if (pSrcResidue != NULL)
+    {
+        armVCM4P2_MCReconBlock_Res(aPrediction, pSrcResidue, pDst, dstStep);
+    }
+    else
+    {
+        armVCM4P2_MCReconBlock_NoRes(aPrediction, 8, pDst, dstStep);
+    }
+
+    return OMX_Sts_NoErr;
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEGetBufSize.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEGetBufSize.c
new file mode 100644
index 0000000000000000000000000000000000000000..a8e51da6d2785e58021b3a82293e1e3ddd681d54
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEGetBufSize.c
@@
-0,0 +1,70 @@
+/**
+ *
+ * File Name:  omxVCM4P2_MEGetBufSize.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Initialization modules for the vendor specific Motion Estimation structure.
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P2_MEGetBufSize   (6.2.4.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification
+ * structure for the following motion estimation functions:
+ * BlockMatch_Integer_8x8, BlockMatch_Integer_16x16, and MotionEstimationMB.
+ *
+ * Input Arguments:
+ *
+ *   MEmode - motion estimation mode; available modes are defined by the
+ *            enumerated type OMXVCM4P2MEMode
+ *   pMEParams - motion estimation parameters
+ *
+ * Output Arguments:
+ *
+ *   pSize - pointer to the number of bytes required for the specification
+ *            structure
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - one or more of the following is true:
+ *    -    pMEParams or pSize is NULL
+ *    -    an invalid value was specified for the parameter MEmode
+ *    -    a negative or zero value was specified for the
+ *         parameter pMEParams->searchRange
+ *
+ */
+
+OMXResult omxVCM4P2_MEGetBufSize(
+    OMXVCM4P2MEMode MEMode,
+    const OMXVCM4P2MEParams *pMEParams,
+    OMX_U32 *pSize
+   )
+{
+    armRetArgErrIf(!pMEParams, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!pSize, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMEParams->searchRange <= 0, OMX_Sts_BadArgErr);
+    /* MEMode is an OMXVCM4P2MEMode, so it is validated against the M4P2
+       constants (the same ones omxVCM4P2_MEInit checks), not M4P10 ones. */
+    armRetArgErrIf((MEMode != OMX_VC_M4P2_FAST_SEARCH) &&
+                   (MEMode != OMX_VC_M4P2_FULL_SEARCH), OMX_Sts_BadArgErr);
+
+    /* The size is a byte count written to an unsigned output */
+    *pSize = (OMX_U32) sizeof(ARMVCM4P2_MESpec);
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEInit.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEInit.c
new file
mode 100644 index 0000000000000000000000000000000000000000..419e71a6a339e672a31e078a4910df2fad02f180 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEInit.c @@ -0,0 +1,84 @@ +/** + * + * File Name: omxVCM4P2_MEInit.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Initialization modules for the vendor specific Motion Estimation structure. + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: omxVCM4P2_MEInit (6.2.4.1.2) + * + * Description: + * Initializes the vendor-specific specification structure required for the + * following motion estimation functions: BlockMatch_Integer_8x8, + * BlockMatch_Integer_16x16, and MotionEstimationMB. Memory for the + * specification structure *pMESpec must be allocated prior to calling the + * function, and should be aligned on a 4-byte boundary. Following + * initialization by this function, the vendor-specific structure *pMESpec + * should contain an implementation-specific representation of all motion + * estimation parameters received via the structure pMEParams, for example + * rndVal, searchRange, etc. The number of bytes required for the + * specification structure can be determined using the function + * omxVCM4P2_MEGetBufSize. 
+ *
+ * Input Arguments:
+ *
+ *   MEmode - motion estimation mode; available modes are defined by the
+ *            enumerated type OMXVCM4P2MEMode
+ *   pMEParams - motion estimation parameters
+ *   pMESpec - pointer to the uninitialized ME specification structure
+ *
+ * Output Arguments:
+ *
+ *   pMESpec - pointer to the initialized ME specification structure
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - one or more of the following is true:
+ *    -    an invalid value was specified for the parameter MEmode
+ *    -    a negative or zero value was specified for the
+ *         parameter pMEParams->searchRange
+ *
+ */
+
+OMXResult omxVCM4P2_MEInit(
+    OMXVCM4P2MEMode MEMode,
+    const OMXVCM4P2MEParams *pMEParams,
+    void *pMESpec
+   )
+{
+    ARMVCM4P2_MESpec *pSpec;
+
+    /* Both pointers are checked before anything is read through them */
+    armRetArgErrIf(!pMEParams, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!pMESpec, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((MEMode == OMX_VC_M4P2_FAST_SEARCH) ||
+                     (MEMode == OMX_VC_M4P2_FULL_SEARCH)),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMEParams->searchRange <= 0, OMX_Sts_BadArgErr);
+
+    /* Copy the four caller parameters, then record the mode */
+    pSpec = (ARMVCM4P2_MESpec *) pMESpec;
+    pSpec->MEParams.searchEnable8x8     = pMEParams->searchEnable8x8;
+    pSpec->MEParams.halfPelSearchEnable = pMEParams->halfPelSearchEnable;
+    pSpec->MEParams.searchRange         = pMEParams->searchRange;
+    pSpec->MEParams.rndVal              = pMEParams->rndVal;
+    pSpec->MEMode                       = MEMode;
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c
new file mode 100644
index 0000000000000000000000000000000000000000..95490505296978abaf07c3808494a659766730b9
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c
@@ -0,0 +1,630 @@
+/**
+ *
+ * File Name:  omxVCM4P2_MotionEstimationMB.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday,
February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains module for motion search 16x16 macroblock + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + +/** + * Function: armVCM4P2_BlockMatch_16x16 + * + * Description: + * 16x16 block match wrapper function, calls omxVCM4P2_BlockMatch_Integer_16x16. + * If half pel search is enabled it also calls omxVCM4P2_BlockMatch_Half_16x16 + * + * Remarks: + * + * Parameters: + * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB that + * corresponds to the location of the current macroblock in the current + * plane. + * [in] srcRefStep width of the reference plane + * [in] pRefRect pointer to the valid rectangular in reference plane. Relative to image origin. + * It's not limited to the image boundary, but depended on the padding. For example, + * if you pad 4 pixels outside the image border, then the value for left border + * can be -4 + * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane (linear array, + * 256 entries); must be aligned on an 16-byte boundary. + * [in] pCurrPointPos position of the current macroblock in the current plane + * [in] pSrcPreMV pointer to predicted motion vector; NULL indicates no predicted MV + * [in] pSrcPreSAD pointer to SAD associated with the predicted MV (referenced by pSrcPreMV); may be set to NULL if unavailable. + * [in] pMESpec vendor-specific motion estimation specification structure; must have been allocated + * and then initialized using omxVCM4P2_MEInit prior to calling the block matching + * function. 
+ * [out] pDstMV         pointer to estimated MV
+ * [out] pDstSAD        pointer to minimum SAD
+ * *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - pMESpec is NULL
+ * Any other status reported by the integer or half-pel block-match call
+ * is passed through unchanged.
+ *
+ */
+static OMXResult armVCM4P2_BlockMatch_16x16(
+     const OMX_U8 *pSrcRefBuf,
+     const OMX_INT srcRefStep,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pCurrPointPos,
+     OMXVCMotionVector *pSrcPreMV,
+     OMX_INT *pSrcPreSAD,
+     void *pMESpec,
+     OMXVCMotionVector *pDstMV,
+     OMX_INT *pDstSAD
+)
+{
+    OMXVCM4P2MEParams *pMEParams = (OMXVCM4P2MEParams *)pMESpec;
+    OMXResult status;
+
+    /* pMEParams is dereferenced below, so reject a NULL spec up front */
+    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr);
+
+    /* Integer-pel search; it produces the vector the half-pel step refines */
+    status = omxVCM4P2_BlockMatch_Integer_16x16(
+        pSrcRefBuf,
+        srcRefStep,
+        pRefRect,
+        pSrcCurrBuf,
+        pCurrPointPos,
+        pSrcPreMV,
+        pSrcPreSAD,
+        pMEParams,
+        pDstMV,
+        pDstSAD);
+    if (status != OMX_Sts_NoErr)
+    {
+        /* Do not refine a vector the integer search failed to produce */
+        return status;
+    }
+
+    if (pMEParams->halfPelSearchEnable)
+    {
+        status = omxVCM4P2_BlockMatch_Half_16x16(
+            pSrcRefBuf,
+            srcRefStep,
+            pRefRect,
+            pSrcCurrBuf,
+            pCurrPointPos,
+            pMEParams->rndVal,
+            pDstMV,
+            pDstSAD);
+    }
+
+    return status;
+}
+
+/**
+ * Function: armVCM4P2_BlockMatch_8x8
+ *
+ * Description:
+ * 8x8 block match wrapper function, calls omxVCM4P2_BlockMatch_Integer_8x8.
+ * If half pel search is enabled it also calls omxVCM4P2_BlockMatch_Half_8x8
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcRefBuf      pointer to the reference Y plane; points to the reference MB that
+ *                    corresponds to the location of the current macroblock in the current
+ *                    plane.
+ * [in] srcRefStep      width of the reference plane
+ * [in] pRefRect        pointer to the valid rectangular in reference plane. Relative to image origin.
+ *                    It's not limited to the image boundary, but depended on the padding. For example,
+ *                    if you pad 4 pixels outside the image border, then the value for left border
+ *                    can be -4
+ * [in] pSrcCurrBuf     pointer to the current macroblock extracted from original plane (linear array,
+ *                    256 entries); must be aligned on an 16-byte boundary.
+ * [in] pCurrPointPos   position of the current macroblock in the current plane
+ * [in] pSrcPreMV       pointer to predicted motion vector; NULL indicates no predicted MV
+ * [in] pSrcPreSAD      pointer to SAD associated with the predicted MV (referenced by pSrcPreMV); may be set to NULL if unavailable.
+ * [in] pMESpec         vendor-specific motion estimation specification structure; must have been allocated
+ *                    and then initialized using omxVCM4P2_MEInit prior to calling the block matching
+ *                    function.
+ * [out] pDstMV         pointer to estimated MV
+ * [out] pDstSAD        pointer to minimum SAD
+ * *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - pMESpec is NULL
+ * Any other status reported by the integer or half-pel block-match call
+ * is passed through unchanged.
+ *
+ */
+static OMXResult armVCM4P2_BlockMatch_8x8(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT srcRefStep,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pCurrPointPos,
+     OMXVCMotionVector *pSrcPreMV,
+     OMX_INT *pSrcPreSAD,
+     void *pMESpec,
+     OMXVCMotionVector *pSrcDstMV,
+     OMX_INT *pDstSAD
+)
+{
+    OMXVCM4P2MEParams *pMEParams = (OMXVCM4P2MEParams *)pMESpec;
+    OMXResult status;
+
+    /* pMEParams is dereferenced below, so reject a NULL spec up front */
+    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr);
+
+    /* Integer-pel search; it produces the vector the half-pel step refines */
+    status = omxVCM4P2_BlockMatch_Integer_8x8(
+        pSrcRefBuf,
+        srcRefStep,
+        pRefRect,
+        pSrcCurrBuf,
+        pCurrPointPos,
+        pSrcPreMV,
+        pSrcPreSAD,
+        pMEParams,
+        pSrcDstMV,
+        pDstSAD);
+    if (status != OMX_Sts_NoErr)
+    {
+        /* Do not refine a vector the integer search failed to produce */
+        return status;
+    }
+
+    if (pMEParams->halfPelSearchEnable)
+    {
+        status = omxVCM4P2_BlockMatch_Half_8x8(
+            pSrcRefBuf,
+            srcRefStep,
+            pRefRect,
+            pSrcCurrBuf,
+            pCurrPointPos,
+            pMEParams->rndVal,
+            pSrcDstMV,
+            pDstSAD);
+    }
+
+    return status;
+}
+
+
+/**
+ * Function:  omxVCM4P2_MotionEstimationMB   (6.2.4.3.1)
+ *
+ * Description:
+ * Performs motion search for a 16x16 macroblock.  Selects best motion search
+ * strategy from among inter-1MV, inter-4MV, and intra modes.  Supports
+ * integer and half pixel resolution.
+ *
+ * Input Arguments:
+ *
+ *   pSrcCurrBuf - pointer to the top-left corner of the current MB in the
+ *            original picture plane; must be aligned on a 16-byte boundary.
+ * The function does not expect source data outside the region + * bounded by the MB to be available; for example it is not + * necessary for the caller to guarantee the availability of + * pSrcCurrBuf[-SrcCurrStep], i.e., the row of pixels above the MB + * to be processed. + * srcCurrStep - width of the original picture plane, in terms of full + * pixels; must be a multiple of 16. + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * plane location corresponding to the location of the current + * macroblock in the current plane; must be aligned on a 16-byte + * boundary. + * srcRefStep - width of the reference picture plane, in terms of full + * pixels; must be a multiple of 16. + * pRefRect - reference plane valid region rectangle, specified relative to + * the image origin + * pCurrPointPos - position of the current macroblock in the current plane + * pMESpec - pointer to the vendor-specific motion estimation specification + * structure; must be allocated and then initialized using + * omxVCM4P2_MEInit prior to calling this function. + * pMBInfo - array, of dimension four, containing pointers to information + * associated with four nearby MBs: + * - pMBInfo[0] - pointer to left MB information + * - pMBInfo[1] - pointer to top MB information + * - pMBInfo[2] - pointer to top-left MB information + * - pMBInfo[3] - pointer to top-right MB information + * Any pointer in the array may be set equal to NULL if the + * corresponding MB doesn't exist. For each MB, the following structure + * members are used: + * - mbType - macroblock type, either OMX_VC_INTRA, OMX_VC_INTER, or + * OMX_VC_INTER4V + * - pMV0[2][2] - estimated motion vectors; represented + * in 1/2 pixel units + * - sliceID - number of the slice to which the MB belongs + * pSrcDstMBCurr - pointer to information structure for the current MB. 
+ * The following entries should be set prior to calling the + * function: sliceID - the number of the slice to which the + * current MB belongs. The structure elements cbpy and cbpc are + * ignored. + * + * Output Arguments: + * + * pSrcDstMBCurr - pointer to updated information structure for the current + * MB after MB-level motion estimation has been completed. The + * following structure members are updated by the ME function: + * - mbType - macroblock type: OMX_VC_INTRA, OMX_VC_INTER, or + * OMX_VC_INTER4V. + * - pMV0[2][2] - estimated motion vectors; represented in + * terms of 1/2 pel units. + * - pMVPred[2][2] - predicted motion vectors; represented + * in terms of 1/2 pel units. + * The structure members cbpy and cbpc are not updated by the function. + * pDstSAD - pointer to the minimum SAD for INTER1V, or sum of minimum SADs + * for INTER4V + * pDstBlockSAD - pointer to an array of SAD values for each of the four + * 8x8 luma blocks in the MB. The block SADs are in scan order for + * each MB. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: pSrcCurrBuf, + * pSrcRefBuf, pRefRect, pCurrPointPos, pMBInter, pMBIntra, + * pSrcDstMBCurr, or pDstSAD.
+ * + */ + +OMXResult omxVCM4P2_MotionEstimationMB ( + const OMX_U8 *pSrcCurrBuf, + OMX_S32 srcCurrStep, + const OMX_U8 *pSrcRefBuf, + OMX_S32 srcRefStep, + const OMXRect*pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + void *pMESpec, + const OMXVCM4P2MBInfoPtr *pMBInfo, + OMXVCM4P2MBInfo *pSrcDstMBCurr, + OMX_U16 *pDstSAD, + OMX_U16 *pDstBlockSAD +) +{ + + OMX_INT intraSAD, average, count, index, x, y; + OMXVCMotionVector dstMV16x16; + OMX_INT dstSAD16x16; + OMX_INT dstSAD8x8; + OMXVCM4P2MEParams *pMEParams; + OMXVCM4P2Coordinate TempCurrPointPos; + OMXVCM4P2Coordinate *pTempCurrPointPos; + OMX_U8 aTempSrcCurrBuf[271]; + OMX_U8 *pTempSrcCurrBuf; + OMX_U8 *pDst; + OMX_U8 aDst[71]; + OMX_S32 dstStep = 8; + OMX_INT predictType; + OMX_S32 Sad; + const OMX_U8 *pTempSrcRefBuf; + OMXVCMotionVector* pSrcCandMV1[4]; + OMXVCMotionVector* pSrcCandMV2[4]; + OMXVCMotionVector* pSrcCandMV3[4]; + + /* Argument error checks */ + armRetArgErrIf(!armIs16ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs16ByteAligned(pSrcRefBuf), OMX_Sts_BadArgErr); + armRetArgErrIf(((srcCurrStep % 16) || (srcRefStep % 16)), OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcRefBuf == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pCurrPointPos == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSrcDstMBCurr == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr); + + + pTempCurrPointPos = &(TempCurrPointPos); + pTempSrcCurrBuf = armAlignTo16Bytes(aTempSrcCurrBuf); + pMEParams = (OMXVCM4P2MEParams *)pMESpec; + pTempCurrPointPos->x = pCurrPointPos->x; + pTempCurrPointPos->y = pCurrPointPos->y; + pSrcDstMBCurr->mbType = OMX_VC_INTER; + + /* Preparing a linear buffer for block match */ + for (y = 0, index = count = 0; y < 16; y++, index += srcCurrStep - 16) + { + for(x = 0; x < 16; x++, count++, index++) + { + pTempSrcCurrBuf[count] = pSrcCurrBuf[index]; + } 
+ } + for(y = 0, index = 0; y < 2; y++) + { + for(x = 0; x < 2; x++,index++) + { + if((pMBInfo[0] != NULL) && (pMBInfo[0]->mbType != OMX_VC_INTRA)) + { + pSrcCandMV1[index] = &(pMBInfo[0]->pMV0[y][x]); + } + else + { + pSrcCandMV1[index] = NULL; + } + if((pMBInfo[1] != NULL) && (pMBInfo[1]->mbType != OMX_VC_INTRA)) + { + pSrcCandMV2[index] = &(pMBInfo[1]->pMV0[y][x]); + } + else + { + pSrcCandMV2[index] = NULL; + } + if((pMBInfo[3] != NULL) && (pMBInfo[3]->mbType != OMX_VC_INTRA)) + { + pSrcCandMV3[index] = &(pMBInfo[3]->pMV0[y][x]); + } + else + { + pSrcCandMV3[index] = NULL; + } + } + } + /* Calculating SAD at MV(0,0) */ + armVCCOMM_SAD(pTempSrcCurrBuf, + 16, + pSrcRefBuf, + srcRefStep, + &Sad, + 16, + 16); + *pDstSAD = Sad; + + /* Mode decision for NOT_CODED MB */ + if(*pDstSAD == 0) + { + pSrcDstMBCurr->pMV0[0][0].dx = 0; + pSrcDstMBCurr->pMV0[0][0].dy = 0; + *pDstSAD = 0; + return OMX_Sts_NoErr; + } + + omxVCM4P2_FindMVpred( + &(pSrcDstMBCurr->pMV0[0][0]), + pSrcCandMV1[0], + pSrcCandMV2[0], + pSrcCandMV3[0], + &(pSrcDstMBCurr->pMVPred[0][0]), + NULL, + 0); + + /* Inter 1 MV */ + armVCM4P2_BlockMatch_16x16( + pSrcRefBuf, + srcRefStep, + pRefRect, + pTempSrcCurrBuf, + pCurrPointPos, + &(pSrcDstMBCurr->pMVPred[0][0]), + NULL, + pMEParams, + &dstMV16x16, + &dstSAD16x16); + + /* Initialize all with 1 MV values */ + pSrcDstMBCurr->pMV0[0][0].dx = dstMV16x16.dx; + pSrcDstMBCurr->pMV0[0][0].dy = dstMV16x16.dy; + pSrcDstMBCurr->pMV0[0][1].dx = dstMV16x16.dx; + pSrcDstMBCurr->pMV0[0][1].dy = dstMV16x16.dy; + pSrcDstMBCurr->pMV0[1][0].dx = dstMV16x16.dx; + pSrcDstMBCurr->pMV0[1][0].dy = dstMV16x16.dy; + pSrcDstMBCurr->pMV0[1][1].dx = dstMV16x16.dx; + pSrcDstMBCurr->pMV0[1][1].dy = dstMV16x16.dy; + + *pDstSAD = dstSAD16x16; + + if (pMEParams->searchEnable8x8) + { + /* Inter 4MV */ + armVCM4P2_BlockMatch_8x8 (pSrcRefBuf, + srcRefStep, pRefRect, + pTempSrcCurrBuf, pTempCurrPointPos, + &(pSrcDstMBCurr->pMVPred[0][0]), NULL, + pMEParams, &(pSrcDstMBCurr->pMV0[0][0]), + 
&dstSAD8x8 + ); + pDstBlockSAD[0] = dstSAD8x8; + *pDstSAD = dstSAD8x8; + pTempCurrPointPos->x += 8; + pSrcRefBuf += 8; + omxVCM4P2_FindMVpred( + &(pSrcDstMBCurr->pMV0[0][1]), + pSrcCandMV1[1], + pSrcCandMV2[1], + pSrcCandMV3[1], + &(pSrcDstMBCurr->pMVPred[0][1]), + NULL, + 1); + + armVCM4P2_BlockMatch_8x8 (pSrcRefBuf, + srcRefStep, pRefRect, + pTempSrcCurrBuf, pTempCurrPointPos, + &(pSrcDstMBCurr->pMVPred[0][1]), NULL, + pMEParams, &(pSrcDstMBCurr->pMV0[0][1]), + &dstSAD8x8 + ); + pDstBlockSAD[1] = dstSAD8x8; + *pDstSAD += dstSAD8x8; + pTempCurrPointPos->x -= 8; + pTempCurrPointPos->y += 8; + pSrcRefBuf += (srcRefStep * 8) - 8; + + omxVCM4P2_FindMVpred( + &(pSrcDstMBCurr->pMV0[1][0]), + pSrcCandMV1[2], + pSrcCandMV2[2], + pSrcCandMV3[2], + &(pSrcDstMBCurr->pMVPred[1][0]), + NULL, + 2); + armVCM4P2_BlockMatch_8x8 (pSrcRefBuf, + srcRefStep, pRefRect, + pTempSrcCurrBuf, pTempCurrPointPos, + &(pSrcDstMBCurr->pMVPred[1][0]), NULL, + pMEParams, &(pSrcDstMBCurr->pMV0[1][0]), + &dstSAD8x8 + ); + pDstBlockSAD[2] = dstSAD8x8; + *pDstSAD += dstSAD8x8; + pTempCurrPointPos->x += 8; + pSrcRefBuf += 8; + omxVCM4P2_FindMVpred( + &(pSrcDstMBCurr->pMV0[1][1]), + pSrcCandMV1[3], + pSrcCandMV2[3], + pSrcCandMV3[3], + &(pSrcDstMBCurr->pMVPred[1][1]), + NULL, + 3); + armVCM4P2_BlockMatch_8x8 (pSrcRefBuf, + srcRefStep, pRefRect, + pTempSrcCurrBuf, pTempCurrPointPos, + &(pSrcDstMBCurr->pMVPred[1][1]), NULL, + pMEParams, &(pSrcDstMBCurr->pMV0[1][1]), + &dstSAD8x8 + ); + pDstBlockSAD[3] = dstSAD8x8; + *pDstSAD += dstSAD8x8; + + + /* Checking if 4MV is equal to 1MV */ + if ( + (pSrcDstMBCurr->pMV0[0][0].dx != dstMV16x16.dx) || + (pSrcDstMBCurr->pMV0[0][0].dy != dstMV16x16.dy) || + (pSrcDstMBCurr->pMV0[0][1].dx != dstMV16x16.dx) || + (pSrcDstMBCurr->pMV0[0][1].dy != dstMV16x16.dy) || + (pSrcDstMBCurr->pMV0[1][0].dx != dstMV16x16.dx) || + (pSrcDstMBCurr->pMV0[1][0].dy != dstMV16x16.dy) || + (pSrcDstMBCurr->pMV0[1][1].dx != dstMV16x16.dx) || + (pSrcDstMBCurr->pMV0[1][1].dy != dstMV16x16.dy) + ) 
+ { + /* select the 4 MV */ + pSrcDstMBCurr->mbType = OMX_VC_INTER4V; + } + } + + /* finding the error in intra mode */ + for (count = 0, average = 0; count < 256 ; count++) + { + average = average + pTempSrcCurrBuf[count]; + } + average = average/256; + + intraSAD = 0; + + /* Intra SAD calculation */ + for (count = 0; count < 256 ; count++) + { + intraSAD += armAbs ((pTempSrcCurrBuf[count]) - (average)); + } + + /* Using the MPEG4 VM formula for intra/inter mode decision + Var < (SAD - 2*NB) where NB = N^2 is the number of pixels + of the macroblock.*/ + + if (intraSAD <= (*pDstSAD - 512)) + { + pSrcDstMBCurr->mbType = OMX_VC_INTRA; + pSrcDstMBCurr->pMV0[0][0].dx = 0; + pSrcDstMBCurr->pMV0[0][0].dy = 0; + *pDstSAD = intraSAD; + pDstBlockSAD[0] = 0xFFFF; + pDstBlockSAD[1] = 0xFFFF; + pDstBlockSAD[2] = 0xFFFF; + pDstBlockSAD[3] = 0xFFFF; + } + + if(pSrcDstMBCurr->mbType == OMX_VC_INTER) + { + pTempSrcRefBuf = pSrcRefBuf + (srcRefStep * dstMV16x16.dy) + dstMV16x16.dx; + + if((dstMV16x16.dx & 0x1) && (dstMV16x16.dy & 0x1)) + { + predictType = OMX_VC_HALF_PIXEL_XY; + } + else if(dstMV16x16.dx & 0x1) + { + predictType = OMX_VC_HALF_PIXEL_X; + } + else if(dstMV16x16.dy & 0x1) + { + predictType = OMX_VC_HALF_PIXEL_Y; + } + else + { + predictType = OMX_VC_INTEGER_PIXEL; + } + + pDst = armAlignTo8Bytes(&(aDst[0])); + /* Calculating Block SAD at MV(dstMV16x16.dx,dstMV16x16.dy) */ + /* Block 0 */ + omxVCM4P2_MCReconBlock(pTempSrcRefBuf, + srcRefStep, + NULL, + pDst, + dstStep, + predictType, + pMEParams->rndVal); + + armVCCOMM_SAD(pTempSrcCurrBuf, + 16, + pDst, + dstStep, + &Sad, + 8, + 8); + pDstBlockSAD[0] = Sad; + + /* Block 1 */ + omxVCM4P2_MCReconBlock(pTempSrcRefBuf + 8, + srcRefStep, + NULL, + pDst, + dstStep, + predictType, + pMEParams->rndVal); + + armVCCOMM_SAD(pTempSrcCurrBuf + 8, + 16, + pDst, + dstStep, + &Sad, + 8, + 8); + pDstBlockSAD[1] = Sad; + + /* Block 2 */ + omxVCM4P2_MCReconBlock(pTempSrcRefBuf + (srcRefStep*8), + srcRefStep, + NULL, + pDst, + dstStep, + 
predictType, + pMEParams->rndVal); + + armVCCOMM_SAD(pTempSrcCurrBuf + (16*8), + 16, + pDst, + dstStep, + &Sad, + 8, + 8); + pDstBlockSAD[2] = Sad; + + /* Block 3 */ + omxVCM4P2_MCReconBlock(pTempSrcRefBuf + (srcRefStep*8) + 8, + srcRefStep, + NULL, + pDst, + dstStep, + predictType, + pMEParams->rndVal); + + armVCCOMM_SAD(pTempSrcCurrBuf + (16*8) + 8, + 16, + pDst, + dstStep, + &Sad, + 8, + 8); + pDstBlockSAD[3] = Sad; + } + return OMX_Sts_NoErr; +} + +/* End of file */ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c new file mode 100644 index 0000000000000000000000000000000000000000..1613f477743f282b8b8b65289947a37bca08c877 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c @@ -0,0 +1,121 @@ + /** + * + * File Name: omxVCM4P2_PredictReconCoefIntra.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: omxVCM4P2_PredictReconCoefIntra_S16.c + * Description: Contains modules for AC DC prediction + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P2_PredictReconCoefIntra (6.2.5.4.3) + * + * Description: + * Performs adaptive DC/AC coefficient prediction for an intra block. Prior + * to the function call, prediction direction (predDir) should be selected as + * specified in [ISO14496-2], subclause 7.4.3.1. + * + * Input Arguments: + * + * pSrcDst - pointer to the coefficient buffer which contains the quantized + * coefficient residuals (PQF) of the current block; must be + * aligned on a 4-byte boundary. The output coefficients are + * saturated to the range [-2048, 2047]. 
+ * pPredBufRow - pointer to the coefficient row buffer; must be aligned on + * a 4-byte boundary. + * pPredBufCol - pointer to the coefficient column buffer; must be aligned + * on a 4-byte boundary. + * curQP - quantization parameter of the current block. curQP may be equal to + * predQP especially when the current block and the predictor block + * are in the same macroblock. + * predQP - quantization parameter of the predictor block + * predDir - indicates the prediction direction which takes one of the + * following values: OMX_VC_HORIZONTAL - predict horizontally + * OMX_VC_VERTICAL - predict vertically + * ACPredFlag - a flag indicating if AC prediction should be performed. It + * is equal to ac_pred_flag in the bit stream syntax of MPEG-4 + * videoComp - video component type (luminance or chrominance) of the + * current block + * + * Output Arguments: + * + * pSrcDst - pointer to the coefficient buffer which contains the quantized + * coefficients (QF) of the current block + * pPredBufRow - pointer to the updated coefficient row buffer + * pPredBufCol - pointer to the updated coefficient column buffer Note: + * Buffer update: Update the AC prediction buffer (both row and + * column buffer). + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the pointers is NULL: + * pSrcDst, pPredBufRow, or pPredBufCol. + * - curQP <= 0, + * - predQP <= 0, + * - curQP > 31, + * - predQP > 31, + * - predDir is neither 1 nor 2 + * - pSrcDst, pPredBufRow, or pPredBufCol is not 4-byte aligned.
+ * + */ + +OMXResult omxVCM4P2_PredictReconCoefIntra( + OMX_S16 * pSrcDst, + OMX_S16 * pPredBufRow, + OMX_S16 * pPredBufCol, + OMX_INT curQP, + OMX_INT predQP, + OMX_INT predDir, + OMX_INT ACPredFlag, + OMXVCM4P2VideoComponent videoComp + ) +{ + OMX_U8 flag; + /* Argument error checks */ + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pPredBufRow == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pPredBufCol == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(curQP <= 0, OMX_Sts_BadArgErr); + armRetArgErrIf(predQP <= 0, OMX_Sts_BadArgErr); + armRetArgErrIf(curQP > 31, OMX_Sts_BadArgErr); + armRetArgErrIf(predQP > 31, OMX_Sts_BadArgErr); + armRetArgErrIf((predDir != 1) && (predDir != 2), OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs4ByteAligned(pSrcDst), OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs4ByteAligned(pPredBufRow), OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs4ByteAligned(pPredBufCol), OMX_Sts_BadArgErr); + + flag = 0; + return armVCM4P2_ACDCPredict( + pSrcDst, + NULL, + pPredBufRow, + pPredBufCol, + curQP, + predQP, + predDir, + ACPredFlag, + videoComp, + flag, + NULL); + +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInter_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInter_I.c new file mode 100644 index 0000000000000000000000000000000000000000..5964f732a2abb9b8bc823313bd743dd7758ea376 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInter_I.c @@ -0,0 +1,117 @@ +/** + * + * File Name: omxVCM4P2_QuantInter_I.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Description: + * Contains modules for inter Quantization + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + +/** + * Function: omxVCM4P2_QuantInter_I (6.2.4.4.3) + * + * Description: + * Performs quantization on an inter coefficient block; supports + * bits_per_pixel == 8. + * + * Input Arguments: + * + * pSrcDst - pointer to the input inter block coefficients; must be aligned + * on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale) + * shortVideoHeader - binary flag indicating presence of short_video_header; + * shortVideoHeader==1 selects linear intra DC mode, and + * shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pSrcDst - pointer to the output (quantized) interblock coefficients. + * When shortVideoHeader==1, AC coefficients are saturated on the + * interval [-127, 127], and DC coefficients are saturated on the + * interval [1, 254]. When shortVideoHeader==0, AC coefficients + * are saturated on the interval [-2047, 2047]. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - pSrcDst is NULL. + * - QP <= 0 or QP >= 32. + * + */ + +OMXResult omxVCM4P2_QuantInter_I( + OMX_S16 * pSrcDst, + OMX_U8 QP, + OMX_INT shortVideoHeader +) +{ + + /* Definitions and Initializations*/ + OMX_INT coeffCount; + OMX_INT fSign; + OMX_INT maxClpAC = 0, minClpAC = 0; + OMX_INT maxClpDC = 0, minClpDC = 0; + + /* Argument error checks */ + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr); + /* One argument check is delayed until we have ascertained that */ + /* pQMatrix is not NULL. 
*/ + + /* Set the Clip Range based on SVH on/off */ + if(shortVideoHeader == 1) + { + maxClpDC = 254; + minClpDC = 1; + maxClpAC = 127; + minClpAC = -127; + } + else + { + maxClpDC = 2047; + minClpDC = -2047; + maxClpAC = 2047; + minClpAC = -2047; + } + + /* Second Inverse quantisation method */ + for (coeffCount = 0; coeffCount < 64; coeffCount++) + { + fSign = armSignCheck (pSrcDst[coeffCount]); + pSrcDst[coeffCount] = (armAbs(pSrcDst[coeffCount]) + - (QP/2))/(2 * QP); + pSrcDst[coeffCount] *= fSign; + + /* Clip */ + if (coeffCount == 0) + { + pSrcDst[coeffCount] = + (OMX_S16) armClip (minClpDC, maxClpDC, pSrcDst[coeffCount]); + } + else + { + pSrcDst[coeffCount] = + (OMX_S16) armClip (minClpAC, maxClpAC, pSrcDst[coeffCount]); + } + } + return OMX_Sts_NoErr; + +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantIntra_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantIntra_I.c new file mode 100644 index 0000000000000000000000000000000000000000..a10da68779e34116a4161e5e0ca293f050a67a86 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantIntra_I.c @@ -0,0 +1,153 @@ +/** + * + * File Name: omxVCM4P2_QuantIntra_I.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for intra Quantization + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +/** + * Function: omxVCM4P2_QuantIntra_I (6.2.4.4.2) + * + * Description: + * Performs quantization on intra block coefficients. This function supports + * bits_per_pixel == 8. + * + * Input Arguments: + * + * pSrcDst - pointer to the input intra block coefficients; must be aligned + * on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale). 
+ * blockIndex - block index indicating the component type and position, + * valid in the range 0 to 5, as defined in [ISO14496-2], subclause + * 6.1.3.8. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pSrcDst - pointer to the output (quantized) interblock coefficients. + * When shortVideoHeader==1, AC coefficients are saturated on the + * interval [-127, 127], and DC coefficients are saturated on the + * interval [1, 254]. When shortVideoHeader==0, AC coefficients + * are saturated on the interval [-2047, 2047]. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - pSrcDst is NULL. + * - blockIndex < 0 or blockIndex >= 10 + * - QP <= 0 or QP >= 32. + * + */ + +OMXResult omxVCM4P2_QuantIntra_I( + OMX_S16 * pSrcDst, + OMX_U8 QP, + OMX_INT blockIndex, + OMX_INT shortVideoHeader + ) +{ + + /* Definitions and Initializations*/ + /* Initialized to remove compilation error */ + OMX_INT dcScaler = 0, coeffCount,fSign; + OMX_INT maxClpAC, minClpAC; + + /* Argument error checks */ + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(((blockIndex < 0) || (blockIndex >= 10)), OMX_Sts_BadArgErr); + armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr); + /* One argument check is delayed until we have ascertained that */ + /* pQMatrix is not NULL. 
*/ + + + /* Set the Clip Range based on SVH on/off */ + if(shortVideoHeader == 1) + { + maxClpAC = 127; + minClpAC = -127; + dcScaler = 8; + /* Dequant the DC value, this applies to both the methods */ + pSrcDst[0] = armIntDivAwayFromZero (pSrcDst[0], dcScaler); + + /* Clip between 1 and 254 */ + pSrcDst[0] = (OMX_S16) armClip (1, 254, pSrcDst[0]); + } + else + { + maxClpAC = 2047; + minClpAC = -2047; + /* Calculate the DC scaler value */ + if ((blockIndex < 4) || (blockIndex > 5)) + { + if (QP >= 1 && QP <= 4) + { + dcScaler = 8; + } + else if (QP >= 5 && QP <= 8) + { + dcScaler = 2 * QP; + } + else if (QP >= 9 && QP <= 24) + { + dcScaler = QP + 8; + } + else + { + dcScaler = (2 * QP) - 16; + } + } + else if (blockIndex < 6) + { + if (QP >= 1 && QP <= 4) + { + dcScaler = 8; + } + else if (QP >= 5 && QP <= 24) + { + dcScaler = (QP + 13)/2; + } + else + { + dcScaler = QP - 6; + } + } + + /* Dequant the DC value, this applies to both the methods */ + pSrcDst[0] = armIntDivAwayFromZero (pSrcDst[0], dcScaler); + } + + /* Second Inverse quantisation method */ + for (coeffCount = 1; coeffCount < 64; coeffCount++) + { + fSign = armSignCheck (pSrcDst[coeffCount]); + pSrcDst[coeffCount] = armAbs(pSrcDst[coeffCount])/(2 * QP); + pSrcDst[coeffCount] *= fSign; + + /* Clip */ + pSrcDst[coeffCount] = + (OMX_S16) armClip (minClpAC, maxClpAC, pSrcDst[coeffCount]); + } + return OMX_Sts_NoErr; + +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c new file mode 100644 index 0000000000000000000000000000000000000000..6e0de5c62f0a96339b93c6fcbf916cc653d5d355 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c @@ -0,0 +1,96 @@ +/** + * + * File Name: omxVCM4P2_QuantInvInter_I.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, 
February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for inter inverse Quantization + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + + +/** + * Function: omxVCM4P2_QuantInvInter_I (6.2.5.3.2) + * + * Description: + * Performs the second inverse quantization mode on an intra/inter coded + * block. Supports bits_per_pixel = 8. The output coefficients are clipped to + * the range [-2048, 2047]. + * + * Input Arguments: + * + * pSrcDst - pointer to the input (quantized) intra/inter block; must be + * aligned on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale) + * videoComp - video component type of the current block. Takes one of the + * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra + * version only). + * shortVideoHeader - binary flag indicating presence of short_video_header + * (intra version only). + * + * Output Arguments: + * + * pSrcDst - pointer to the output (dequantized) intra/inter block + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; one or more of the following is + * true: + * - pSrcDst is NULL + * - QP <= 0 or QP >= 32 + * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE.
+ * + */ + +OMXResult omxVCM4P2_QuantInvInter_I( + OMX_S16 * pSrcDst, + OMX_INT QP + ) +{ + + OMX_INT coeffCount, Sign; + + /* Argument error checks */ + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr); + + /* Second Inverse quantisation method */ + for (coeffCount = 0; coeffCount < 64; coeffCount++) + { + /* check sign */ + Sign = armSignCheck (pSrcDst[coeffCount]); + + /* Quantize the coeff */ + if (QP & 0x1) + { + pSrcDst[coeffCount] = (2* armAbs(pSrcDst[coeffCount]) + 1) * QP; + pSrcDst[coeffCount] *= Sign; + } + else + { + pSrcDst[coeffCount] = (2* armAbs(pSrcDst[coeffCount]) + 1) + * QP - 1; + pSrcDst[coeffCount] *= Sign; + } + /* Saturate */ + pSrcDst[coeffCount] = armClip (-2048, 2047, pSrcDst[coeffCount]); + } + return OMX_Sts_NoErr; +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c new file mode 100644 index 0000000000000000000000000000000000000000..a946d7b905c448a3890acd43e5ca072e9c6e7a36 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c @@ -0,0 +1,153 @@ +/** + * + * File Name: omxVCM4P2_QuantInvIntra_I.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules for intra inverse Quantization + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + +/** + * Function: omxVCM4P2_QuantInvIntra_I (6.2.5.3.2) + * + * Description: + * Performs the second inverse quantization mode on an intra/inter coded + * block. Supports bits_per_pixel = 8. The output coefficients are clipped to + * the range [-2048, 2047]. 
+ * + * Input Arguments: + * + * pSrcDst - pointer to the input (quantized) intra/inter block; must be + * aligned on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale) + * videoComp - video component type of the current block. Takes one of the + * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra + * version only). + * shortVideoHeader - binary flag indicating presence of short_video_header + * (intra version only). + * + * Output Arguments: + * + * pSrcDst - pointer to the output (dequantized) intra/inter block + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; one or more of the following is + * true: + * - pSrcDst is NULL + * - QP <= 0 or QP >=31 + * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE. + * + */ + +OMXResult omxVCM4P2_QuantInvIntra_I( + OMX_S16 * pSrcDst, + OMX_INT QP, + OMXVCM4P2VideoComponent videoComp, + OMX_INT shortVideoHeader +) +{ + + /* Initialized to remove compilation error */ + OMX_INT dcScaler = 0, coeffCount, Sign; + + /* Argument error checks */ + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr); + armRetArgErrIf(((videoComp != OMX_VC_LUMINANCE) && (videoComp != OMX_VC_CHROMINANCE)), OMX_Sts_BadArgErr); + + /* Calculate the DC scaler value */ + + /* linear intra DC mode */ + if(shortVideoHeader) + { + dcScaler = 8; + } + /* nonlinear intra DC mode */ + else + { + + if (videoComp == OMX_VC_LUMINANCE) + { + if (QP >= 1 && QP <= 4) + { + dcScaler = 8; + } + else if (QP >= 5 && QP <= 8) + { + dcScaler = 2 * QP; + } + else if (QP >= 9 && QP <= 24) + { + dcScaler = QP + 8; + } + else + { + dcScaler = (2 * QP) - 16; + } + } + + else if (videoComp == OMX_VC_CHROMINANCE) + { + if (QP >= 1 && QP <= 4) + { + dcScaler = 8; + } + else if (QP >= 5 && QP <= 24) + { + dcScaler = (QP + 13)/2; + } + else + { + dcScaler = QP - 6; + } + } + } + /* Dequant the DC value, this applies to both the methods */ 
+ pSrcDst[0] = pSrcDst[0] * dcScaler; + + /* Saturate */ + pSrcDst[0] = armClip (-2048, 2047, pSrcDst[0]); + + /* Second Inverse quantisation method */ + for (coeffCount = 1; coeffCount < 64; coeffCount++) + { + /* check sign */ + Sign = armSignCheck (pSrcDst[coeffCount]); + + if (QP & 0x1) + { + pSrcDst[coeffCount] = (2* armAbs(pSrcDst[coeffCount]) + 1) * QP; + pSrcDst[coeffCount] *= Sign; + } + else + { + pSrcDst[coeffCount] = + (2* armAbs(pSrcDst[coeffCount]) + 1) * QP - 1; + pSrcDst[coeffCount] *= Sign; + } + + /* Saturate */ + pSrcDst[coeffCount] = armClip (-2048, 2047, pSrcDst[coeffCount]); + } + return OMX_Sts_NoErr; + +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c new file mode 100644 index 0000000000000000000000000000000000000000..6e0c59b1053c94f9540e6ed0c17cedfe487fa609 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c @@ -0,0 +1,108 @@ +/** + * + * File Name: omxVCM4P2_TransRecBlockCoef_inter.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules DCT->quant and reconstructing the inter texture data + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + + +/** + * Function: omxVCM4P2_TransRecBlockCoef_inter (6.2.4.4.5) + * + * Description: + * Implements DCT, and quantizes the DCT coefficients of the inter block + * while reconstructing the texture residual. There is no boundary check for + * the bit stream buffer. + * + * Input Arguments: + * + * pSrc -pointer to the residuals to be encoded; must be aligned on an + * 16-byte boundary. + * QP - quantization parameter. 
+ * shortVideoHeader - binary flag indicating presence of short_video_header;
+ *           forwarded unchanged to omxVCM4P2_QuantInter_I.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the quantized DCT coefficients buffer; must be aligned
+ *           on a 16-byte boundary.
+ * pRec - pointer to the reconstructed texture residuals; must be aligned
+ *           on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error (the status codes of the DCT / quantisation /
+ *           IDCT calls are not inspected)
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is either NULL or
+ *   not 16-byte aligned:
+ *   - pSrc
+ *   - pDst
+ *   - pRec
+ * - QP <= 0 or QP >= 32.
+ *
+ */
+
+OMXResult omxVCM4P2_TransRecBlockCoef_inter(
+     const OMX_S16 *pSrc,
+     OMX_S16 * pDst,
+     OMX_S16 * pRec,
+     OMX_U8 QP,
+     OMX_INT shortVideoHeader
+)
+{
+    /* 64 working coefficients plus 8 spare elements so that the start of
+       the working area can be moved to a 16-byte boundary */
+    OMX_S16 scratch[72];
+    OMX_S16 *pQuantCopy;
+    OMX_INT k;
+
+    /* Argument error checks: each pointer must be non-NULL and 16-byte
+       aligned, and QP must lie in 1..31 */
+    armRetArgErrIf((pSrc == NULL) || !armIs16ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf((pRec == NULL) || !armIs16ByteAligned(pRec), OMX_Sts_BadArgErr);
+    armRetArgErrIf((pDst == NULL) || !armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr);
+
+    /* Aligning the local buffer */
+    pQuantCopy = armAlignTo16Bytes(scratch);
+
+    /* Forward path: DCT of the residual into pDst, quantized in place */
+    omxVCM4P2_DCT8x8blk (pSrc, pDst);
+    omxVCM4P2_QuantInter_I(pDst, QP, shortVideoHeader);
+
+    /* Reconstruction path runs on a private copy so pDst keeps the
+       quantized coefficients */
+    for (k = 0; k < 64; k++)
+    {
+        pQuantCopy[k] = pDst[k];
+    }
+
+    omxVCM4P2_QuantInvInter_I(pQuantCopy, QP);
+    omxVCM4P2_IDCT8x8blk (pQuantCopy, pRec);
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c
b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c new file mode 100644 index 0000000000000000000000000000000000000000..dd444f91b6eebdb9052d509a0e57bf746edcd020 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c @@ -0,0 +1,260 @@ +/** + * + * File Name: omxVCM4P2_TransRecBlockCoef_intra.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Description: + * Contains modules DCT->quant and reconstructing the intra texture data + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + + +/** + * Function: omxVCM4P2_TransRecBlockCoef_intra (6.2.4.4.4) + * + * Description: + * Quantizes the DCT coefficients, implements intra block AC/DC coefficient + * prediction, and reconstructs the current intra block texture for prediction + * on the next frame. Quantized row and column coefficients are returned in + * the updated coefficient buffers. + * + * Input Arguments: + * + * pSrc - pointer to the pixels of current intra block; must be aligned on + * an 8-byte boundary. + * pPredBufRow - pointer to the coefficient row buffer containing + * ((num_mb_per_row * 2 + 1) * 8) elements of type OMX_S16. + * Coefficients are organized into blocks of eight as described + * below (Internal Prediction Coefficient Update Procedures). The + * DC coefficient is first, and the remaining buffer locations + * contain the quantized AC coefficients. Each group of eight row + * buffer elements combined with one element eight elements ahead + * contains the coefficient predictors of the neighboring block + * that is spatially above or to the left of the block currently to + * be decoded. 
A negative-valued DC coefficient indicates that this + * neighboring block is not INTRA-coded or out of bounds, and + * therefore the AC and DC coefficients are invalid. Pointer must + * be aligned on an 8-byte boundary. + * pPredBufCol - pointer to the prediction coefficient column buffer + * containing 16 elements of type OMX_S16. Coefficients are + * organized as described in section 6.2.2.5. Pointer must be + * aligned on an 8-byte boundary. + * pSumErr - pointer to a flag indicating whether or not AC prediction is + * required; AC prediction is enabled if *pSumErr >=0, but the + * value is not used for coefficient prediction, i.e., the sum of + * absolute differences starts from 0 for each call to this + * function. Otherwise AC prediction is disabled if *pSumErr < 0 . + * blockIndex - block index indicating the component type and position, as + * defined in [ISO14496-2], subclause 6.1.3.8. + * curQp - quantization parameter of the macroblock to which the current + * block belongs + * pQpBuf - pointer to a 2-element quantization parameter buffer; pQpBuf[0] + * contains the quantization parameter associated with the 8x8 + * block left of the current block (QPa), and pQpBuf[1] contains + * the quantization parameter associated with the 8x8 block above + * the current block (QPc). In the event that the corresponding + * block is outside of the VOP bound, the Qp value will not affect + * the intra prediction process, as described in [ISO14496-2], + * sub-clause 7.4.3.3, Adaptive AC Coefficient Prediction. + * srcStep - width of the source buffer; must be a multiple of 8. + * dstStep - width of the reconstructed destination buffer; must be a + * multiple of 16. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ * + * Output Arguments: + * + * pDst - pointer to the quantized DCT coefficient buffer; pDst[0] contains + * the predicted DC coefficient; the remaining entries contain the + * quantized AC coefficients (without prediction). The pointer + * pDstmust be aligned on a 16-byte boundary. + * pRec - pointer to the reconstructed texture; must be aligned on an + * 8-byte boundary. + * pPredBufRow - pointer to the updated coefficient row buffer + * pPredBufCol - pointer to the updated coefficient column buffer + * pPreACPredict - if prediction is enabled, the parameter points to the + * start of the buffer containing the coefficient differences for + * VLC encoding. The entry pPreACPredict[0]indicates prediction + * direction for the current block and takes one of the following + * values: OMX_VC_NONE (prediction disabled), OMX_VC_HORIZONTAL, or + * OMX_VC_VERTICAL. The entries + * pPreACPredict[1]-pPreACPredict[7]contain predicted AC + * coefficients. If prediction is disabled (*pSumErr<0) then the + * contents of this buffer are undefined upon return from the + * function + * pSumErr - pointer to the value of the accumulated AC coefficient errors, + * i.e., sum of the absolute differences between predicted and + * unpredicted AC coefficients + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments: + * - At least one of the following pointers is NULL: pSrc, pDst, pRec, + * pCoefBufRow, pCoefBufCol, pQpBuf, pPreACPredict, pSumErr. + * - blockIndex < 0 or blockIndex >= 10; + * - curQP <= 0 or curQP >= 32. + * - srcStep, or dstStep <= 0 or not a multiple of 8. + * - pDst is not 16-byte aligned: . + * - At least one of the following pointers is not 8-byte aligned: + * pSrc, pRec. + * + * Note: The coefficient buffers must be updated in accordance with the + * update procedures defined in section in 6.2.2. 
+ * + */ + +OMXResult omxVCM4P2_TransRecBlockCoef_intra( + const OMX_U8 *pSrc, + OMX_S16 * pDst, + OMX_U8 * pRec, + OMX_S16 *pPredBufRow, + OMX_S16 *pPredBufCol, + OMX_S16 * pPreACPredict, + OMX_INT *pSumErr, + OMX_INT blockIndex, + OMX_U8 curQp, + const OMX_U8 *pQpBuf, + OMX_INT srcStep, + OMX_INT dstStep, + OMX_INT shortVideoHeader +) +{ + /* 64 elements are needed but to align it to 16 bytes need + 8 more elements of padding */ + OMX_S16 tempBuf1[79], tempBuf2[79]; + OMX_S16 tempBuf3[79]; + OMX_S16 *pTempBuf1, *pTempBuf2,*pTempBuf3; + OMXVCM4P2VideoComponent videoComp; + OMX_U8 flag; + OMX_INT x, y, count, predDir; + OMX_INT predQP, ACPredFlag; + + + /* Aligning the local buffers */ + pTempBuf1 = armAlignTo16Bytes(tempBuf1); + pTempBuf2 = armAlignTo16Bytes(tempBuf2); + pTempBuf3 = armAlignTo16Bytes(tempBuf3); + + /* Argument error checks */ + armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pRec == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs8ByteAligned(pRec), OMX_Sts_BadArgErr); + armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr); + armRetArgErrIf(pPredBufRow == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pPredBufCol == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pPreACPredict == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pSumErr == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pQpBuf == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf((srcStep <= 0) || (dstStep <= 0) || + (dstStep & 7) || (srcStep & 7) + , OMX_Sts_BadArgErr); + armRetArgErrIf((blockIndex < 0) || (blockIndex > 9), OMX_Sts_BadArgErr); + + armRetArgErrIf((curQp <= 0) || (curQp >=32), OMX_Sts_BadArgErr); + + + /* Setting the videoComp */ + if (blockIndex <= 3) + { + videoComp = OMX_VC_LUMINANCE; + } + else + { + videoComp = OMX_VC_CHROMINANCE; + } + /* Converting from 2-d to 1-d buffer */ + for (y = 0, count = 0; y < 8; y++) + { + for(x= 0; x < 8; x++, count++) + 
{ + pTempBuf1[count] = pSrc[(y*srcStep) + x]; + } + } + + omxVCM4P2_DCT8x8blk (pTempBuf1, pTempBuf2); + omxVCM4P2_QuantIntra_I( + pTempBuf2, + curQp, + blockIndex, + shortVideoHeader); + + /* Converting from 1-D to 2-D buffer */ + for (y = 0, count = 0; y < 8; y++) + { + for(x = 0; x < 8; x++, count++) + { + /* storing tempbuf2 to tempbuf1 */ + pTempBuf1[count] = pTempBuf2[count]; + pDst[(y*dstStep) + x] = pTempBuf2[count]; + } + } + + /* AC and DC prediction */ + armVCM4P2_SetPredDir( + blockIndex, + pPredBufRow, + pPredBufCol, + &predDir, + &predQP, + pQpBuf); + + armRetDataErrIf(((predQP <= 0) || (predQP >= 32)), OMX_Sts_BadArgErr); + + flag = 1; + if (*pSumErr < 0) + { + ACPredFlag = 0; + } + else + { + ACPredFlag = 1; + } + + armVCM4P2_ACDCPredict( + pTempBuf2, + pPreACPredict, + pPredBufRow, + pPredBufCol, + curQp, + predQP, + predDir, + ACPredFlag, + videoComp, + flag, + pSumErr); + + /* Reconstructing the texture data */ + omxVCM4P2_QuantInvIntra_I( + pTempBuf1, + curQp, + videoComp, + shortVideoHeader); + omxVCM4P2_IDCT8x8blk (pTempBuf1, pTempBuf3); + for(count = 0; count < 64; count++) + { + pRec[count] = armMax(0,pTempBuf3[count]); + } + + return OMX_Sts_NoErr; +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/src/armVC_Version.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/src/armVC_Version.c new file mode 100644 index 0000000000000000000000000000000000000000..5d9368140505b5ca7168766b15bdd1b7f518b722 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/src/armVC_Version.c @@ -0,0 +1,6 @@ +#include "omxtypes.h" +#include "armCOMM_Version.h" + +#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS +const char * const omxVC_VersionDescription = "ARM OpenMAX DL v" ARM_VERSION_STRING " Rel=" OMX_ARM_RELEASE_TAG " Arch=" OMX_ARM_BUILD_ARCHITECTURE " Tools=" OMX_ARM_BUILD_TOOLCHAIN ; +#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */ diff --git 
a/media/libstagefright/codecs/on2/h264dec/source/DecTestBench.c b/media/libstagefright/codecs/on2/h264dec/source/DecTestBench.c new file mode 100755 index 0000000000000000000000000000000000000000..dcf2ef6fffab3fff75b92ff27468f5764f0729f4 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/DecTestBench.c @@ -0,0 +1,761 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "H264SwDecApi.h" +#include +#include +#include + +/*------------------------------------------------------------------------------ + Module defines +------------------------------------------------------------------------------*/ + +/* CHECK_MEMORY_USAGE prints and sums the memory allocated in calls to + * H264SwDecMalloc() */ +/* #define CHECK_MEMORY_USAGE */ + +/* _NO_OUT disables output file writing */ +/* #define _NO_OUT */ + +/* Debug prints */ +#define DEBUG(argv) printf argv + +/* CVS tag name for identification */ +const char tagName[256] = "$Name: FIRST_ANDROID_COPYRIGHT $"; + +void WriteOutput(char *filename, u8 *data, u32 picSize); +u32 NextPacket(u8 **pStrm); +u32 CropPicture(u8 *pOutImage, u8 *pInImage, + u32 picWidth, u32 picHeight, CropParams *pCropParams); + +/* Global variables for stream handling */ +u8 *streamStop = NULL; +u32 packetize = 0; +u32 nalUnitStream = 0; +FILE *foutput = NULL; + +#ifdef SOC_DESIGNER + +// Initialisation function defined in InitCache.s +extern void 
cache_init(void); + +/*------------------------------------------------------------------------------ + + Function name: $Sub$$main + + Purpose: + This function is called at the end of the C library initialisation and + before main. Its purpose is to do any further initialisation before the + application start. + +------------------------------------------------------------------------------*/ +int $Sub$$main(char argc, char * argv[]) +{ + cache_init(); // does some extra setup work setting up caches + return $Super$$main(argc, argv); // calls the original function +} +#endif + +/*------------------------------------------------------------------------------ + + Function name: main + + Purpose: + main function of decoder testbench. Provides command line interface + with file I/O for H.264 decoder. Prints out the usage information + when executed without arguments. + +------------------------------------------------------------------------------*/ + +int main(int argc, char **argv) +{ + + u32 i, tmp; + u32 maxNumPics = 0; + u8 *byteStrmStart; + u8 *imageData; + u8 *tmpImage = NULL; + u32 strmLen; + u32 picSize; + H264SwDecInst decInst; + H264SwDecRet ret; + H264SwDecInput decInput; + H264SwDecOutput decOutput; + H264SwDecPicture decPicture; + H264SwDecInfo decInfo; + H264SwDecApiVersion decVer; + u32 picDecodeNumber; + u32 picDisplayNumber; + u32 numErrors = 0; + u32 cropDisplay = 0; + u32 disableOutputReordering = 0; + + FILE *finput; + + char outFileName[256] = ""; + + /* Print API version number */ + decVer = H264SwDecGetAPIVersion(); + DEBUG(("H.264 Decoder API v%d.%d\n", decVer.major, decVer.minor)); + + /* Print tag name if '-T' argument present */ + if ( argc > 1 && strcmp(argv[1], "-T") == 0 ) + { + DEBUG(("%s\n", tagName)); + return 0; + } + + /* Check that enough command line arguments given, if not -> print usage + * information out */ + if (argc < 2) + { + DEBUG(( + "Usage: %s [-Nn] [-Ooutfile] [-P] [-U] [-C] [-R] [-T] file.h264\n", + argv[0])); + 
DEBUG(("\t-Nn forces decoding to stop after n pictures\n")); +#if defined(_NO_OUT) + DEBUG(("\t-Ooutfile output writing disabled at compile time\n")); +#else + DEBUG(("\t-Ooutfile write output to \"outfile\" (default out_wxxxhyyy.yuv)\n")); + DEBUG(("\t-Onone does not write output\n")); +#endif + DEBUG(("\t-P packet-by-packet mode\n")); + DEBUG(("\t-U NAL unit stream mode\n")); + DEBUG(("\t-C display cropped image (default decoded image)\n")); + DEBUG(("\t-R disable DPB output reordering\n")); + DEBUG(("\t-T to print tag name and exit\n")); + return 0; + } + + /* read command line arguments */ + for (i = 1; i < (u32)(argc-1); i++) + { + if ( strncmp(argv[i], "-N", 2) == 0 ) + { + maxNumPics = (u32)atoi(argv[i]+2); + } + else if ( strncmp(argv[i], "-O", 2) == 0 ) + { + strcpy(outFileName, argv[i]+2); + } + else if ( strcmp(argv[i], "-P") == 0 ) + { + packetize = 1; + } + else if ( strcmp(argv[i], "-U") == 0 ) + { + nalUnitStream = 1; + } + else if ( strcmp(argv[i], "-C") == 0 ) + { + cropDisplay = 1; + } + else if ( strcmp(argv[i], "-R") == 0 ) + { + disableOutputReordering = 1; + } + } + + /* open input file for reading, file name given by user. If file open + * fails -> exit */ + finput = fopen(argv[argc-1],"rb"); + if (finput == NULL) + { + DEBUG(("UNABLE TO OPEN INPUT FILE\n")); + return -1; + } + + /* check size of the input file -> length of the stream in bytes */ + fseek(finput,0L,SEEK_END); + strmLen = (u32)ftell(finput); + rewind(finput); + + /* allocate memory for stream buffer. if unsuccessful -> exit */ + byteStrmStart = (u8 *)malloc(sizeof(u8)*strmLen); + if (byteStrmStart == NULL) + { + DEBUG(("UNABLE TO ALLOCATE MEMORY\n")); + return -1; + } + + /* read input stream from file to buffer and close input file */ + fread(byteStrmStart, sizeof(u8), strmLen, finput); + fclose(finput); + + /* initialize decoder. 
If unsuccessful -> exit */ + ret = H264SwDecInit(&decInst, disableOutputReordering); + if (ret != H264SWDEC_OK) + { + DEBUG(("DECODER INITIALIZATION FAILED\n")); + free(byteStrmStart); + return -1; + } + + /* initialize H264SwDecDecode() input structure */ + streamStop = byteStrmStart + strmLen; + decInput.pStream = byteStrmStart; + decInput.dataLen = strmLen; + decInput.intraConcealmentMethod = 0; + + /* get pointer to next packet and the size of packet + * (for packetize or nalUnitStream modes) */ + if ( (tmp = NextPacket(&decInput.pStream)) != 0 ) + decInput.dataLen = tmp; + + picDecodeNumber = picDisplayNumber = 1; + /* main decoding loop */ + do + { + /* Picture ID is the picture number in decoding order */ + decInput.picId = picDecodeNumber; + + /* call API function to perform decoding */ + ret = H264SwDecDecode(decInst, &decInput, &decOutput); + + switch(ret) + { + + case H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY: + /* Stream headers were successfully decoded + * -> stream information is available for query now */ + + ret = H264SwDecGetInfo(decInst, &decInfo); + if (ret != H264SWDEC_OK) + return -1; + + DEBUG(("Profile %d\n", decInfo.profile)); + + DEBUG(("Width %d Height %d\n", + decInfo.picWidth, decInfo.picHeight)); + + if (cropDisplay && decInfo.croppingFlag) + { + DEBUG(("Cropping params: (%d, %d) %dx%d\n", + decInfo.cropParams.cropLeftOffset, + decInfo.cropParams.cropTopOffset, + decInfo.cropParams.cropOutWidth, + decInfo.cropParams.cropOutHeight)); + + /* Cropped frame size in planar YUV 4:2:0 */ + picSize = decInfo.cropParams.cropOutWidth * + decInfo.cropParams.cropOutHeight; + picSize = (3 * picSize)/2; + tmpImage = malloc(picSize); + if (tmpImage == NULL) + return -1; + } + else + { + /* Decoder output frame size in planar YUV 4:2:0 */ + picSize = decInfo.picWidth * decInfo.picHeight; + picSize = (3 * picSize)/2; + } + + DEBUG(("videoRange %d, matrixCoefficients %d\n", + decInfo.videoRange, decInfo.matrixCoefficients)); + + /* update H264SwDecDecode() 
input structure, number of bytes + * "consumed" is computed as difference between the new stream + * pointer and old stream pointer */ + decInput.dataLen -= + (u32)(decOutput.pStrmCurrPos - decInput.pStream); + decInput.pStream = decOutput.pStrmCurrPos; + + /* If -O option not used, generate default file name */ + if (outFileName[0] == 0) + sprintf(outFileName, "out_w%dh%d.yuv", + decInfo.picWidth, decInfo.picHeight); + break; + + case H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY: + /* Picture is ready and more data remains in input buffer + * -> update H264SwDecDecode() input structure, number of bytes + * "consumed" is computed as difference between the new stream + * pointer and old stream pointer */ + decInput.dataLen -= + (u32)(decOutput.pStrmCurrPos - decInput.pStream); + decInput.pStream = decOutput.pStrmCurrPos; + /* fall through */ + + case H264SWDEC_PIC_RDY: + + /*lint -esym(644,tmpImage,picSize) variable initialized at + * H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY case */ + + if (ret == H264SWDEC_PIC_RDY) + decInput.dataLen = NextPacket(&decInput.pStream); + + /* If enough pictures decoded -> force decoding to end + * by setting that no more stream is available */ + if (maxNumPics && picDecodeNumber == maxNumPics) + decInput.dataLen = 0; + + /* Increment decoding number for every decoded picture */ + picDecodeNumber++; + + /* use function H264SwDecNextPicture() to obtain next picture + * in display order. Function is called until no more images + * are ready for display */ + while ( H264SwDecNextPicture(decInst, &decPicture, 0) == + H264SWDEC_PIC_RDY ) + { + DEBUG(("PIC %d, type %s", picDisplayNumber, + decPicture.isIdrPicture ? 
"IDR" : "NON-IDR")); + if (picDisplayNumber != decPicture.picId) + DEBUG((", decoded pic %d", decPicture.picId)); + if (decPicture.nbrOfErrMBs) + { + DEBUG((", concealed %d\n", decPicture.nbrOfErrMBs)); + } + else + DEBUG(("\n")); + fflush(stdout); + + numErrors += decPicture.nbrOfErrMBs; + + /* Increment display number for every displayed picture */ + picDisplayNumber++; + + /*lint -esym(644,decInfo) always initialized if pictures + * available for display */ + + /* Write output picture to file */ + imageData = (u8*)decPicture.pOutputPicture; + if (cropDisplay && decInfo.croppingFlag) + { + tmp = CropPicture(tmpImage, imageData, + decInfo.picWidth, decInfo.picHeight, + &decInfo.cropParams); + if (tmp) + return -1; + WriteOutput(outFileName, tmpImage, picSize); + } + else + { + WriteOutput(outFileName, imageData, picSize); + } + } + + break; + + case H264SWDEC_STRM_PROCESSED: + case H264SWDEC_STRM_ERR: + /* Input stream was decoded but no picture is ready + * -> Get more data */ + decInput.dataLen = NextPacket(&decInput.pStream); + break; + + default: + DEBUG(("FATAL ERROR\n")); + return -1; + + } + /* keep decoding until all data from input stream buffer consumed */ + } while (decInput.dataLen > 0); + + /* if output in display order is preferred, the decoder shall be forced + * to output pictures remaining in decoded picture buffer. Use function + * H264SwDecNextPicture() to obtain next picture in display order. Function + * is called until no more images are ready for display. Second parameter + * for the function is set to '1' to indicate that this is end of the + * stream and all pictures shall be output */ + while (H264SwDecNextPicture(decInst, &decPicture, 1) == H264SWDEC_PIC_RDY) + { + DEBUG(("PIC %d, type %s", picDisplayNumber, + decPicture.isIdrPicture ? 
"IDR" : "NON-IDR")); + if (picDisplayNumber != decPicture.picId) + DEBUG((", decoded pic %d", decPicture.picId)); + if (decPicture.nbrOfErrMBs) + { + DEBUG((", concealed %d\n", decPicture.nbrOfErrMBs)); + } + else + DEBUG(("\n")); + fflush(stdout); + + numErrors += decPicture.nbrOfErrMBs; + + /* Increment display number for every displayed picture */ + picDisplayNumber++; + + /* Write output picture to file */ + imageData = (u8*)decPicture.pOutputPicture; + if (cropDisplay && decInfo.croppingFlag) + { + tmp = CropPicture(tmpImage, imageData, + decInfo.picWidth, decInfo.picHeight, + &decInfo.cropParams); + if (tmp) + return -1; + WriteOutput(outFileName, tmpImage, picSize); + } + else + { + WriteOutput(outFileName, imageData, picSize); + } + } + + /* release decoder instance */ + H264SwDecRelease(decInst); + + if (foutput) + fclose(foutput); + + /* free allocated buffers */ + free(byteStrmStart); + free(tmpImage); + + DEBUG(("Output file: %s\n", outFileName)); + + DEBUG(("DECODING DONE\n")); + if (numErrors || picDecodeNumber == 1) + { + DEBUG(("ERRORS FOUND\n")); + return 1; + } + + return 0; +} + +/*------------------------------------------------------------------------------ + + Function name: WriteOutput + + Purpose: + Write picture pointed by data to file. Size of the + picture in pixels is indicated by picSize. + +------------------------------------------------------------------------------*/ +void WriteOutput(char *filename, u8 *data, u32 picSize) +{ + + /* foutput is global file pointer */ + if (foutput == NULL) + { + /* open output file for writing, can be disabled with define. 
+ * If file open fails -> exit */ + if (strcmp(filename, "none") != 0) + { +#if !defined(_NO_OUT) + foutput = fopen(filename, "wb"); + if (foutput == NULL) + { + DEBUG(("UNABLE TO OPEN OUTPUT FILE\n")); + exit(100); + } +#endif + } + } + + if (foutput && data) + fwrite(data, 1, picSize, foutput); +} + +/*------------------------------------------------------------------------------ + + Function name: NextPacket + + Purpose: + Get the pointer to start of next packet in input stream. Uses + global variables 'packetize' and 'nalUnitStream' to determine the + decoder input stream mode and 'streamStop' to determine the end + of stream. There are three possible stream modes: + default - the whole stream at once + packetize - a single NAL-unit with start code prefix + nalUnitStream - a single NAL-unit without start code prefix + + pStrm stores pointer to the start of previous decoder input and is + replaced with pointer to the start of the next decoder input. + + Returns the packet size in bytes + +------------------------------------------------------------------------------*/ +u32 NextPacket(u8 **pStrm) +{ + + u32 index; + u32 maxIndex; + u32 zeroCount; + u8 *stream; + u8 byte; + static u32 prevIndex=0; + + /* For default stream mode all the stream is in first packet */ + if (!packetize && !nalUnitStream) + return 0; + + index = 0; + stream = *pStrm + prevIndex; + maxIndex = (u32)(streamStop - stream); + + if (maxIndex == 0) + return(0); + + /* leading zeros of first NAL unit */ + do + { + byte = stream[index++]; + } while (byte != 1 && index < maxIndex); + + /* invalid start code prefix */ + if (index == maxIndex || index < 3) + { + DEBUG(("INVALID BYTE STREAM\n")); + exit(100); + } + + /* nalUnitStream is without start code prefix */ + if (nalUnitStream) + { + stream += index; + maxIndex -= index; + index = 0; + } + + zeroCount = 0; + + /* Search stream for next start code prefix */ + /*lint -e(716) while(1) used consciously */ + while (1) + { + byte = 
stream[index++]; + if (!byte) + zeroCount++; + + if ( (byte == 0x01) && (zeroCount >= 2) ) + { + /* Start code prefix has two zeros + * Third zero is assumed to be leading zero of next packet + * Fourth and more zeros are assumed to be trailing zeros of this + * packet */ + if (zeroCount > 3) + { + index -= 4; + zeroCount -= 3; + } + else + { + index -= zeroCount+1; + zeroCount = 0; + } + break; + } + else if (byte) + zeroCount = 0; + + if (index == maxIndex) + { + break; + } + + } + + /* Store pointer to the beginning of the packet */ + *pStrm = stream; + prevIndex = index; + + /* nalUnitStream is without trailing zeros */ + if (nalUnitStream) + index -= zeroCount; + + return(index); + +} + +/*------------------------------------------------------------------------------ + + Function name: CropPicture + + Purpose: + Perform cropping for picture. Input picture pInImage with dimensions + picWidth x picHeight is cropped with pCropParams and the resulting + picture is stored in pOutImage. + +------------------------------------------------------------------------------*/ +u32 CropPicture(u8 *pOutImage, u8 *pInImage, + u32 picWidth, u32 picHeight, CropParams *pCropParams) +{ + + u32 i, j; + u32 outWidth, outHeight; + u8 *pOut, *pIn; + + if (pOutImage == NULL || pInImage == NULL || pCropParams == NULL || + !picWidth || !picHeight) + { + /* just to prevent lint warning, returning non-zero will result in + * return without freeing the memory */ + free(pOutImage); + return(1); + } + + if ( ((pCropParams->cropLeftOffset + pCropParams->cropOutWidth) > + picWidth ) || + ((pCropParams->cropTopOffset + pCropParams->cropOutHeight) > + picHeight ) ) + { + /* just to prevent lint warning, returning non-zero will result in + * return without freeing the memory */ + free(pOutImage); + return(1); + } + + outWidth = pCropParams->cropOutWidth; + outHeight = pCropParams->cropOutHeight; + + /* Calculate starting pointer for luma */ + pIn = pInImage + pCropParams->cropTopOffset*picWidth + 
+ pCropParams->cropLeftOffset; + pOut = pOutImage; + + /* Copy luma pixel values */ + for (i = outHeight; i; i--) + { + for (j = outWidth; j; j--) + { + *pOut++ = *pIn++; + } + pIn += picWidth - outWidth; + } + + outWidth >>= 1; + outHeight >>= 1; + + /* Calculate starting pointer for cb */ + pIn = pInImage + picWidth*picHeight + + pCropParams->cropTopOffset*picWidth/4 + pCropParams->cropLeftOffset/2; + + /* Copy cb pixel values */ + for (i = outHeight; i; i--) + { + for (j = outWidth; j; j--) + { + *pOut++ = *pIn++; + } + pIn += picWidth/2 - outWidth; + } + + /* Calculate starting pointer for cr */ + pIn = pInImage + 5*picWidth*picHeight/4 + + pCropParams->cropTopOffset*picWidth/4 + pCropParams->cropLeftOffset/2; + + /* Copy cr pixel values */ + for (i = outHeight; i; i--) + { + for (j = outWidth; j; j--) + { + *pOut++ = *pIn++; + } + pIn += picWidth/2 - outWidth; + } + + return (0); +} + +/*------------------------------------------------------------------------------ + + Function name: H264SwDecTrace + + Purpose: + Example implementation of H264SwDecTrace function. Prototype of this + function is given in H264SwDecApi.h. This implementation appends + trace messages to file named 'dec_api.trc'. + +------------------------------------------------------------------------------*/ +void H264SwDecTrace(char *string) +{ + FILE *fp; + + fp = fopen("dec_api.trc", "at"); + + if (!fp) + return; + + fwrite(string, 1, strlen(string), fp); + fwrite("\n", 1,1, fp); + + fclose(fp); +} + +/*------------------------------------------------------------------------------ + + Function name: H264SwDecMalloc + + Purpose: + Example implementation of H264SwDecMalloc function. Prototype of this + function is given in H264SwDecApi.h. This implementation uses + library function malloc for allocation of memory. 
+ +------------------------------------------------------------------------------*/ +void* H264SwDecMalloc(u32 size) +{ + +#if defined(CHECK_MEMORY_USAGE) + /* Note that if the decoder has to free and reallocate some of the buffers + * the total value will be invalid */ + static u32 numBytes = 0; + numBytes += size; + DEBUG(("Allocated %d bytes, total %d\n", size, numBytes)); +#endif + + return malloc(size); +} + +/*------------------------------------------------------------------------------ + + Function name: H264SwDecFree + + Purpose: + Example implementation of H264SwDecFree function. Prototype of this + function is given in H264SwDecApi.h. This implementation uses + library function free for freeing of memory. + +------------------------------------------------------------------------------*/ +void H264SwDecFree(void *ptr) +{ + free(ptr); +} + +/*------------------------------------------------------------------------------ + + Function name: H264SwDecMemcpy + + Purpose: + Example implementation of H264SwDecMemcpy function. Prototype of this + function is given in H264SwDecApi.h. This implementation uses + library function memcpy to copy src to dest. + +------------------------------------------------------------------------------*/ +void H264SwDecMemcpy(void *dest, void *src, u32 count) +{ + memcpy(dest, src, count); +} + +/*------------------------------------------------------------------------------ + + Function name: H264SwDecMemset + + Purpose: + Example implementation of H264SwDecMemset function. Prototype of this + function is given in H264SwDecApi.h. This implementation uses + library function memset to set content of memory area pointed by ptr. 
+ +------------------------------------------------------------------------------*/ +void H264SwDecMemset(void *ptr, i32 value, u32 count) +{ + memset(ptr, value, count); +} + diff --git a/media/libstagefright/codecs/on2/h264dec/source/EvaluationTestBench.c b/media/libstagefright/codecs/on2/h264dec/source/EvaluationTestBench.c new file mode 100755 index 0000000000000000000000000000000000000000..aadc75f5519f5969386d76f62b67e1c56474702b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/EvaluationTestBench.c @@ -0,0 +1,350 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "H264SwDecApi.h" +#include +#include +#include + +void WriteOutput(FILE *fid, u8 *data, u32 picSize); + +/*------------------------------------------------------------------------------ + + Function name: main + + Purpose: + main function. Assuming that executable is named 'decoder' the usage + is as follows + + decoder inputFileName + + , where inputFileName shall be name of file containing h264 stream + data. 
+ +------------------------------------------------------------------------------*/ +int main(int argc, char **argv) +{ + + u8 *byteStrmStart; + u8 *byteStrm; + u32 strmLen; + u32 picSize; + H264SwDecInst decInst; + H264SwDecRet ret; + H264SwDecInput decInput; + H264SwDecOutput decOutput; + H264SwDecPicture decPicture; + H264SwDecInfo decInfo; + u32 picNumber; + + FILE *finput; + FILE *foutput; + + /* Check that enough command line arguments given, if not -> print usage + * information out */ + if (argc < 2) + { + printf( "Usage: %s file.h264\n", argv[0]); + return -1; + } + + /* open output file for writing, output file named out.yuv. If file open + * fails -> exit */ + foutput = fopen("out.yuv", "wb"); + if (foutput == NULL) + { + printf("UNABLE TO OPEN OUTPUT FILE\n"); + return -1; + } + + /* open input file for reading, file name given by user. If file open + * fails -> exit */ + finput = fopen(argv[argc-1], "rb"); + if (finput == NULL) + { + printf("UNABLE TO OPEN INPUT FILE\n"); + return -1; + } + + /* check size of the input file -> length of the stream in bytes */ + fseek(finput, 0L, SEEK_END); + strmLen = (u32)ftell(finput); + rewind(finput); + + /* allocate memory for stream buffer, exit if unsuccessful */ + byteStrm = byteStrmStart = (u8 *)H264SwDecMalloc(sizeof(u8)*strmLen); + if (byteStrm == NULL) + { + printf("UNABLE TO ALLOCATE MEMORY\n"); + return -1; + } + + /* read input stream from file to buffer and close input file */ + fread(byteStrm, sizeof(u8), strmLen, finput); + fclose(finput); + + /* initialize decoder. If unsuccessful -> exit */ + ret = H264SwDecInit(&decInst, 0); + if (ret != H264SWDEC_OK) + { + printf("DECODER INITIALIZATION FAILED\n"); + return -1; + } + + /* initialize H264SwDecDecode() input structure */ + decInput.pStream = byteStrmStart; + decInput.dataLen = strmLen; + decInput.intraConcealmentMethod = 0; + + picNumber = 0; + + /* For performance measurements, read the start time (in seconds) here. 
+ * The decoding time should be measured over several frames and after + * that average fps (frames/second) can be calculated. + * + * startTime = GetTime(); + * + * To prevent calculating file I/O latensies as a decoding time, + * comment out WriteOutput function call. Also prints to stdout might + * consume considerable amount of cycles during measurement */ + + /* main decoding loop */ + do + { + /* call API function to perform decoding */ + ret = H264SwDecDecode(decInst, &decInput, &decOutput); + + switch(ret) + { + + case H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY: + + /* picture dimensions are available for query now */ + ret = H264SwDecGetInfo(decInst, &decInfo); + if (ret != H264SWDEC_OK) + return -1; + + /* picture size in pixels */ + picSize = decInfo.picWidth * decInfo.picHeight; + /* memory needed for YCbCr 4:2:0 picture in bytes */ + picSize = (3 * picSize)/2; + /* memory needed for 16-bit RGB picture in bytes + * picSize = (decInfo.picWidth * decInfo.picHeight) * 2; */ + + printf("Width %d Height %d\n", + decInfo.picWidth, decInfo.picHeight); + + /* update H264SwDecDecode() input structure, number of bytes + * "consumed" is computed as difference between the new stream + * pointer and old stream pointer */ + decInput.dataLen -= + (u32)(decOutput.pStrmCurrPos - decInput.pStream); + decInput.pStream = decOutput.pStrmCurrPos; + break; + + case H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY: + case H264SWDEC_PIC_RDY: + + /* update H264SwDecDecode() input structure, number of bytes + * "consumed" is computed as difference between the new stream + * pointer and old stream pointer */ + decInput.dataLen -= + (u32)(decOutput.pStrmCurrPos - decInput.pStream); + decInput.pStream = decOutput.pStrmCurrPos; + + /* use function H264SwDecNextPicture() to obtain next picture + * in display order. 
Function is called until no more images + * are ready for display */ + while (H264SwDecNextPicture(decInst, &decPicture, 0) == + H264SWDEC_PIC_RDY) { picNumber++; + + printf("PIC %d, type %s, concealed %d\n", picNumber, + decPicture.isIdrPicture ? "IDR" : "NON-IDR", + decPicture.nbrOfErrMBs); + fflush(stdout); + + /* Do color conversion if needed to get display image + * in RGB-format + * + * YuvToRgb( decPicture.pOutputPicture, pRgbPicture ); */ + + /* write next display image to output file */ + WriteOutput(foutput, (u8*)decPicture.pOutputPicture, + picSize); + } + + break; + + case H264SWDEC_EVALUATION_LIMIT_EXCEEDED: + /* evaluation version of the decoder has limited decoding + * capabilities */ + printf("EVALUATION LIMIT REACHED\n"); + goto end; + + default: + printf("UNRECOVERABLE ERROR\n"); + return -1; + } + /* keep decoding until all data from input stream buffer consumed */ + } while (decInput.dataLen > 0); + +end: + + /* if output in display order is preferred, the decoder shall be forced + * to output pictures remaining in decoded picture buffer. Use function + * H264SwDecNextPicture() to obtain next picture in display order. Function + * is called until no more images are ready for display. Second parameter + * for the function is set to '1' to indicate that this is end of the + * stream and all pictures shall be output */ + while (H264SwDecNextPicture(decInst, &decPicture, 1) == + H264SWDEC_PIC_RDY) { + + picNumber++; + + printf("PIC %d, type %s, concealed %d\n", picNumber, + decPicture.isIdrPicture ? "IDR" : "NON-IDR", + decPicture.nbrOfErrMBs); + fflush(stdout); + + /* Do color conversion if needed to get display image + * in RGB-format + * + * YuvToRgb( decPicture.pOutputPicture, pRgbPicture ); */ + + /* write next display image to output file */ + WriteOutput(foutput, (u8*)decPicture.pOutputPicture, picSize); + } + + /* For performance measurements, read the end time (in seconds) here. 
+ * + * endTime = GetTime(); + * + * Now the performance can be calculated as frames per second: + * fps = picNumber / (endTime - startTime); */ + + + /* release decoder instance */ + H264SwDecRelease(decInst); + + /* close output file */ + fclose(foutput); + + /* free byte stream buffer */ + free(byteStrmStart); + + return 0; + +} + +/*------------------------------------------------------------------------------ + + Function name: WriteOutput + + Purpose: + Write picture pointed by data to file pointed by fid. Size of the + picture in pixels is indicated by picSize. + +------------------------------------------------------------------------------*/ +void WriteOutput(FILE *fid, u8 *data, u32 picSize) +{ + fwrite(data, 1, picSize, fid); +} + +/*------------------------------------------------------------------------------ + + Function name: H264SwDecTrace + + Purpose: + Example implementation of H264SwDecTrace function. Prototype of this + function is given in H264SwDecApi.h. This implementation appends + trace messages to file named 'dec_api.trc'. + +------------------------------------------------------------------------------*/ +void H264SwDecTrace(char *string) +{ + FILE *fp; + + fp = fopen("dec_api.trc", "at"); + + if (!fp) + return; + + fwrite(string, 1, strlen(string), fp); + fwrite("\n", 1,1, fp); + + fclose(fp); +} + +/*------------------------------------------------------------------------------ + + Function name: H264SwDecmalloc + + Purpose: + Example implementation of H264SwDecMalloc function. Prototype of this + function is given in H264SwDecApi.h. This implementation uses + library function malloc for allocation of memory. + +------------------------------------------------------------------------------*/ +void* H264SwDecMalloc(u32 size) +{ + return malloc(size); +} + +/*------------------------------------------------------------------------------ + + Function name: H264SwDecFree + + Purpose: + Example implementation of H264SwDecFree function. 
/*------------------------------------------------------------------------------

    Function name:  H264SwDecMemcpy

    Purpose:
        Example implementation of H264SwDecMemcpy function. Prototype of this
        function is given in H264SwDecApi.h. This implementation uses
        library function memcpy to copy src to dest.

    Inputs:
        src     start of the bytes to copy
        count   number of bytes to copy

    Outputs:
        dest    receives count bytes; as with memcpy, the source and
                destination areas must not overlap

------------------------------------------------------------------------------*/
void H264SwDecMemcpy(void *dest, void *src, u32 count)
{
    /* thin forwarder: no validation is done here, arguments go to memcpy as is */
    memcpy(dest, src, count);
}
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + H264SwDecInit + H264SwDecGetInfo + H264SwDecRelease + H264SwDecDecode + H264SwDecGetAPIVersion + H264SwDecNextPicture + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ +#include +#include "basetype.h" +#include "h264bsd_container.h" +#include "H264SwDecApi.h" +#include "h264bsd_decoder.h" +#include "h264bsd_util.h" + +/*------------------------------------------------------------------------------ + Version Information +------------------------------------------------------------------------------*/ + +#define H264SWDEC_MAJOR_VERSION 2 +#define H264SWDEC_MINOR_VERSION 3 + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +H264DEC_TRACE Trace H264 Decoder API function calls. +H264DEC_EVALUATION Compile evaluation version, restricts number of frames + that can be decoded + +-------------------------------------------------------------------------------- + 3. 
Module defines +------------------------------------------------------------------------------*/ + +#ifdef H264DEC_TRACE +#include +#define DEC_API_TRC(str) H264SwDecTrace(str) +#else +#define DEC_API_TRC(str) +#endif + +#ifdef H264DEC_EVALUATION +#define H264DEC_EVALUATION_LIMIT 500 +#endif + +void H264SwDecTrace(char *string) { +} + +void* H264SwDecMalloc(u32 size) { + return malloc(size); +} + +void H264SwDecFree(void *ptr) { + free(ptr); +} + +void H264SwDecMemcpy(void *dest, void *src, u32 count) { + memcpy(dest, src, count); +} + +void H264SwDecMemset(void *ptr, i32 value, u32 count) { + memset(ptr, value, count); +} + + +/*------------------------------------------------------------------------------ + + Function: H264SwDecInit() + + Functional description: + Initialize decoder software. Function reserves memory for the + decoder instance and calls h264bsdInit to initialize the + instance data. + + Inputs: + noOutputReordering flag to indicate decoder that it doesn't have + to try to provide output pictures in display + order, saves memory + + Outputs: + decInst pointer to initialized instance is stored here + + Returns: + H264SWDEC_OK successfully initialized the instance + H264SWDEC_INITFAIL initialization failed + H264SWDEC_PARAM_ERR invalid parameters + H264SWDEC_MEM_FAIL memory allocation failed + +------------------------------------------------------------------------------*/ + +H264SwDecRet H264SwDecInit(H264SwDecInst *decInst, u32 noOutputReordering) +{ + u32 rv = 0; + + decContainer_t *pDecCont; + + DEC_API_TRC("H264SwDecInit#"); + + /* check that right shift on negative numbers is performed signed */ + /*lint -save -e* following check causes multiple lint messages */ + if ( ((-1)>>1) != (-1) ) + { + DEC_API_TRC("H264SwDecInit# ERROR: Right shift is not signed"); + return(H264SWDEC_INITFAIL); + } + /*lint -restore */ + + if (decInst == NULL) + { + DEC_API_TRC("H264SwDecInit# ERROR: decInst == NULL"); + return(H264SWDEC_PARAM_ERR); + } + + pDecCont 
= (decContainer_t *)H264SwDecMalloc(sizeof(decContainer_t)); + + if (pDecCont == NULL) + { + DEC_API_TRC("H264SwDecInit# ERROR: Memory allocation failed"); + return(H264SWDEC_MEMFAIL); + } + +#ifdef H264DEC_TRACE + sprintf(pDecCont->str, "H264SwDecInit# decInst %p noOutputReordering %d", + (void*)decInst, noOutputReordering); + DEC_API_TRC(pDecCont->str); +#endif + + rv = h264bsdInit(&pDecCont->storage, noOutputReordering); + if (rv != HANTRO_OK) + { + H264SwDecRelease(pDecCont); + return(H264SWDEC_MEMFAIL); + } + + pDecCont->decStat = INITIALIZED; + pDecCont->picNumber = 0; + +#ifdef H264DEC_TRACE + sprintf(pDecCont->str, "H264SwDecInit# OK: return %p", (void*)pDecCont); + DEC_API_TRC(pDecCont->str); +#endif + + *decInst = (decContainer_t *)pDecCont; + + return(H264SWDEC_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: H264SwDecGetInfo() + + Functional description: + This function provides read access to decoder information. This + function should not be called before H264SwDecDecode function has + indicated that headers are ready. 
+ + Inputs: + decInst decoder instance + + Outputs: + pDecInfo pointer to info struct where data is written + + Returns: + H264SWDEC_OK success + H264SWDEC_PARAM_ERR invalid parameters + H264SWDEC_HDRS_NOT_RDY information not available yet + +------------------------------------------------------------------------------*/ + +H264SwDecRet H264SwDecGetInfo(H264SwDecInst decInst, H264SwDecInfo *pDecInfo) +{ + + storage_t *pStorage; + + DEC_API_TRC("H264SwDecGetInfo#"); + + if (decInst == NULL || pDecInfo == NULL) + { + DEC_API_TRC("H264SwDecGetInfo# ERROR: decInst or pDecInfo is NULL"); + return(H264SWDEC_PARAM_ERR); + } + + pStorage = &(((decContainer_t *)decInst)->storage); + + if (pStorage->activeSps == NULL || pStorage->activePps == NULL) + { + DEC_API_TRC("H264SwDecGetInfo# ERROR: Headers not decoded yet"); + return(H264SWDEC_HDRS_NOT_RDY); + } + +#ifdef H264DEC_TRACE + sprintf(((decContainer_t*)decInst)->str, + "H264SwDecGetInfo# decInst %p pDecInfo %p", decInst, (void*)pDecInfo); + DEC_API_TRC(((decContainer_t*)decInst)->str); +#endif + + /* h264bsdPicWidth and -Height return dimensions in macroblock units, + * picWidth and -Height in pixels */ + pDecInfo->picWidth = h264bsdPicWidth(pStorage) << 4; + pDecInfo->picHeight = h264bsdPicHeight(pStorage) << 4; + pDecInfo->videoRange = h264bsdVideoRange(pStorage); + pDecInfo->matrixCoefficients = h264bsdMatrixCoefficients(pStorage); + + h264bsdCroppingParams(pStorage, + &pDecInfo->croppingFlag, + &pDecInfo->cropParams.cropLeftOffset, + &pDecInfo->cropParams.cropOutWidth, + &pDecInfo->cropParams.cropTopOffset, + &pDecInfo->cropParams.cropOutHeight); + + /* sample aspect ratio */ + h264bsdSampleAspectRatio(pStorage, + &pDecInfo->parWidth, + &pDecInfo->parHeight); + + /* profile */ + pDecInfo->profile = h264bsdProfile(pStorage); + + DEC_API_TRC("H264SwDecGetInfo# OK"); + + return(H264SWDEC_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: H264SwDecRelease() + + 
Functional description: + Release the decoder instance. Function calls h264bsdShutDown to + release instance data and frees the memory allocated for the + instance. + + Inputs: + decInst Decoder instance + + Outputs: + none + + Returns: + none + +------------------------------------------------------------------------------*/ + +void H264SwDecRelease(H264SwDecInst decInst) +{ + + decContainer_t *pDecCont; + + DEC_API_TRC("H264SwDecRelease#"); + + if (decInst == NULL) + { + DEC_API_TRC("H264SwDecRelease# ERROR: decInst == NULL"); + return; + } + + pDecCont = (decContainer_t*)decInst; + +#ifdef H264DEC_TRACE + sprintf(pDecCont->str, "H264SwDecRelease# decInst %p",decInst); + DEC_API_TRC(pDecCont->str); +#endif + + h264bsdShutdown(&pDecCont->storage); + + H264SwDecFree(pDecCont); + +} + +/*------------------------------------------------------------------------------ + + Function: H264SwDecDecode + + Functional description: + Decode stream data. Calls h264bsdDecode to do the actual decoding. 
/*------------------------------------------------------------------------------

    Function: H264SwDecDecode

        Functional description:
            Decode stream data. Calls h264bsdDecode repeatedly on the input
            buffer until a picture or new headers are ready, a memory
            allocation error occurs, or the buffer is exhausted.

        Input:
            decInst     decoder instance
            pInput      pointer to input struct; pStream/dataLen describe the
                        buffer, picId and intraConcealmentMethod are passed
                        on to the decoder core

        Outputs:
            pOutput     pointer to output struct; pStrmCurrPos is set to the
                        first byte not yet consumed (it stays NULL only when
                        the call fails before decoding starts)

        Returns:
            H264SWDEC_NOT_INITIALIZED   decoder instance not initialized yet
            H264SWDEC_PARAM_ERR         invalid parameters (NULL structs,
                                        NULL stream or zero dataLen)

            H264SWDEC_STRM_PROCESSED    stream buffer decoded
            H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY   headers decoded,
                                                stream buffer not finished
            H264SWDEC_PIC_RDY                   decoding of a picture finished
            H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY    decoding of a picture finished,
                                                stream buffer not finished
            H264SWDEC_STRM_ERR                  serious error in decoding, no
                                                valid parameter sets available
                                                to decode picture data
            H264SWDEC_MEMFAIL                   decoder core reported a memory
                                                allocation error
            H264SWDEC_EVALUATION_LIMIT_EXCEEDED this can only occur when
                                                evaluation version is used,
                                                max number of frames reached

------------------------------------------------------------------------------*/

H264SwDecRet H264SwDecDecode(H264SwDecInst decInst, H264SwDecInput *pInput,
                  H264SwDecOutput *pOutput)
{

    decContainer_t *pDecCont;
    u32 strmLen;
    u32 numReadBytes;
    u8 *tmpStream;
    u32 decResult = 0;
    /* result when the loop below consumes the buffer without any event */
    H264SwDecRet returnValue = H264SWDEC_STRM_PROCESSED;

    DEC_API_TRC("H264SwDecDecode#");

    /* Check that function input parameters are valid */
    if (pInput == NULL || pOutput == NULL)
    {
        DEC_API_TRC("H264SwDecDecode# ERROR: pInput or pOutput is NULL");
        return(H264SWDEC_PARAM_ERR);
    }

    if ((pInput->pStream == NULL) || (pInput->dataLen == 0))
    {
        DEC_API_TRC("H264SwDecDecode# ERROR: Invalid input parameters");
        return(H264SWDEC_PARAM_ERR);
    }

    pDecCont = (decContainer_t *)decInst;

    /* Check if decoder is in an incorrect mode */
    if (decInst == NULL || pDecCont->decStat == UNINITIALIZED)
    {
        DEC_API_TRC("H264SwDecDecode# ERROR: Decoder not initialized");
        return(H264SWDEC_NOT_INITIALIZED);
    }

#ifdef H264DEC_EVALUATION
    /* evaluation builds stop after a fixed number of decoded pictures */
    if (pDecCont->picNumber >= H264DEC_EVALUATION_LIMIT)
        return(H264SWDEC_EVALUATION_LIMIT_EXCEEDED);
#endif

#ifdef H264DEC_TRACE
    sprintf(pDecCont->str, "H264SwDecDecode# decInst %p pInput %p pOutput %p",
            decInst, (void*)pInput, (void*)pOutput);
    DEC_API_TRC(pDecCont->str);
#endif

    pOutput->pStrmCurrPos = NULL;

    /* numReadBytes starts at 0 so the NEW_HEADERS replay path below, which
     * does not call h264bsdDecode, advances the stream by nothing */
    numReadBytes = 0;
    strmLen = pInput->dataLen;
    tmpStream = pInput->pStream;
    pDecCont->storage.intraConcealmentFlag = pInput->intraConcealmentMethod;

    do
    {
        /* Return HDRS_RDY after DPB flush caused by new SPS */
        if (pDecCont->decStat == NEW_HEADERS)
        {
            decResult = H264BSD_HDRS_RDY;
            pDecCont->decStat = INITIALIZED;
        }
        else /* Continue decoding normally */
        {
            decResult = h264bsdDecode(&pDecCont->storage, tmpStream, strmLen,
                pInput->picId, &numReadBytes);
        }
        tmpStream += numReadBytes;
        /* check if too many bytes are read from stream */
        if ( (i32)(strmLen - numReadBytes) >= 0 )
            strmLen -= numReadBytes;
        else
            strmLen = 0;

        pOutput->pStrmCurrPos = tmpStream;

        /* every case that must hand control back to the caller forces
         * strmLen to 0, which ends the do-while loop */
        switch (decResult)
        {
            case H264BSD_HDRS_RDY:

                if(pDecCont->storage.dpb->flushed &&
                   pDecCont->storage.dpb->numOut !=
                   pDecCont->storage.dpb->outIndex)
                {
                    /* output first all DPB stored pictures
                     * DPB flush caused by new SPS; the headers-ready event
                     * itself is replayed on the next call via NEW_HEADERS */
                    pDecCont->storage.dpb->flushed = 0;
                    pDecCont->decStat = NEW_HEADERS;
                    returnValue = H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY;
                    strmLen = 0;
                }
                else
                {
                    returnValue = H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY;
                    strmLen = 0;
                }
                break;

            case H264BSD_PIC_RDY:
                pDecCont->picNumber++;

                /* strmLen still holds the unconsumed byte count here */
                if (strmLen == 0)
                    returnValue = H264SWDEC_PIC_RDY;
                else
                    returnValue = H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY;

                strmLen = 0;
                break;

            case H264BSD_PARAM_SET_ERROR:
                /* only reported once the buffer is exhausted and no valid
                 * parameter sets exist; otherwise decoding continues */
                if ( !h264bsdCheckValidParamSets(&pDecCont->storage) &&
                     strmLen == 0 )
                {
                    returnValue = H264SWDEC_STRM_ERR;
                }
                break;
            case H264BSD_MEMALLOC_ERROR:
                {
                    returnValue = H264SWDEC_MEMFAIL;
                    strmLen = 0;
                }
                break;
            default:
                /* any other core result: keep consuming the buffer */
                break;
        }

    } while (strmLen);

#ifdef H264DEC_TRACE
    sprintf(pDecCont->str, "H264SwDecDecode# OK: DecResult %d",
            returnValue);
    DEC_API_TRC(pDecCont->str);
#endif

    return(returnValue);

}
+/*------------------------------------------------------------------------------ + + Function: H264SwDecGetAPIVersion + + Functional description: + Return version information of the API + + Inputs: + none + + Outputs: + none + + Returns: + API version + +------------------------------------------------------------------------------*/ + +H264SwDecApiVersion H264SwDecGetAPIVersion() +{ + H264SwDecApiVersion ver; + + ver.major = H264SWDEC_MAJOR_VERSION; + ver.minor = H264SWDEC_MINOR_VERSION; + + return(ver); +} + +/*------------------------------------------------------------------------------ + + Function: H264SwDecNextPicture + + Functional description: + Get next picture in display order if any available. + + Input: + decInst decoder instance. + flushBuffer force output of all buffered pictures + + Output: + pOutput pointer to output structure + + Returns: + H264SWDEC_OK no pictures available for display + H264SWDEC_PIC_RDY picture available for display + H264SWDEC_PARAM_ERR invalid parameters + +------------------------------------------------------------------------------*/ + +H264SwDecRet H264SwDecNextPicture(H264SwDecInst decInst, + H264SwDecPicture *pOutput, u32 flushBuffer) +{ + + decContainer_t *pDecCont; + u32 numErrMbs, isIdrPic, picId; + u32 *pOutPic; + + DEC_API_TRC("H264SwDecNextPicture#"); + + if (decInst == NULL || pOutput == NULL) + { + DEC_API_TRC("H264SwDecNextPicture# ERROR: decInst or pOutput is NULL"); + return(H264SWDEC_PARAM_ERR); + } + + pDecCont = (decContainer_t*)decInst; + +#ifdef H264DEC_TRACE + sprintf(pDecCont->str, "H264SwDecNextPicture# decInst %p pOutput %p %s %d", + decInst, (void*)pOutput, "flushBuffer", flushBuffer); + DEC_API_TRC(pDecCont->str); +#endif + + if (flushBuffer) + h264bsdFlushBuffer(&pDecCont->storage); + + pOutPic = (u32*)h264bsdNextOutputPicture(&pDecCont->storage, &picId, + &isIdrPic, &numErrMbs); + + if (pOutPic == NULL) + { + DEC_API_TRC("H264SwDecNextPicture# OK: return H264SWDEC_OK"); + return(H264SWDEC_OK); + 
} + else + { + pOutput->pOutputPicture = pOutPic; + pOutput->picId = picId; + pOutput->isIdrPicture = isIdrPic; + pOutput->nbrOfErrMBs = numErrMbs; + DEC_API_TRC("H264SwDecNextPicture# OK: return H264SWDEC_PIC_RDY"); + return(H264SWDEC_PIC_RDY); + } + +} + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/TestBenchMultipleInstance.c b/media/libstagefright/codecs/on2/h264dec/source/TestBenchMultipleInstance.c new file mode 100755 index 0000000000000000000000000000000000000000..42170d3272be8440fa5170945856d4c67683a95d --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/TestBenchMultipleInstance.c @@ -0,0 +1,531 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* CVS tag name for identification */ +const char tagName[256] = "$Name: FIRST_ANDROID_COPYRIGHT $"; + +#include "H264SwDecApi.h" +#include +#include +#include + +#define DEBUG(argv) printf argv + +/* _NO_OUT disables output file writing */ +#ifdef __arm +#define _NO_OUT +#endif + +/*------------------------------------------------------------------------------ + +------------------------------------------------------------------------------*/ +void WriteOutput(FILE *fid, u8 *data, u32 picSize); + +u32 CropPicture(u8 *pOutImage, u8 *pInImage, + u32 picWidth, u32 picHeight, CropParams *pCropParams); + +void CropWriteOutput(FILE *fid, u8 *imageData, u32 cropDisplay, + H264SwDecInfo *decInfo); + +typedef struct +{ + H264SwDecInst decInst; + H264SwDecInput decInput; + H264SwDecOutput decOutput; + H264SwDecPicture decPicture; + H264SwDecInfo decInfo; + FILE *foutput; + char outFileName[256]; + u8 *byteStrmStart; + u32 picNumber; +} Decoder; + + +/*------------------------------------------------------------------------------ + +------------------------------------------------------------------------------*/ +int main(int argc, char **argv) +{ + + i32 instCount, instRunning; + i32 i; + u32 maxNumPics; + u32 strmLen; + H264SwDecRet ret; + u32 numErrors = 0; + u32 cropDisplay = 0; + u32 disableOutputReordering = 0; + FILE *finput; + Decoder **decoder; + char outFileName[256] = "out.yuv"; + + + if ( argc > 1 && strcmp(argv[1], "-T") == 0 ) + { + fprintf(stderr, "%s\n", tagName); + return 0; + } + + if (argc < 2) + { + DEBUG(( + "Usage: %s [-Nn] [-Ooutfile] [-P] [-U] [-C] [-R] [-T] file1.264 [file2.264] .. 
[fileN.264]\n", + argv[0])); + DEBUG(("\t-Nn forces decoding to stop after n pictures\n")); +#if defined(_NO_OUT) + DEBUG(("\t-Ooutfile output writing disabled at compile time\n")); +#else + DEBUG(("\t-Ooutfile write output to \"outfile\" (default out.yuv)\n")); + DEBUG(("\t-Onone does not write output\n")); +#endif + DEBUG(("\t-C display cropped image (default decoded image)\n")); + DEBUG(("\t-R disable DPB output reordering\n")); + DEBUG(("\t-T to print tag name and exit\n")); + exit(100); + } + + instCount = argc - 1; + + /* read command line arguments */ + maxNumPics = 0; + for (i = 1; i < (argc-1); i++) + { + if ( strncmp(argv[i], "-N", 2) == 0 ) + { + maxNumPics = (u32)atoi(argv[i]+2); + instCount--; + } + else if ( strncmp(argv[i], "-O", 2) == 0 ) + { + strcpy(outFileName, argv[i]+2); + instCount--; + } + else if ( strcmp(argv[i], "-C") == 0 ) + { + cropDisplay = 1; + instCount--; + } + else if ( strcmp(argv[i], "-R") == 0 ) + { + disableOutputReordering = 1; + instCount--; + } + } + + if (instCount < 1) + { + DEBUG(("No input files\n")); + exit(100); + } + + /* allocate memory for multiple decoder instances + * one instance for every stream file */ + decoder = (Decoder **)malloc(sizeof(Decoder*)*(u32)instCount); + if (decoder == NULL) + { + DEBUG(("Unable to allocate memory\n")); + exit(100); + } + + /* prepare each decoder instance */ + for (i = 0; i < instCount; i++) + { + decoder[i] = (Decoder *)calloc(1, sizeof(Decoder)); + + /* open input file */ + finput = fopen(argv[argc-instCount+i],"rb"); + if (finput == NULL) + { + DEBUG(("Unable to open input file <%s>\n", argv[argc-instCount+i])); + exit(100); + } + + DEBUG(("Reading input file[%d] %s\n", i, argv[argc-instCount+i])); + + /* read input stream to buffer */ + fseek(finput,0L,SEEK_END); + strmLen = (u32)ftell(finput); + rewind(finput); + decoder[i]->byteStrmStart = (u8 *)malloc(sizeof(u8)*strmLen); + if (decoder[i]->byteStrmStart == NULL) + { + DEBUG(("Unable to allocate memory\n")); + exit(100); + 
} + fread(decoder[i]->byteStrmStart, sizeof(u8), strmLen, finput); + fclose(finput); + + /* open output file */ + if (strcmp(outFileName, "none") != 0) + { +#if defined(_NO_OUT) + decoder[i]->foutput = NULL; +#else + sprintf(decoder[i]->outFileName, "%s%i", outFileName, i); + decoder[i]->foutput = fopen(decoder[i]->outFileName, "wb"); + if (decoder[i]->foutput == NULL) + { + DEBUG(("Unable to open output file\n")); + exit(100); + } +#endif + } + + ret = H264SwDecInit(&(decoder[i]->decInst), disableOutputReordering); + + if (ret != H264SWDEC_OK) + { + DEBUG(("Init failed %d\n", ret)); + exit(100); + } + + decoder[i]->decInput.pStream = decoder[i]->byteStrmStart; + decoder[i]->decInput.dataLen = strmLen; + decoder[i]->decInput.intraConcealmentMethod = 0; + + } + + /* main decoding loop */ + do + { + /* decode once using each instance */ + for (i = 0; i < instCount; i++) + { + ret = H264SwDecDecode(decoder[i]->decInst, + &(decoder[i]->decInput), + &(decoder[i]->decOutput)); + + switch(ret) + { + + case H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY: + + ret = H264SwDecGetInfo(decoder[i]->decInst, + &(decoder[i]->decInfo)); + if (ret != H264SWDEC_OK) + exit(1); + + if (cropDisplay && decoder[i]->decInfo.croppingFlag) + { + DEBUG(("Decoder[%d] Cropping params: (%d, %d) %dx%d\n", + i, + decoder[i]->decInfo.cropParams.cropLeftOffset, + decoder[i]->decInfo.cropParams.cropTopOffset, + decoder[i]->decInfo.cropParams.cropOutWidth, + decoder[i]->decInfo.cropParams.cropOutHeight)); + } + + DEBUG(("Decoder[%d] Width %d Height %d\n", i, + decoder[i]->decInfo.picWidth, + decoder[i]->decInfo.picHeight)); + + DEBUG(("Decoder[%d] videoRange %d, matricCoefficients %d\n", + i, decoder[i]->decInfo.videoRange, + decoder[i]->decInfo.matrixCoefficients)); + decoder[i]->decInput.dataLen -= + (u32)(decoder[i]->decOutput.pStrmCurrPos - + decoder[i]->decInput.pStream); + decoder[i]->decInput.pStream = + decoder[i]->decOutput.pStrmCurrPos; + break; + + case H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY: + 
decoder[i]->decInput.dataLen -= + (u32)(decoder[i]->decOutput.pStrmCurrPos - + decoder[i]->decInput.pStream); + decoder[i]->decInput.pStream = + decoder[i]->decOutput.pStrmCurrPos; + /* fall through */ + case H264SWDEC_PIC_RDY: + if (ret == H264SWDEC_PIC_RDY) + decoder[i]->decInput.dataLen = 0; + + ret = H264SwDecGetInfo(decoder[i]->decInst, + &(decoder[i]->decInfo)); + if (ret != H264SWDEC_OK) + exit(1); + + while (H264SwDecNextPicture(decoder[i]->decInst, + &(decoder[i]->decPicture), 0) == H264SWDEC_PIC_RDY) + { + decoder[i]->picNumber++; + + numErrors += decoder[i]->decPicture.nbrOfErrMBs; + + DEBUG(("Decoder[%d] PIC %d, type %s, concealed %d\n", + i, decoder[i]->picNumber, + decoder[i]->decPicture.isIdrPicture + ? "IDR" : "NON-IDR", + decoder[i]->decPicture.nbrOfErrMBs)); + fflush(stdout); + + CropWriteOutput(decoder[i]->foutput, + (u8*)decoder[i]->decPicture.pOutputPicture, + cropDisplay, &(decoder[i]->decInfo)); + } + + if (maxNumPics && decoder[i]->picNumber == maxNumPics) + decoder[i]->decInput.dataLen = 0; + break; + + case H264SWDEC_STRM_PROCESSED: + case H264SWDEC_STRM_ERR: + case H264SWDEC_PARAM_ERR: + decoder[i]->decInput.dataLen = 0; + break; + + default: + DEBUG(("Decoder[%d] FATAL ERROR\n", i)); + exit(10); + break; + + } + } + + /* check if any of the instances is still running (=has more data) */ + instRunning = instCount; + for (i = 0; i < instCount; i++) + { + if (decoder[i]->decInput.dataLen == 0) + instRunning--; + } + + } while (instRunning); + + + /* get last frames and close each instance */ + for (i = 0; i < instCount; i++) + { + while (H264SwDecNextPicture(decoder[i]->decInst, + &(decoder[i]->decPicture), 1) == H264SWDEC_PIC_RDY) + { + decoder[i]->picNumber++; + + DEBUG(("Decoder[%d] PIC %d, type %s, concealed %d\n", + i, decoder[i]->picNumber, + decoder[i]->decPicture.isIdrPicture + ? 
"IDR" : "NON-IDR", + decoder[i]->decPicture.nbrOfErrMBs)); + fflush(stdout); + + CropWriteOutput(decoder[i]->foutput, + (u8*)decoder[i]->decPicture.pOutputPicture, + cropDisplay, &(decoder[i]->decInfo)); + } + + H264SwDecRelease(decoder[i]->decInst); + + if (decoder[i]->foutput) + fclose(decoder[i]->foutput); + + free(decoder[i]->byteStrmStart); + + free(decoder[i]); + } + + free(decoder); + + if (numErrors) + return 1; + else + return 0; + +} + +/*------------------------------------------------------------------------------ + +------------------------------------------------------------------------------*/ +void CropWriteOutput(FILE *foutput, u8 *imageData, u32 cropDisplay, + H264SwDecInfo *decInfo) +{ + u8 *tmpImage = NULL; + u32 tmp, picSize; + + if (cropDisplay && decInfo->croppingFlag) + { + picSize = decInfo->cropParams.cropOutWidth * + decInfo->cropParams.cropOutHeight; + picSize = (3 * picSize)/2; + tmpImage = malloc(picSize); + if (tmpImage == NULL) + exit(1); + tmp = CropPicture(tmpImage, imageData, + decInfo->picWidth, decInfo->picHeight, + &(decInfo->cropParams)); + if (tmp) + exit(1); + WriteOutput(foutput, tmpImage, picSize); + free(tmpImage); + } + else + { + picSize = decInfo->picWidth * decInfo->picHeight; + picSize = (3 * picSize)/2; + WriteOutput(foutput, imageData, picSize); + } + +} + +/*------------------------------------------------------------------------------ + +------------------------------------------------------------------------------*/ +void WriteOutput(FILE *fid, u8 *data, u32 picSize) +{ + if (fid) + fwrite(data, 1, picSize, fid); +} + +/*------------------------------------------------------------------------------ + + Function name: H264SwDecTrace + +------------------------------------------------------------------------------*/ +void H264SwDecTrace(char *string) +{ + FILE *fp; + + fp = fopen("dec_api.trc", "at"); + + if (!fp) + return; + + fwrite(string, 1, strlen(string), fp); + fwrite("\n", 1,1, fp); + + fclose(fp); +} + 
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecMalloc
+
+    Purpose:        Thin wrapper around malloc(); returns NULL on failure.
+
+------------------------------------------------------------------------------*/
+void* H264SwDecMalloc(u32 size)
+{
+    return malloc(size);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecFree
+
+    Purpose:        Thin wrapper around free(); ptr may be NULL.
+
+------------------------------------------------------------------------------*/
+void H264SwDecFree(void *ptr)
+{
+    free(ptr);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecMemcpy
+
+    Purpose:        Thin wrapper around memcpy(); the regions must not overlap.
+
+------------------------------------------------------------------------------*/
+void H264SwDecMemcpy(void *dest, void *src, u32 count)
+{
+    memcpy(dest, src, count);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecMemset
+
+    Purpose:        Thin wrapper around memset().
+
+------------------------------------------------------------------------------*/
+void H264SwDecMemset(void *ptr, i32 value, u32 count)
+{
+    memset(ptr, value, count);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  CropPlane
+
+    Purpose:        Copy a window of `rows` rows by `cols` bytes out of one
+                    image plane whose rows are `srcStride` bytes apart, and
+                    pack the rows back to back at pOut.
+
+    Returns:        pointer just past the last byte written
+
+------------------------------------------------------------------------------*/
+static u8* CropPlane(u8 *pOut, const u8 *pIn, u32 srcStride, u32 cols,
+    u32 rows)
+{
+
+    u32 i;
+
+    for (i = 0; i < rows; i++)
+    {
+        memcpy(pOut, pIn, cols);
+        pOut += cols;
+        pIn += srcStride;
+    }
+
+    return pOut;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  CropPicture
+
+    Purpose:        Copy the cropping window described by pCropParams out of
+                    a planar picture into pOutImage. The input is read as a
+                    picWidth x picHeight luma plane followed by two chroma
+                    planes of (picWidth/2) x (picHeight/2) each (cb first,
+                    then cr); the output uses the same plane order.
+
+                    pOutImage must have room for
+                    cropOutWidth*cropOutHeight luma bytes plus two chroma
+                    planes of (cropOutWidth/2)*(cropOutHeight/2) bytes.
+
+                    The output buffer stays owned by the caller on every
+                    path; it is never freed here.
+
+    Returns:        0 on success, 1 on NULL pointers, zero picture size or
+                    a crop window that does not fit inside the picture.
+
+------------------------------------------------------------------------------*/
+u32 CropPicture(u8 *pOutImage, u8 *pInImage,
+    u32 picWidth, u32 picHeight, CropParams *pCropParams)
+{
+
+    u32 outWidth, outHeight;
+    u32 chromaOffset;
+    u8 *pOut;
+
+    if (pOutImage == NULL || pInImage == NULL || pCropParams == NULL ||
+        !picWidth || !picHeight)
+    {
+        return(1);
+    }
+
+    /* offset + size must stay inside the picture; compared by subtraction
+     * so that the sum cannot wrap around (assumes the CropParams fields
+     * are unsigned, like the u32 picture dimensions) */
+    if ( (pCropParams->cropLeftOffset > picWidth) ||
+         (pCropParams->cropOutWidth >
+            picWidth - pCropParams->cropLeftOffset) ||
+         (pCropParams->cropTopOffset > picHeight) ||
+         (pCropParams->cropOutHeight >
+            picHeight - pCropParams->cropTopOffset) )
+    {
+        return(1);
+    }
+
+    outWidth = pCropParams->cropOutWidth;
+    outHeight = pCropParams->cropOutHeight;
+
+    /* luma */
+    pOut = CropPlane(pOutImage,
+        pInImage + pCropParams->cropTopOffset*picWidth +
+        pCropParams->cropLeftOffset,
+        picWidth, outWidth, outHeight);
+
+    outWidth >>= 1;
+    outHeight >>= 1;
+
+    /* chroma planes are subsampled by two in both directions: skip whole
+     * chroma rows first, then the column offset. Equal to the former
+     * cropTopOffset*picWidth/4 for even offsets, and still row aligned
+     * for odd ones. */
+    chromaOffset = (pCropParams->cropTopOffset/2)*(picWidth/2) +
+        pCropParams->cropLeftOffset/2;
+
+    /* cb */
+    pOut = CropPlane(pOut,
+        pInImage + picWidth*picHeight + chromaOffset,
+        picWidth/2, outWidth, outHeight);
+
+    /* cr */
+    (void)CropPlane(pOut,
+        pInImage + 5*picWidth*picHeight/4 + chromaOffset,
+        picWidth/2, outWidth, outHeight);
+
+    return (0);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor.s
new file mode 100755
index 0000000000000000000000000000000000000000..634a4846b9d4f8e58d5da4d7e99bd9e43cdfd316
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor.s
@@ -0,0 +1,298 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateChromaHor function
+;--
+;-------------------------------------------------------------------------------
+
+
+    IF :DEF: H264DEC_WINASM
+        ;// We don't use REQUIRE8 and PRESERVE8 for winasm
+    ELSE
+        REQUIRE8
+        PRESERVE8
+    ENDIF
+
+    AREA    |.text|, CODE
+
+
+;// h264bsdInterpolateChromaHor register allocation
+;// (several names share one register; each pair is never live together)
+ref     RN 0
+ptrA    RN 0
+
+mb      RN 1
+block   RN 1
+
+x0      RN 2
+count   RN 2
+
+y0      RN 3
+valX    RN 3
+
+width   RN 4
+
+height  RN 5
+tmp7    RN 5
+
+chrPW   RN 6
+tmp8    RN 6
+
+tmp1    RN 7
+chrPH   RN 7
+
+tmp2    RN 8
+
+tmp3    RN 9
+
+tmp4    RN 10
+
+tmp5    RN 11
+
+tmp6    RN 12
+
+c32     RN 14
+xFrac   RN 14
+
+;// Function exports and imports
+
+    IMPORT  h264bsdFillBlock
+
+    EXPORT  h264bsdInterpolateChromaHor
+
+;// Function arguments
+;// (offsets are relative to sp after the two prologue instructions below)
+;// u8 *ref, : 0xc4
+;// u8 *predPartChroma, : 0xc8
+;// i32 x0, : 0xcc
+;// i32 y0, : 0xd0
+;// u32 width, : 0xf8
+;// u32 height, : 0xfc
+;// u32 xFrac, : 0x100
+;// u32 chromaPartWidth, : 0x104
+;// u32 chromaPartHeight : 0x108
+
+h264bsdInterpolateChromaHor
+    STMFD   sp!, {r0-r11,lr}        ;// r0-r3 are stored too: register args become sp+0xc4..0xd0
+    SUB     sp, sp, #0xc4           ;// locals: FillBlock stack args at sp+0, fill buffer at sp+0x1c
+
+    LDR     chrPW, [sp, #0x104]     ;// chromaPartWidth
+    LDR     width, [sp, #0xf8]      ;// width
+    CMP     x0, #0
+    BLT     do_fill
+
+    ADD     tmp6, x0, chrPW         ;// tmp6 = x0+ chromaPartWidth
+    ADD     tmp6, tmp6, #1          ;// tmp6 = x0 + chromaPartWidth + 1
+    CMP     tmp6, width             ;// x0+chromaPartWidth+1 > width
+    BHI     do_fill
+
+    CMP     y0, #0
+    BLT     do_fill
+    LDR     chrPH, [sp, #0x108]     ;// chromaPartHeight
+    LDR     height, [sp, #0xfc]     ;// height
+    ADD     tmp6, y0, chrPH         ;// tmp6 = y0 + chromaPartHeight
+    CMP     tmp6, height
+    BLS     skip_fill               ;// block lies inside the picture: read ref directly
+
+do_fill
+    LDR     chrPH, [sp, #0x108]     ;// chromaPartHeight
+    LDR     height, [sp, #0xfc]     ;// height
+    ADD     tmp8, chrPW, #1         ;// tmp8 = chromaPartWidth+1
+    MOV     tmp2, tmp8              ;// tmp2 = chromaPartWidth+1
+    STMIA   sp,{width,height,tmp8,chrPH,tmp2}   ;// stack args for h264bsdFillBlock
+    ADD     block, sp, #0x1c        ;// block
+    BL      h264bsdFillBlock
+
+    LDR     x0, [sp, #0xcc]         ;// reload r0-r3 args for the second fill
+    LDR     y0, [sp, #0xd0]
+    LDR     ref, [sp, #0xc4]        ;// ref
+    STMIA   sp,{width,height,tmp8,chrPH,tmp2}
+    ADD     block, sp, #0x1c        ;// block
+    MLA     ref, height, width, ref ;// ref += width * height;
+    MLA     block, chrPH, tmp8, block;// block + (chromaPH)*(chromaPW+1)
+    BL      h264bsdFillBlock
+
+    MOV     x0, #0                  ;// x0 = 0
+    MOV     y0, #0                  ;// y0 = 0
+    STR     x0, [sp, #0xcc]
+    STR     y0, [sp, #0xd0]
+    ADD     ref, sp, #0x1c          ;// ref = block
+    STR     ref, [sp, #0xc4]        ;// ref
+
+    STR     chrPH, [sp, #0xfc]      ;// height
+    STR     tmp8, [sp, #0xf8]       ;// width
+    MOV     width, tmp8
+    SUB     chrPW, chrPW, #1        ;// tmp8 aliases chrPW (r6): undo the +1 from above
+
+skip_fill
+    MLA     tmp3, y0, width, x0     ;// tmp3 = y0*width+x0
+    LDR     xFrac, [sp, #0x100]     ;// xFrac
+    ADD     ptrA, ref, tmp3         ;// ptrA = ref + y0*width+x0
+    RSB     valX, xFrac, #8         ;// valX = 8-xFrac
+
+    LDR     mb, [sp, #0xc8]         ;// predPartChroma
+
+
+    ;// pack values to count register
+    ;// [31:28] loop_x (chromaPartWidth-1)
+    ;// [27:24] loop_y (chromaPartHeight-1)
+    ;// [23:20] chromaPartWidth-1
+    ;// [19:16] chromaPartHeight-1
+    ;// [15:00] nothing (count aliases x0 in r2 and is not cleared, so x0 stays here)
+
+    SUB     tmp2, chrPH, #1         ;// chromaPartHeight-1
+    SUB     tmp1, chrPW, #1         ;// chromaPartWidth-1
+    ADD     count, count, tmp2, LSL #16 ;// chromaPartHeight-1
+    ADD     count, count, tmp2, LSL #24 ;// loop_y
+    ADD     count, count, tmp1, LSL #20 ;// chromaPartWidth-1
+    AND     tmp2, count, #0x00F00000    ;// loop_x
+    PKHBT   valX, valX, xFrac, LSL #16  ;// |xFrac|valX |
+    MOV     valX, valX, LSL #3          ;// multiply by 8 in advance
+    MOV     c32, #32                    ;// rounding term; r14 no longer holds xFrac
+
+
+    ;///////////////////////////////////////////////////////////////////////////
+    ;// Cb
+    ;///////////////////////////////////////////////////////////////////////////
+
+    ;// 2x2 pels per iteration
+    ;// bilinear horizontal interpolation
+
+loop1_y
+    ADD     count, count, tmp2, LSL #8  ;// loop_x = chromaPartWidth-1
+    LDRB    tmp1, [ptrA, width]         ;// row 2, first column
+    LDRB    tmp2, [ptrA], #1            ;// row 1, first column
+
+loop1_x
+    LDRB    tmp3, [ptrA, width]
+    LDRB    tmp4, [ptrA], #1
+
+    PKHBT   tmp5, tmp1, tmp3, LSL #16   ;// row 2: |next|cur|
+    PKHBT   tmp6, tmp2, tmp4, LSL #16   ;// row 1: |next|cur|
+
+    LDRB    tmp1, [ptrA, width]
+    LDRB    tmp2, [ptrA], #1
+
+    SMLAD   tmp5, tmp5, valX, c32       ;// multiply
+    SMLAD   tmp6, tmp6, valX, c32       ;// multiply
+
+    PKHBT   tmp7, tmp3, tmp1, LSL #16
+    PKHBT   tmp8, tmp4, tmp2, LSL #16
+
+    SMLAD   tmp7, tmp7, valX, c32       ;// multiply
+    SMLAD   tmp8, tmp8, valX, c32       ;// multiply
+
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb,#8]               ;// store row 2 col 1
+
+    MOV     tmp6, tmp6, LSR #6          ;// scale down
+    STRB    tmp6, [mb],#1               ;// store row 1 col 1
+
+    MOV     tmp7, tmp7, LSR #6          ;// scale down
+    STRB    tmp7, [mb,#8]               ;// store row 2 col 2
+
+    MOV     tmp8, tmp8, LSR #6          ;// scale down
+    STRB    tmp8, [mb],#1               ;// store row 1 col 2
+
+    SUBS    count, count, #2<<28        ;// loop_x -= 2, carry clear once it wraps
+    BCS     loop1_x
+
+    AND     tmp2, count, #0x00F00000    ;// chromaPartWidth-1
+
+    ADDS    mb, mb, #16                 ;// +2 output rows (row stride 8), Carry=0
+    SBC     mb, mb, tmp2, LSR #20       ;// -(chromaPartWidth-1)-1
+    ADD     ptrA, ptrA, width, LSL #1   ;// +2 source rows (flags untouched)
+    SBC     ptrA, ptrA, tmp2, LSR #20   ;// -(chromaPartWidth-1)-1
+    SUB     ptrA, ptrA, #1              ;// the loads before loop1_x advanced one extra column
+
+    ADDS    count, count, #0xE << 24    ;// loop_y -= 2, loop_x wraps to 0; negative when done
+    BGE     loop1_y
+
+    ;///////////////////////////////////////////////////////////////////////////
+    ;// Cr
+    ;///////////////////////////////////////////////////////////////////////////
+    LDR     height, [sp,#0xfc]      ;// height
+    LDR     ref, [sp, #0xc4]        ;// ref
+    LDR     tmp1, [sp, #0xd0]       ;// y0
+    LDR     tmp2, [sp, #0xcc]       ;// x0
+    LDR     mb, [sp, #0xc8]         ;// predPartChroma
+
+    ADD     tmp1, height, tmp1      ;// Cr rows follow the Cb rows: y0 + height
+    MLA     tmp3, tmp1, width, tmp2 ;// (y0+height)*width + x0
+    ADD     ptrA, ref, tmp3
+    ADD     mb, mb, #64             ;// Cr output starts 64 bytes after Cb
+
+    AND     count, count, #0x00FFFFFF   ;// clear loop_x / loop_y
+    AND     tmp1, count, #0x000F0000    ;// chromaPartHeight-1
+    ADD     count, count, tmp1, LSL #8  ;// loop_y = chromaPartHeight-1
+    AND     tmp2, count, #0x00F00000    ;// loop_x
+
+    ;// 2x2 pels per iteration
+    ;// bilinear horizontal interpolation
+loop2_y
+    ADD     count, count, tmp2, LSL #8
+    LDRB    tmp1, [ptrA, width]
+    LDRB    tmp2, [ptrA], #1
+
+loop2_x
+    LDRB    tmp3, [ptrA, width]
+    LDRB    tmp4, [ptrA], #1
+
+    PKHBT   tmp5, tmp1, tmp3, LSL #16
+    PKHBT   tmp6, tmp2, tmp4, LSL #16
+
+    LDRB    tmp1, [ptrA, width]
+    LDRB    tmp2, [ptrA], #1
+
+    SMLAD   tmp5, tmp5, valX, c32       ;// multiply
+    SMLAD   tmp6, tmp6, valX, c32       ;// multiply
+
+    PKHBT   tmp7, tmp3, tmp1, LSL #16
+    PKHBT   tmp8, tmp4, tmp2, LSL #16
+
+    SMLAD   tmp7, tmp7, valX, c32       ;// multiply
+    SMLAD   tmp8, tmp8, valX, c32       ;// multiply
+
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb,#8]               ;// store row 2 col 1
+
+    MOV     tmp6, tmp6, LSR #6          ;// scale down
+    STRB    tmp6, [mb],#1               ;// store row 1 col 1
+
+    MOV     tmp7, tmp7, LSR #6          ;// scale down
+    STRB    tmp7, [mb,#8]               ;// store row 2 col 2
+
+    MOV     tmp8, tmp8, LSR #6          ;// scale down
+    STRB    tmp8, [mb],#1               ;// store row 1 col 2
+
+    SUBS    count, count, #2<<28
+    BCS     loop2_x
+
+    AND     tmp2, count, #0x00F00000
+
+    ADDS    mb, mb, #16                 ;// same row stepping as in the Cb loop
+    SBC     mb, mb, tmp2, LSR #20
+    ADD     ptrA, ptrA, width, LSL #1
+    SBC     ptrA, ptrA, tmp2, LSR #20
+    SUB     ptrA, ptrA, #1
+
+    ADDS    count, count, #0xE << 24
+    BGE     loop2_y
+
+    ADD     sp,sp,#0xd4             ;// drop the locals (0xc4) and the saved r0-r3 (0x10)
+    LDMFD   sp!, {r4-r11,pc}
+
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s
new file mode 100755
index 0000000000000000000000000000000000000000..7420ad3cf6403853826e91a51df16fb22d46106c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s
@@ -0,0 +1,339 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateChromaHorVer
+;--            function
+;--
+;-------------------------------------------------------------------------------
+
+
+    IF :DEF: H264DEC_WINASM
+        ;// We don't use REQUIRE8 and PRESERVE8 for winasm
+    ELSE
+        REQUIRE8
+        PRESERVE8
+    ENDIF
+
+    AREA    |.text|, CODE
+
+
+;// h264bsdInterpolateChromaHorVer register allocation
+;// (several names share one register; each pair is never live together)
+ref     RN 0
+ptrA    RN 0
+
+mb      RN 1
+block   RN 1
+
+x0      RN 2
+count   RN 2
+
+y0      RN 3
+valY    RN 3
+
+width   RN 4
+
+tmp4    RN 5
+height  RN 5
+
+tmp1    RN 6
+
+tmp2    RN 7
+
+tmp3    RN 8
+
+valX    RN 9
+
+tmp5    RN 10
+chrPW   RN 10
+
+tmp6    RN 11
+chrPH   RN 11
+
+xFrac   RN 12
+
+c32     RN 14
+yFrac   RN 14
+
+;// function exports and imports
+
+    IMPORT  h264bsdFillBlock
+
+    EXPORT  h264bsdInterpolateChromaHorVer
+
+;// Function arguments
+;// (offsets are relative to sp after the two prologue instructions below)
+;// u8 *ref, : 0xc4
+;// u8 *predPartChroma, : 0xc8
+;// i32 x0, : 0xcc
+;// i32 y0, : 0xd0
+;// u32 width, : 0xf8
+;// u32 height, : 0xfc
+;// u32 xFrac, : 0x100
+;// u32 yFrac, : 0x104
+;// u32 chromaPartWidth, : 0x108
+;// u32 chromaPartHeight : 0x10c
+
+h264bsdInterpolateChromaHorVer
+    STMFD   sp!, {r0-r11,lr}        ;// r0-r3 are stored too: register args become sp+0xc4..0xd0
+    SUB     sp, sp, #0xc4           ;// locals: FillBlock stack args at sp+0, fill buffer at sp+0x1c
+
+    LDR     chrPW, [sp, #0x108]     ;// chromaPartWidth
+    LDR     xFrac, [sp, #0x100]     ;// xFrac
+    LDR     width, [sp, #0xf8]      ;// width
+    CMP     x0, #0
+    BLT     do_fill
+
+    ADD     tmp1, x0, chrPW         ;// tmp1 = x0+ chromaPartWidth
+    ADD     tmp1, tmp1, #1          ;// tmp1 = x0+ chromaPartWidth+1
+    CMP     tmp1, width             ;// x0+chromaPartWidth+1 > width
+    BHI     do_fill
+
+    CMP     y0, #0
+    BLT     do_fill
+    LDR     chrPH, [sp, #0x10c]     ;// chromaPartHeight
+    LDR     height, [sp, #0xfc]     ;// height
+    ADD     tmp1, y0, chrPH         ;// tmp1 = y0 + chromaPartHeight
+    ADD     tmp1, tmp1, #1          ;// tmp1 = y0 + chromaPartHeight + 1
+    CMP     tmp1, height
+    BLS     skip_fill               ;// block lies inside the picture: read ref directly
+
+do_fill
+    LDR     chrPH, [sp, #0x10c]     ;// chromaPartHeight
+    LDR     height, [sp, #0xfc]     ;// height
+    ADD     tmp3, chrPW, #1         ;// tmp3 = chromaPartWidth+1
+    ADD     tmp1, chrPW, #1         ;// tmp1 = chromaPartWidth+1
+    ADD     tmp2, chrPH, #1         ;// tmp2 = chromaPartHeight+1
+    STMIA   sp,{width,height,tmp1,tmp2,tmp3}    ;// stack args for h264bsdFillBlock
+    ADD     block, sp, #0x1c        ;// block
+    BL      h264bsdFillBlock
+
+    LDR     x0, [sp, #0xcc]         ;// reload r0-r3 args for the second fill
+    LDR     y0, [sp, #0xd0]
+    LDR     ref, [sp, #0xc4]        ;// ref
+    STMIA   sp,{width,height,tmp1,tmp2,tmp3}
+    ADD     block, sp, #0x1c        ;// block
+    MLA     ref, height, width, ref ;// ref += width * height;
+    MLA     block, tmp2, tmp1, block;// block + (chromaPW+1)*(chromaPH+1)
+    BL      h264bsdFillBlock
+
+    MOV     x0, #0                  ;// x0 = 0
+    MOV     y0, #0                  ;// y0 = 0
+    STR     x0, [sp, #0xcc]
+    STR     y0, [sp, #0xd0]
+    ADD     ref, sp, #0x1c          ;// ref = block
+    STR     ref, [sp, #0xc4]        ;// ref
+
+    STR     tmp2, [sp, #0xfc]       ;// height
+    STR     tmp1, [sp, #0xf8]       ;// width
+    MOV     width, tmp1
+
+skip_fill
+    MLA     tmp3, y0, width, x0     ;// tmp3 = y0*width+x0
+    LDR     yFrac, [sp, #0x104]     ;// yFrac
+    LDR     xFrac, [sp, #0x100]     ;// xFrac again: r12 is not preserved across the BLs above
+    ADD     ptrA, ref, tmp3         ;// ptrA = ref + y0*width+x0
+    RSB     valX, xFrac, #8         ;// valX = 8-xFrac
+    RSB     valY, yFrac, #8         ;// valY = 8-yFrac
+
+    LDR     mb, [sp, #0xc8]         ;// predPartChroma
+
+
+    ;// pack values to count register
+    ;// [31:28] loop_x (chromaPartWidth-1)
+    ;// [27:24] loop_y (chromaPartHeight-1)
+    ;// [23:20] chromaPartWidth-1
+    ;// [19:16] chromaPartHeight-1
+    ;// [15:00] nothing (count aliases x0 in r2 and is not cleared, so x0 stays here)
+
+    SUB     tmp2, chrPH, #1         ;// chromaPartHeight-1
+    SUB     tmp1, chrPW, #1         ;// chromaPartWidth-1
+    ADD     count, count, tmp2, LSL #16 ;// chromaPartHeight-1
+    ADD     count, count, tmp2, LSL #24 ;// loop_y
+    ADD     count, count, tmp1, LSL #20 ;// chromaPartWidth-1
+    AND     tmp2, count, #0x00F00000    ;// loop_x
+    PKHBT   valY, valY, yFrac, LSL #16  ;// |yFrac|valY |
+    MOV     c32, #32                    ;// rounding term; r14 no longer holds yFrac
+
+
+    ;///////////////////////////////////////////////////////////////////////////
+    ;// Cb
+    ;///////////////////////////////////////////////////////////////////////////
+
+    ;// 2x2 pels per iteration
+    ;// bilinear vertical and horizontal interpolation
+
+loop1_y
+    LDRB    tmp1, [ptrA]
+    LDRB    tmp3, [ptrA, width]
+    LDRB    tmp5, [ptrA, width, LSL #1]
+
+    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
+    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|
+
+    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
+    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)
+
+    ADD     count, count, tmp2, LSL #8  ;// loop_x = chromaPartWidth-1
+loop1_x
+    ;// first
+    LDRB    tmp2, [ptrA, #1]!
+    LDRB    tmp4, [ptrA, width]
+    LDRB    tmp6, [ptrA, width, LSL #1]
+
+    PKHBT   tmp2, tmp2, tmp4, LSL #16   ;// |t4|t2|
+    PKHBT   tmp4, tmp4, tmp6, LSL #16   ;// |t6|t4|
+
+    SMUAD   tmp2, tmp2, valY            ;// t2=(t2*valY + t4*yFrac)
+    MLA     tmp5, tmp1, valX, c32       ;// t5=t1*valX+32
+    MLA     tmp5, tmp2, xFrac, tmp5     ;// t5=t2*xFrac+t5
+
+    SMUAD   tmp4, tmp4, valY            ;// t4=(t4*valY + t6*yFrac)
+    MLA     tmp6, tmp3, valX, c32       ;// t6=t3*valX+32
+    MLA     tmp6, tmp4, xFrac, tmp6     ;// t6=t4*xFrac+t6
+
+    MOV     tmp6, tmp6, LSR #6          ;// scale down
+    STRB    tmp6, [mb, #8]              ;// store pixel
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb], #1              ;// store pixel
+
+    ;// second
+    LDRB    tmp1, [ptrA, #1]!
+    LDRB    tmp3, [ptrA, width]
+    LDRB    tmp5, [ptrA, width, LSL #1]
+
+    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
+    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|
+
+    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
+    MLA     tmp5, tmp1, xFrac, c32      ;// t5=t1*xFrac+32
+    MLA     tmp5, tmp2, valX, tmp5      ;// t5=t2*valX+t5
+
+    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)
+    MLA     tmp6, tmp3, xFrac, c32      ;// t6=t3*xFrac+32
+    MLA     tmp6, tmp4, valX, tmp6      ;// t6=t4*valX+t6
+
+    MOV     tmp6, tmp6, LSR #6          ;// scale down
+    STRB    tmp6, [mb, #8]              ;// store pixel
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb], #1              ;// store pixel
+
+    SUBS    count, count, #2<<28        ;// loop_x -= 2, carry clear once it wraps
+    BCS     loop1_x
+
+    AND     tmp2, count, #0x00F00000    ;// chromaPartWidth-1
+
+    ADDS    mb, mb, #16                 ;// +2 output rows (row stride 8), Carry=0
+    SBC     mb, mb, tmp2, LSR #20       ;// -(chromaPartWidth-1)-1
+    ADD     ptrA, ptrA, width, LSL #1   ;// +2 source rows (flags untouched)
+    SBC     ptrA, ptrA, tmp2, LSR #20   ;// -(chromaPartWidth-1)-1
+
+    ADDS    count, count, #0xE << 24    ;// loop_y -= 2, loop_x wraps to 0; negative when done
+    BGE     loop1_y
+
+    ;///////////////////////////////////////////////////////////////////////////
+    ;// Cr
+    ;///////////////////////////////////////////////////////////////////////////
+    LDR     height, [sp,#0xfc]      ;// height
+    LDR     ref, [sp, #0xc4]        ;// ref
+    LDR     tmp1, [sp, #0xd0]       ;// y0
+    LDR     tmp2, [sp, #0xcc]       ;// x0
+    LDR     mb, [sp, #0xc8]         ;// predPartChroma
+
+    ADD     tmp1, height, tmp1      ;// Cr rows follow the Cb rows: y0 + height
+    MLA     tmp3, tmp1, width, tmp2 ;// (y0+height)*width + x0
+    ADD     ptrA, ref, tmp3
+    ADD     mb, mb, #64             ;// Cr output starts 64 bytes after Cb
+
+    AND     count, count, #0x00FFFFFF   ;// clear loop_x / loop_y
+    AND     tmp1, count, #0x000F0000    ;// chromaPartHeight-1
+    ADD     count, count, tmp1, LSL #8  ;// loop_y = chromaPartHeight-1
+    AND     tmp2, count, #0x00F00000    ;// loop_x
+
+    ;// 2x2 pels per iteration
+    ;// bilinear vertical and horizontal interpolation
+loop2_y
+    LDRB    tmp1, [ptrA]
+    LDRB    tmp3, [ptrA, width]
+    LDRB    tmp5, [ptrA, width, LSL #1]
+
+    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
+    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|
+
+    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
+    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)
+
+    ADD     count, count, tmp2, LSL #8
+loop2_x
+    ;// first
+    LDRB    tmp2, [ptrA, #1]!
+    LDRB    tmp4, [ptrA, width]
+    LDRB    tmp6, [ptrA, width, LSL #1]
+
+    PKHBT   tmp2, tmp2, tmp4, LSL #16   ;// |t4|t2|
+    PKHBT   tmp4, tmp4, tmp6, LSL #16   ;// |t6|t4|
+
+    SMUAD   tmp2, tmp2, valY            ;// t2=(t2*valY + t4*yFrac)
+    MLA     tmp5, tmp1, valX, c32       ;// t5=t1*valX+32
+    MLA     tmp5, tmp2, xFrac, tmp5     ;// t5=t2*xFrac+t5
+
+    SMUAD   tmp4, tmp4, valY            ;// t4=(t4*valY + t6*yFrac)
+    MLA     tmp6, tmp3, valX, c32       ;// t6=t3*valX+32
+    MLA     tmp6, tmp4, xFrac, tmp6     ;// t6=t4*xFrac+t6
+
+    MOV     tmp6, tmp6, LSR #6          ;// scale down
+    STRB    tmp6, [mb, #8]              ;// store pixel
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb], #1              ;// store pixel
+
+    ;// second
+    LDRB    tmp1, [ptrA, #1]!
+    LDRB    tmp3, [ptrA, width]
+    LDRB    tmp5, [ptrA, width, LSL #1]
+
+    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
+    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|
+
+    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
+    MLA     tmp5, tmp1, xFrac, c32      ;// t5=t1*xFrac+32
+    MLA     tmp5, tmp2, valX, tmp5      ;// t5=t2*valX+t5
+
+    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)
+    MLA     tmp6, tmp3, xFrac, c32      ;// t6=t3*xFrac+32
+    MLA     tmp6, tmp4, valX, tmp6      ;// t6=t4*valX+t6
+
+    MOV     tmp6, tmp6, LSR #6          ;// scale down
+    STRB    tmp6, [mb, #8]              ;// store pixel
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb], #1              ;// store pixel
+
+    SUBS    count, count, #2<<28
+    BCS     loop2_x
+
+    AND     tmp2, count, #0x00F00000
+
+    ADDS    mb, mb, #16                 ;// same row stepping as in the Cb loop
+    SBC     mb, mb, tmp2, LSR #20
+    ADD     ptrA, ptrA, width, LSL #1
+    SBC     ptrA, ptrA, tmp2, LSR #20
+
+    ADDS    count, count, #0xE << 24
+    BGE     loop2_y
+
+    ADD     sp,sp,#0xd4             ;// drop the locals (0xc4) and the saved r0-r3 (0x10)
+    LDMFD   sp!,{r4-r11,pc}
+
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_ver.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_ver.s
new file mode 100755
index 0000000000000000000000000000000000000000..af9df1bb5db7e384ad904f5a43d5b4703ff179df
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_ver.s
@@ -0,0 +1,288 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateChromaVer function
+;--
+;-------------------------------------------------------------------------------
+
+
+    IF :DEF: H264DEC_WINASM
+        ;// We don't use REQUIRE8 and PRESERVE8 for winasm
+    ELSE
+        REQUIRE8
+        PRESERVE8
+    ENDIF
+
+    AREA    |.text|, CODE
+
+;// h264bsdInterpolateChromaVer register allocation
+;// (several names share one register; each pair is never live together)
+ref     RN 0
+ptrA    RN 0
+
+mb      RN 1
+block   RN 1
+
+x0      RN 2
+count   RN 2
+
+y0      RN 3
+valY    RN 3
+
+width   RN 4
+
+height  RN 5
+tmp7    RN 5
+
+chrPW   RN 6
+tmp8    RN 6
+
+tmp1    RN 7
+
+tmp2    RN 8
+
+tmp3    RN 9
+
+tmp4    RN 10
+
+tmp5    RN 11
+chrPH   RN 11
+
+tmp6    RN 12
+
+c32     RN 14
+yFrac   RN 14
+
+;// Function exports and imports
+
+    IMPORT  h264bsdFillBlock
+
+    EXPORT  h264bsdInterpolateChromaVer
+
+;// Function arguments
+;// (offsets are relative to sp after the two prologue instructions below)
+;// u8 *ref, : 0xc4
+;// u8 *predPartChroma, : 0xc8
+;// i32 x0, : 0xcc
+;// i32 y0, : 0xd0
+;// u32 width, : 0xf8
+;// u32 height, : 0xfc
+;// u32 yFrac, : 0x100
+;// u32 chromaPartWidth, : 0x104
+;// u32 chromaPartHeight : 0x108
+
+h264bsdInterpolateChromaVer
+    STMFD   sp!, {r0-r11,lr}        ;// r0-r3 are stored too: register args become sp+0xc4..0xd0
+    SUB     sp, sp, #0xc4           ;// locals: FillBlock stack args at sp+0, fill buffer at sp+0x1c
+
+    LDR     chrPW, [sp, #0x104]     ;// chromaPartWidth
+    LDR     width, [sp, #0xf8]      ;// width
+    CMP     x0, #0
+    BLT     do_fill
+
+    ADD     tmp1, x0, chrPW         ;// tmp1 = x0+ chromaPartWidth
+    CMP     tmp1, width             ;// x0+chromaPartWidth > width
+    BHI     do_fill
+
+    CMP     y0, #0
+    BLT     do_fill
+    LDR     chrPH, [sp, #0x108]     ;// chromaPartHeight
+    LDR     height, [sp, #0xfc]     ;// height
+    ADD     tmp1, y0, chrPH         ;// tmp1 = y0 + chromaPartHeight
+    ADD     tmp1, tmp1, #1          ;// tmp1 = y0 + chromaPartHeight + 1
+    CMP     tmp1, height
+    BLS     skip_fill               ;// block lies inside the picture: read ref directly
+
+do_fill
+    LDR     chrPH, [sp, #0x108]     ;// chromaPartHeight
+    LDR     height, [sp, #0xfc]     ;// height
+    ADD     tmp1, chrPH, #1         ;// tmp1 = chromaPartHeight+1
+    MOV     tmp2, chrPW             ;// tmp2 = chromaPartWidth
+    STMIA   sp,{width,height,chrPW,tmp1,tmp2}   ;// stack args for h264bsdFillBlock
+    ADD     block, sp, #0x1c        ;// block
+    BL      h264bsdFillBlock
+
+    LDR     x0, [sp, #0xcc]         ;// reload r0-r3 args for the second fill
+    LDR     y0, [sp, #0xd0]
+    LDR     ref, [sp, #0xc4]        ;// ref
+    STMIA   sp,{width,height,chrPW,tmp1,tmp2}
+    ADD     block, sp, #0x1c        ;// block
+    MLA     ref, height, width, ref ;// ref += width * height;
+    MLA     block, chrPW, tmp1, block;// block + (chromaPW)*(chromaPH+1)
+    BL      h264bsdFillBlock
+
+    MOV     x0, #0                  ;// x0 = 0
+    MOV     y0, #0                  ;// y0 = 0
+    STR     x0, [sp, #0xcc]
+    STR     y0, [sp, #0xd0]
+    ADD     ref, sp, #0x1c          ;// ref = block
+    STR     ref, [sp, #0xc4]        ;// ref
+
+    STR     tmp1, [sp, #0xfc]       ;// height
+    STR     chrPW, [sp, #0xf8]      ;// width
+    MOV     width, chrPW
+
+skip_fill
+    MLA     tmp3, y0, width, x0     ;// tmp3 = y0*width+x0
+    LDR     yFrac, [sp, #0x100]     ;// yFrac
+    ADD     ptrA, ref, tmp3         ;// ptrA = ref + y0*width+x0
+    RSB     valY, yFrac, #8         ;// valY = 8-yFrac
+
+    LDR     mb, [sp, #0xc8]         ;// predPartChroma
+
+
+    ;// pack values to count register
+    ;// [31:28] loop_x (chromaPartWidth-1)
+    ;// [27:24] loop_y (chromaPartHeight-1)
+    ;// [23:20] chromaPartWidth-1
+    ;// [19:16] chromaPartHeight-1
+    ;// [15:00] nothing (count aliases x0 in r2 and is not cleared, so x0 stays here)
+
+    SUB     tmp2, chrPH, #1         ;// chromaPartHeight-1
+    SUB     tmp1, chrPW, #1         ;// chromaPartWidth-1
+    ADD     count, count, tmp2, LSL #16 ;// chromaPartHeight-1
+    ADD     count, count, tmp2, LSL #24 ;// loop_y
+    ADD     count, count, tmp1, LSL #20 ;// chromaPartWidth-1
+    AND     tmp2, count, #0x00F00000    ;// loop_x
+    PKHBT   valY, valY, yFrac, LSL #16  ;// |yFrac|valY |
+    MOV     valY, valY, LSL #3          ;// multiply by 8 in advance
+    MOV     c32, #32                    ;// rounding term; r14 no longer holds yFrac
+
+
+    ;///////////////////////////////////////////////////////////////////////////
+    ;// Cb
+    ;///////////////////////////////////////////////////////////////////////////
+
+    ;// 2x2 pels per iteration
+    ;// bilinear vertical interpolation
+
+loop1_y
+    ADD     count, count, tmp2, LSL #8  ;// loop_x = chromaPartWidth-1
+loop1_x
+    ;// Process 2x2 block
+    LDRB    tmp2, [ptrA,width]          ;// 2 row, 1 col
+    LDRB    tmp3, [ptrA,width, LSL #1]  ;// 3 row, 1 col
+    LDRB    tmp1, [ptrA],#1             ;// 1 row, 1 col
+
+    LDRB    tmp5, [ptrA,width]          ;// 2 row, 2 col
+    LDRB    tmp6, [ptrA,width, LSL #1]  ;// 3 row, 2 col
+    LDRB    tmp4, [ptrA],#1             ;// 1 row, 2 col
+
+    PKHBT   tmp1, tmp1, tmp2, LSL #16   ;// |B|A|
+    PKHBT   tmp2, tmp2, tmp3, LSL #16   ;// |C|B|
+    PKHBT   tmp4, tmp4, tmp5, LSL #16   ;// |B|A|
+
+    SMLAD   tmp7, tmp2, valY, c32       ;// multiply
+    PKHBT   tmp5, tmp5, tmp6, LSL #16   ;// |C|B|
+    SMLAD   tmp2, tmp1, valY, c32       ;// multiply
+    SMLAD   tmp8, tmp5, valY, c32       ;// multiply
+    SMLAD   tmp5, tmp4, valY, c32       ;// multiply
+
+    MOV     tmp7, tmp7, LSR #6          ;// scale down
+    STRB    tmp7, [mb,#8]               ;// store row 2 col 1
+    MOV     tmp2, tmp2, LSR #6          ;// scale down
+    STRB    tmp2, [mb],#1               ;// store row 1 col 1
+
+    MOV     tmp8, tmp8, LSR #6          ;// scale down
+    STRB    tmp8, [mb,#8]               ;// store row 2 col 2
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb],#1               ;// store row 1 col 2
+
+
+    SUBS    count, count, #2<<28        ;// loop_x -= 2, carry clear once it wraps
+    BCS     loop1_x
+
+    AND     tmp2, count, #0x00F00000    ;// chromaPartWidth-1
+
+    ADDS    mb, mb, #16                 ;// +2 output rows (row stride 8), Carry=0
+    SBC     mb, mb, tmp2, LSR #20       ;// -(chromaPartWidth-1)-1
+    ADD     ptrA, ptrA, width, LSL #1   ;// +2 source rows (flags untouched)
+    SBC     ptrA, ptrA, tmp2, LSR #20   ;// -(chromaPartWidth-1)-1
+
+    ADDS    count, count, #0xE << 24    ;// loop_y -= 2, loop_x wraps to 0; negative when done
+    BGE     loop1_y
+
+    ;///////////////////////////////////////////////////////////////////////////
+    ;// Cr
+    ;///////////////////////////////////////////////////////////////////////////
+    LDR     height, [sp,#0xfc]      ;// height
+    LDR     ref, [sp, #0xc4]        ;// ref
+    LDR     tmp1, [sp, #0xd0]       ;// y0
+    LDR     tmp2, [sp, #0xcc]       ;// x0
+    LDR     mb, [sp, #0xc8]         ;// predPartChroma
+
+    ADD     tmp1, height, tmp1      ;// Cr rows follow the Cb rows: y0 + height
+    MLA     tmp3, tmp1, width, tmp2 ;// (y0+height)*width + x0
+    ADD     ptrA, ref, tmp3
+    ADD     mb, mb, #64             ;// Cr output starts 64 bytes after Cb
+
+    AND     count, count, #0x00FFFFFF   ;// clear loop_x / loop_y
+    AND     tmp1, count, #0x000F0000    ;// chromaPartHeight-1
+    ADD     count, count, tmp1, LSL #8  ;// loop_y = chromaPartHeight-1
+    AND     tmp2, count, #0x00F00000    ;// loop_x
+
+    ;// 2x2 pels per iteration
+    ;// bilinear vertical interpolation
+loop2_y
+    ADD     count, count, tmp2, LSL #8
+loop2_x
+    ;// Process 2x2 block
+    LDRB    tmp2, [ptrA,width]          ;// 2 row, 1 col
+    LDRB    tmp3, [ptrA,width, LSL #1]  ;// 3 row, 1 col
+    LDRB    tmp1, [ptrA],#1             ;// 1 row, 1 col
+
+    LDRB    tmp5, [ptrA,width]          ;// 2 row, 2 col
+    LDRB    tmp6, [ptrA,width, LSL #1]  ;// 3 row, 2 col
+    LDRB    tmp4, [ptrA],#1             ;// 1 row, 2 col
+
+    PKHBT   tmp1, tmp1, tmp2, LSL #16   ;// |B|A|
+    PKHBT   tmp2, tmp2, tmp3, LSL #16   ;// |C|B|
+    PKHBT   tmp4, tmp4, tmp5, LSL #16   ;// |B|A|
+
+    SMLAD   tmp7, tmp2, valY, c32       ;// multiply
+    PKHBT   tmp5, tmp5, tmp6, LSL #16   ;// |C|B|
+    SMLAD   tmp2, tmp1, valY, c32       ;// multiply
+    SMLAD   tmp8, tmp5, valY, c32       ;// multiply
+    SMLAD   tmp5, tmp4, valY, c32       ;// multiply
+
+    MOV     tmp7, tmp7, LSR #6          ;// scale down
+    STRB    tmp7, [mb,#8]               ;// store row 2 col 1
+    MOV     tmp2, tmp2, LSR #6          ;// scale down
+    STRB    tmp2, [mb],#1               ;// store row 1 col 1
+
+    MOV     tmp8, tmp8, LSR #6          ;// scale down
+    STRB    tmp8, [mb,#8]               ;// store row 2 col 2
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb],#1               ;// store row 1 col 2
+
+
+    SUBS    count, count, #2<<28
+    BCS     loop2_x
+
+    AND     tmp2, count, #0x00F00000
+
+    ADDS    mb, mb, #16                 ;// same row stepping as in the Cb loop
+    SBC     mb, mb, tmp2, LSR #20
+    ADD     ptrA, ptrA, width, LSL #1
+    SBC     ptrA, ptrA, tmp2, LSR #20
+
+    ADDS    count, count, #0xE << 24
+    BGE     loop2_y
+
+    ADD     sp,sp,#0xd4             ;// drop the locals (0xc4) and the saved r0-r3 (0x10)
+    LDMFD   sp!, {r4-r11,pc}
+
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_half.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_half.s
new file mode 100755
index 0000000000000000000000000000000000000000..93968b6721b9a17e99b20cca495e900eeb33c4c2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_half.s
@@ -0,0 +1,251 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateHorHalf function
+;--
+;-------------------------------------------------------------------------------
+
+
+    IF :DEF: H264DEC_WINASM
+        ;// We don't use REQUIRE8 and PRESERVE8 for winasm
+    ELSE
+        REQUIRE8
+        PRESERVE8
+    ENDIF
+
+    AREA    |.text|, CODE
+
+;// h264bsdInterpolateHorHalf register allocation
+;// (several names share one register; each pair is never live together)
+ref     RN 0
+
+mb      RN 1
+buff    RN 1
+
+count   RN 2
+x0      RN 2
+
+y0      RN 3
+x_2_0   RN 3
+
+width   RN 4
+x_3_1   RN 4
+
+height  RN 5
+x_6_4   RN 5
+
+partW   RN 6
+x_7_5   RN 6
+
+partH   RN 7
+tmp1    RN 7
+
+tmp2    RN 8
+
+tmp3    RN 9
+
+tmp4    RN 10
+
+mult_20_01  RN 11
+mult_20_m5  RN 12
+
+plus16  RN 14
+
+
+;// function exports and imports
+
+    IMPORT  h264bsdFillBlock
+
+    EXPORT  h264bsdInterpolateHorHalf
+
+;// Horizontal filter approach
+;//
+;// Basic idea in horizontal filtering is to adjust coefficients
+;// like below. Calculation is done with 16-bit maths.
+;//
+;// Reg     x_2_0     x_3_1     x_6_4     x_7_5     x_2_0
+;//       [  2  0 ] [  3  1 ] [  6  4 ] [  7  5 ] [ 10  8 ] ...
+;// y_0 =   20  1     20 -5        -5         1
+;// y_1 =   -5        20  1      1 20        -5
+;// y_2 =    1        -5        -5 20      1 20
+;// y_3 =              1        20 -5     -5 20         1
+
+
+h264bsdInterpolateHorHalf
+    STMFD   sp!, {r0-r11, lr}       ;// r0-r3 are stored too: register args become sp+0x1e4..0x1f0
+    SUB     sp, sp, #0x1e4          ;// locals: FillBlock stack args at sp+0, fill buffer at sp+0x28
+
+    CMP     x0, #0
+    BLT     do_fill                 ;// (x0 < 0)
+    LDR     partW, [sp,#0x220]      ;// partWidth
+    ADD     tmp4, x0, partW         ;// (x0+partWidth)
+    ADD     tmp4, tmp4, #5          ;// (x0+partW+5)
+    LDR     width, [sp,#0x218]      ;// width
+    CMP     tmp4, width
+    BHI     do_fill                 ;// (x0+partW+5)>width
+
+    CMP     y0, #0
+    BLT     do_fill                 ;// (y0 < 0)
+    LDR     partH, [sp,#0x224]      ;// partHeight
+    ADD     tmp2, y0, partH         ;// (y0+partHeight)
+    LDR     height, [sp,#0x21c]     ;// height
+    CMP     tmp2, height
+    BLS     skip_fill               ;// no overfill needed
+
+
+do_fill
+    LDR     partH, [sp,#0x224]      ;// partHeight
+    LDR     height, [sp,#0x21c]     ;// height
+    LDR     partW, [sp,#0x220]      ;// partWidth
+    ADD     tmp4, partW, #5         ;// tmp4 = partW + 5;
+    STMIB   sp, {height, tmp4}      ;// sp+4 = height, sp+8 = partWidth+5
+    STR     partH, [sp,#0xc]        ;// sp+c = partHeight
+    STR     tmp4, [sp,#0x10]        ;// sp+10 = partWidth+5
+    LDR     width, [sp,#0x218]      ;// width
+    STR     width, [sp,#0]          ;// sp+0 = width
+    ADD     buff, sp, #0x28         ;// buff = p1[21*21/4+1]
+    BL      h264bsdFillBlock
+
+    MOV     x0, #0
+    STR     x0,[sp,#0x1ec]          ;// x0 = 0
+    STR     x0,[sp,#0x1f0]          ;// y0 = 0
+    ADD     ref,sp,#0x28            ;// ref = p1
+    STR     tmp4, [sp,#0x218]       ;// width = partWidth+5
+
+
+skip_fill
+    LDR     x0 ,[sp,#0x1ec]         ;// x0
+    LDR     y0 ,[sp,#0x1f0]         ;// y0
+    LDR     width, [sp,#0x218]      ;// width
+    MLA     tmp2, width, y0, x0     ;// y0*width+x0
+    ADD     ref, ref, tmp2          ;// ref += y0*width+x0
+    ADD     ref, ref, #8            ;// ref = ref+8 (loop_y reads at ref-8 and ref-4)
+    LDR     mb, [sp, #0x1e8]        ;// mb
+
+    ;// pack values to count register
+    ;// [31:28] loop_x (partWidth-1)
+    ;// [27:24] loop_y (partHeight-1)
+    ;// [23:20] partWidth-1
+    ;// [19:16] partHeight-1
+    ;// [15:00] width
+    MOV     count, width
+    SUB     partW, partW, #1;
+    SUB     partH, partH, #1;
+    ADD     tmp2, partH, partW, LSL #4
+    ADD     count, count, tmp2, LSL #16
+
+
+    LDR     mult_20_01, = 0x00140001    ;// halfwords | 20 |  1 |
+    LDR     mult_20_m5, = 0x0014FFFB    ;// halfwords | 20 | -5 |
+    MOV     plus16, #16                 ;// rounding term for the >>5 below
+    AND     tmp1, count, #0x000F0000    ;// partHeight-1
+    AND     tmp3, count, #0x00F00000    ;// partWidth-1
+    ADD     count, count, tmp1, LSL #8  ;// loop_y = partHeight-1
+loop_y
+    LDR     x_3_1, [ref, #-8]
+    ADD     count, count, tmp3, LSL #8  ;// loop_x = partWidth-1
+    LDR     x_7_5, [ref, #-4]
+    UXTB16  x_2_0, x_3_1
+    UXTB16  x_3_1, x_3_1, ROR #8
+    UXTB16  x_6_4, x_7_5
+
+loop_x
+    UXTB16  x_7_5, x_7_5, ROR #8
+
+    SMLAD   tmp1, x_2_0, mult_20_01, plus16
+    SMLATB  tmp3, x_2_0, mult_20_01, plus16
+    SMLATB  tmp2, x_2_0, mult_20_m5, plus16
+    SMLATB  tmp4, x_3_1, mult_20_01, plus16
+
+    SMLAD   tmp1, x_3_1, mult_20_m5, tmp1
+    SMLATB  tmp3, x_3_1, mult_20_m5, tmp3
+    SMLAD   tmp2, x_3_1, mult_20_01, tmp2
+    LDR     x_3_1, [ref], #4
+    SMLAD   tmp4, x_6_4, mult_20_m5, tmp4
+
+    SMLABB  tmp1, x_6_4, mult_20_m5, tmp1
+    SMLADX  tmp3, x_6_4, mult_20_m5, tmp3
+    SMLADX  tmp2, x_6_4, mult_20_01, tmp2
+    SMLADX  tmp4, x_7_5, mult_20_m5, tmp4
+
+    SMLABB  tmp1, x_7_5, mult_20_01, tmp1
+    UXTB16  x_2_0, x_3_1
+    SMLABB  tmp2, x_7_5, mult_20_m5, tmp2
+    SMLADX  tmp3, x_7_5, mult_20_01, tmp3
+    SMLABB  tmp4, x_2_0, mult_20_01, tmp4
+
+    MOV     tmp2, tmp2, ASR #5
+    MOV     tmp1, tmp1, ASR #5
+    PKHBT   tmp2, tmp2, tmp4, LSL #(16-5)
+    PKHBT   tmp1, tmp1, tmp3, LSL #(16-5)
+    USAT16  tmp2, #8, tmp2
+    USAT16  tmp1, #8, tmp1
+
+    SUBS    count, count, #4<<28        ;// loop_x -= 4, carry clear once it wraps
+    ORR     tmp1, tmp1, tmp2, LSL #8
+    STR     tmp1, [mb], #4              ;// store 4 output pixels
+    BCC     next_y
+
+    UXTB16  x_3_1, x_3_1, ROR #8
+
+    SMLAD   tmp1, x_6_4, mult_20_01, plus16
+    SMLATB  tmp3, x_6_4, mult_20_01, plus16
+    SMLATB  tmp2, x_6_4, mult_20_m5, plus16
+    SMLATB  tmp4, x_7_5, mult_20_01, plus16
+
+    SMLAD   tmp1, x_7_5, mult_20_m5, tmp1
+    SMLATB  tmp3, x_7_5, mult_20_m5, tmp3
+    SMLAD   tmp2, x_7_5, mult_20_01, tmp2
+    LDR     x_7_5, [ref], #4
+    SMLAD   tmp4, x_2_0, mult_20_m5, tmp4
+
+    SMLABB  tmp1, x_2_0, mult_20_m5, tmp1
+    SMLADX  tmp3, x_2_0, mult_20_m5, tmp3
+    SMLADX  tmp2, x_2_0, mult_20_01, tmp2
+    SMLADX  tmp4, x_3_1, mult_20_m5, tmp4
+
+    SMLABB  tmp1, x_3_1, mult_20_01, tmp1
+    UXTB16  x_6_4, x_7_5
+    SMLABB  tmp2, x_3_1, mult_20_m5, tmp2
+    SMLADX  tmp3, x_3_1, mult_20_01, tmp3
+    SMLABB  tmp4, x_6_4, mult_20_01, tmp4
+
+    MOV     tmp2, tmp2, ASR #5
+    MOV     tmp1, tmp1, ASR #5
+    PKHBT   tmp2, tmp2, tmp4, LSL #(16-5)
+    PKHBT   tmp1, tmp1, tmp3, LSL #(16-5)
+    USAT16  tmp2, #8, tmp2
+    USAT16  tmp1, #8, tmp1
+
+    SUBS    count, count, #4<<28
+    ORR     tmp1, tmp1, tmp2, LSL #8
+    STR     tmp1, [mb], #4              ;// store 4 output pixels
+    BCS     loop_x
+
+next_y
+    AND     tmp3, count, #0x00F00000    ;// partWidth-1
+    SMLABB  ref, count, mult_20_01, ref ;// +width
+    ADDS    mb, mb, #16                 ;// +16, Carry=0
+    SBC     mb, mb, tmp3, LSR #20       ;// -(partWidth-1)-1
+    SBC     ref, ref, tmp3, LSR #20     ;// -(partWidth-1)-1
+    ADDS    count, count, #(1<<28)-(1<<24)  ;// loop_y -= 1, loop_x wraps to 0; negative when done
+    BGE     loop_y
+
+    ADD     sp,sp,#0x1f4            ;// drop the locals (0x1e4) and the saved r0-r3 (0x10)
+    LDMFD   sp!, {r4-r11, pc}
+
+    END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_quarter.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_quarter.s
new file mode 100755
index 0000000000000000000000000000000000000000..de243d47a60f35889649e0289c4b498fb93a7286
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_quarter.s
@@ -0,0 +1,273 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+ +;------------------------------------------------------------------------------- +;-- +;-- Abstract : ARMv6 optimized version of h264bsdInterpolateHorQuarter function +;-- +;------------------------------------------------------------------------------- + + + IF :DEF: H264DEC_WINASM + ;// We don't use REQUIRE8 and PRESERVE8 for winasm + ELSE + REQUIRE8 + PRESERVE8 + ENDIF + + AREA |.text|, CODE + +;// h264bsdInterpolateHorQuarter register allocation + +ref RN 0 + +mb RN 1 +buff RN 1 + +count RN 2 +x0 RN 2 + +y0 RN 3 +x_2_0 RN 3 + +width RN 4 +x_3_1 RN 4 + +height RN 5 +x_6_4 RN 5 + +partW RN 6 +x_7_5 RN 6 + +partH RN 7 +tmp1 RN 7 + +tmp2 RN 8 + +tmp3 RN 9 + +tmp4 RN 10 + +mult_20_01 RN 11 + +mult_20_m5 RN 12 + +plus16 RN 14 ;// r14 (lr): saved on entry, then reused for the rounding constant + + +;// function exports and imports + + IMPORT h264bsdFillBlock + + EXPORT h264bsdInterpolateHorQuarter + + +;// Horizontal filter approach +;// +;// Basic idea in horizontal filtering is to adjust coefficients +;// like below. Calculation is done with 16-bit maths. +;// +;// Reg x_2_0 x_3_1 x_6_4 x_7_5 x_2_0 +;// [ 2 0 ] [ 3 1 ] [ 6 4 ] [ 7 5 ] [ 10 8 ] ... 
+;// y_0 = 20 1 20 -5 -5 1 +;// y_1 = -5 20 1 1 20 -5 +;// y_2 = 1 -5 -5 20 1 20 +;// y_3 = 1 20 -5 -5 20 1 + + +h264bsdInterpolateHorQuarter + STMFD sp!, {r0-r11, lr} ;// r0-r3 are spilled too: mb, x0 and y0 are reloaded from sp+0x1e8/0x1ec/0x1f0 + SUB sp, sp, #0x1e4 ;// local area; the fill buffer p1 starts at sp+0x28 + + CMP x0, #0 + BLT do_fill ;// (x0 < 0) + LDR partW, [sp,#0x220] ;// partWidth + ADD tmp4, x0, partW ;// (x0+partWidth) + ADD tmp4, tmp4, #5 ;// (x0+partW+5) + LDR width, [sp,#0x218] ;// width + CMP tmp4, width + BHI do_fill ;// (x0+partW+5)>width + + CMP y0, #0 + BLT do_fill ;// (y0 < 0) + LDR partH, [sp,#0x224] ;// partHeight + ADD tmp2, y0, partH ;// (y0+partHeight) + LDR height, [sp,#0x21c] ;// height + CMP tmp2, height + BLS skip_fill ;// no overfill needed + + +do_fill + LDR partH, [sp,#0x224] ;// partHeight + LDR height, [sp,#0x21c] ;// height + LDR partW, [sp,#0x220] ;// partWidth + ADD tmp4, partW, #5 ;// tmp4 = partW + 5; + STMIB sp, {height, tmp4} ;// sp+4 = height, sp+8 = partWidth+5 + STR partH, [sp,#0xc] ;// sp+c = partHeight + STR tmp4, [sp,#0x10] ;// sp+10 = partWidth+5 + LDR width, [sp,#0x218] ;// width + STR width, [sp,#0] ;// sp+0 = width + ADD buff, sp, #0x28 ;// buff = p1[21*21/4+1] + BL h264bsdFillBlock + + MOV x0, #0 + STR x0,[sp,#0x1ec] ;// x0 = 0 + STR x0,[sp,#0x1f0] ;// y0 = 0 + ADD ref,sp,#0x28 ;// ref = p1 + STR tmp4, [sp,#0x218] ;// width = partWidth+5 + + +skip_fill + LDR x0 ,[sp,#0x1ec] ;// x0 + LDR y0 ,[sp,#0x1f0] ;// y0 + LDR width, [sp,#0x218] ;// width + MLA tmp2, width, y0, x0 ;// y0*width+x0 + ADD ref, ref, tmp2 ;// ref += y0*width+x0 + ADD ref, ref, #8 ;// ref = ref+8 + LDR mb, [sp, #0x1e8] ;// mb + + ;// pack values to count register + ;// [31:28] loop_x (partWidth-1) + ;// [27:24] loop_y (partHeight-1) + ;// [23:20] partWidth-1 + ;// [19:16] partHeight-1 + ;// [15:00] width + MOV count, width + SUB partW, partW, #1; + SUB partH, partH, #1; + ADD tmp2, partH, partW, LSL #4 ;// ((partW-1)<<4) | (partH-1) + ADD count, count, tmp2, LSL #16 ;// placed in bits [23:16] above width + + + LDR mult_20_01, = 0x00140001 + LDR mult_20_m5, = 0x0014FFFB + MOV plus16, #16 ;// rounding term added before the >>5 below + AND tmp1, count, #0x000F0000 ;// partHeight-1 + AND 
tmp3, count, #0x00F00000 ;// partWidth-1 + ADD count, count, tmp1, LSL #8 ;// partHeight-1 into the loop_y field [27:24] +loop_y + LDR x_3_1, [ref, #-8] + ADD count, count, tmp3, LSL #8 ;// partWidth-1 into the loop_x field [31:28] + LDR x_7_5, [ref, #-4] + UXTB16 x_2_0, x_3_1 + UXTB16 x_3_1, x_3_1, ROR #8 + UXTB16 x_6_4, x_7_5 + +loop_x + UXTB16 x_7_5, x_7_5, ROR #8 + + SMLAD tmp1, x_2_0, mult_20_01, plus16 + SMLATB tmp3, x_2_0, mult_20_01, plus16 + SMLATB tmp2, x_2_0, mult_20_m5, plus16 + SMLATB tmp4, x_3_1, mult_20_01, plus16 + + SMLAD tmp1, x_3_1, mult_20_m5, tmp1 + SMLATB tmp3, x_3_1, mult_20_m5, tmp3 + SMLAD tmp2, x_3_1, mult_20_01, tmp2 + LDR x_3_1, [ref], #4 + SMLAD tmp4, x_6_4, mult_20_m5, tmp4 + + SMLABB tmp1, x_6_4, mult_20_m5, tmp1 + SMLADX tmp3, x_6_4, mult_20_m5, tmp3 + SMLADX tmp2, x_6_4, mult_20_01, tmp2 + SMLADX tmp4, x_7_5, mult_20_m5, tmp4 + + SMLABB tmp1, x_7_5, mult_20_01, tmp1 + UXTB16 x_2_0, x_3_1 + SMLABB tmp2, x_7_5, mult_20_m5, tmp2 + SMLADX tmp3, x_7_5, mult_20_01, tmp3 + SMLABB tmp4, x_2_0, mult_20_01, tmp4 + + MOV tmp2, tmp2, ASR #5 + MOV tmp1, tmp1, ASR #5 + PKHBT tmp2, tmp2, tmp4, LSL #(16-5) + PKHBT tmp1, tmp1, tmp3, LSL #(16-5) + LDR tmp4, [sp, #0x228] ;// stack argument that follows partHeight + USAT16 tmp2, #8, tmp2 ;// clamp both halfwords to 0..255 + USAT16 tmp1, #8, tmp1 + SUB tmp4, tmp4, #10 + + SUBS count, count, #4<<28 ;// four pixels done; C is clear once the row is finished + LDR tmp3, [ref, tmp4] ;// second operand of the average, read at ref+arg-10 + ORR tmp1, tmp1, tmp2, LSL #8 + +;// quarter pel position: rounded average (a+b+1)/2 of filtered and loaded bytes + LDR tmp2, = 0x80808080 + MVN tmp3, tmp3 + UHSUB8 tmp1, tmp1, tmp3 + EOR tmp1, tmp1, tmp2 + STR tmp1, [mb], #4 + + BCC next_y + + UXTB16 x_3_1, x_3_1, ROR #8 + + SMLAD tmp1, x_6_4, mult_20_01, plus16 + SMLATB tmp3, x_6_4, mult_20_01, plus16 + SMLATB tmp2, x_6_4, mult_20_m5, plus16 + SMLATB tmp4, x_7_5, mult_20_01, plus16 + + SMLAD tmp1, x_7_5, mult_20_m5, tmp1 + SMLATB tmp3, x_7_5, mult_20_m5, tmp3 + SMLAD tmp2, x_7_5, mult_20_01, tmp2 + LDR x_7_5, [ref], #4 + SMLAD tmp4, x_2_0, mult_20_m5, tmp4 + + SMLABB tmp1, x_2_0, mult_20_m5, tmp1 + SMLADX tmp3, x_2_0, mult_20_m5, tmp3 + SMLADX tmp2, x_2_0, mult_20_01, tmp2 + SMLADX tmp4, x_3_1, mult_20_m5, tmp4 + + SMLABB tmp1, x_3_1, mult_20_01, tmp1 + 
UXTB16 x_6_4, x_7_5 + SMLABB tmp2, x_3_1, mult_20_m5, tmp2 + SMLADX tmp3, x_3_1, mult_20_01, tmp3 + SMLABB tmp4, x_6_4, mult_20_01, tmp4 + + MOV tmp2, tmp2, ASR #5 + MOV tmp1, tmp1, ASR #5 + PKHBT tmp2, tmp2, tmp4, LSL #(16-5) + PKHBT tmp1, tmp1, tmp3, LSL #(16-5) + LDR tmp4, [sp, #0x228] ;// stack argument that follows partHeight + USAT16 tmp2, #8, tmp2 ;// clamp both halfwords to 0..255 + USAT16 tmp1, #8, tmp1 + SUB tmp4, tmp4, #10 + + SUBS count, count, #4<<28 ;// four pixels done; C is clear once the row is finished + LDR tmp3, [ref, tmp4] + ORR tmp1, tmp1, tmp2, LSL #8 + +;// quarter pel position (same rounded average as in the first half of loop_x) + LDR tmp2, = 0x80808080 + MVN tmp3, tmp3 + UHSUB8 tmp1, tmp1, tmp3 + EOR tmp1, tmp1, tmp2 + + STR tmp1, [mb], #4 + BCS loop_x + +next_y + AND tmp3, count, #0x00F00000 ;// partWidth-1 + SMLABB ref, count, mult_20_01, ref ;// +width + ADDS mb, mb, #16 ;// +16, Carry=0 + SBC mb, mb, tmp3, LSR #20 ;// -(partWidth-1)-1 + SBC ref, ref, tmp3, LSR #20 ;// -(partWidth-1)-1 + ADDS count, count, #(1<<28)-(1<<24) ;// loop_y field [27:24] -= 1 (and 1<<28 added to the loop_x nibble) + BGE loop_y + + ADD sp,sp,#0x1f4 ;// drop the locals (0x1e4) and the saved r0-r3 (0x10) + LDMFD sp!, {r4-r11, pc} + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_ver_quarter.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_ver_quarter.s new file mode 100755 index 0000000000000000000000000000000000000000..1c79b39e9e02f377f4dae6a621d14e3dc8fe8204 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_ver_quarter.s @@ -0,0 +1,536 @@ +; Copyright (C) 2009 The Android Open Source Project +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. 
+ +;------------------------------------------------------------------------------- +;-- +;-- Abstract : ARMv6 optimized version of h264bsdInterpolateHorVerQuarter +;-- function +;-- +;------------------------------------------------------------------------------- + + + IF :DEF: H264DEC_WINASM + ;// We don't use REQUIRE8 and PRESERVE8 for winasm + ELSE + REQUIRE8 + PRESERVE8 + ENDIF + + AREA |.text|, CODE + +;// h264bsdInterpolateHorVerQuarter register allocation + +ref RN 0 + +mb RN 1 +buff RN 1 + +count RN 2 +x0 RN 2 + +y0 RN 3 +x_2_0 RN 3 +res RN 3 + +x_3_1 RN 4 +tmp1 RN 4 + +height RN 5 +x_6_4 RN 5 +tmp2 RN 5 + +partW RN 6 +x_7_5 RN 6 +tmp3 RN 6 + +partH RN 7 +tmp4 RN 7 + +tmp5 RN 8 + +tmp6 RN 9 + +tmpa RN 10 + +mult_20_01 RN 11 +tmpb RN 11 + +mult_20_m5 RN 12 +width RN 12 + +plus16 RN 14 ;// r14 (lr): saved on entry, then reused for the rounding constant + + +;// function exports and imports + + IMPORT h264bsdFillBlock + + EXPORT h264bsdInterpolateHorVerQuarter + +;// Horizontal filter approach +;// +;// Basic idea in horizontal filtering is to adjust coefficients +;// like below. Calculation is done with 16-bit maths. +;// +;// Reg x_2_0 x_3_1 x_6_4 x_7_5 x_2_0 +;// [ 2 0 ] [ 3 1 ] [ 6 4 ] [ 7 5 ] [ 10 8 ] ... 
+;// y_0 = 20 1 20 -5 -5 1 +;// y_1 = -5 20 1 1 20 -5 +;// y_2 = 1 -5 -5 20 1 20 +;// y_3 = 1 20 -5 -5 20 1 + + +h264bsdInterpolateHorVerQuarter + STMFD sp!, {r0-r11, lr} ;// r0-r3 are spilled too: mb, x0 and y0 are reloaded from sp+0x1e8/0x1ec/0x1f0 + SUB sp, sp, #0x1e4 ;// local area; the fill buffer p1 starts at sp+0x28 + + CMP x0, #0 + BLT do_fill ;// (x0 < 0) + LDR partW, [sp,#0x220] ;// partWidth + LDR width, [sp,#0x218] ;// width + ADD tmpa, x0, partW ;// (x0+partWidth) + ADD tmpa, tmpa, #5 ;// (x0+partW+5) + CMP tmpa, width + BHI do_fill ;// (x0+partW+5)>width + + CMP y0, #0 + BLT do_fill ;// (y0 < 0) + LDR partH, [sp,#0x224] ;// partHeight + LDR height, [sp,#0x21c] ;// height + ADD tmp5, y0, partH ;// (y0+partHeight) + ADD tmp5, tmp5, #5 ;// (y0+partH+5) + CMP tmp5, height + BLS skip_fill ;// no overfill needed + + +do_fill + LDR partH, [sp,#0x224] ;// partHeight + LDR partW, [sp,#0x220] ;// partWidth + LDR height, [sp,#0x21c] ;// height + ADD tmp5, partH, #5 ;// tmp5 = partH + 5 + ADD tmpa, partW, #5 ;// tmpa = partW + 5 + STMIB sp, {height, tmpa} ;// sp+4 = height, sp+8 = partWidth+5 + LDR width, [sp,#0x218] ;// width + STR tmp5, [sp,#0xc] ;// sp+c = partHeight+5 + STR tmpa, [sp,#0x10] ;// sp+10 = partWidth+5 + STR width, [sp,#0] ;// sp+0 = width + ADD buff, sp, #0x28 ;// buff = p1[21*21/4+1] + BL h264bsdFillBlock + + MOV x0, #0 + STR x0,[sp,#0x1ec] ;// x0 = 0 + STR x0,[sp,#0x1f0] ;// y0 = 0 + ADD ref,sp,#0x28 ;// ref = p1 + STR tmpa, [sp,#0x218] ;// width = partWidth+5 + + +skip_fill + LDR x0 ,[sp,#0x1ec] ;// x0 + LDR y0 ,[sp,#0x1f0] ;// y0 + LDR width, [sp,#0x218] ;// width + LDR tmp6, [sp,#0x228] ;// horVerOffset + LDR mb, [sp, #0x1e8] ;// mb + MLA tmp5, width, y0, x0 ;// y0*width+x0 + ADD ref, ref, tmp5 ;// ref += y0*width+x0 + STR ref, [sp, #0x1e4] ;// store "ref" for vertical filtering + AND tmp6, tmp6, #2 ;// calculate ref for horizontal filter + MOV tmpa, #2 + ADD tmp6, tmpa, tmp6, LSR #1 + MLA ref, tmp6, width, ref ;// ref += (2 + ((horVerOffset&2)>>1)) * width + ADD ref, ref, #8 ;// ref = ref+8 + + ;// pack values to count register + ;// [31:28] loop_x (partWidth-1) + ;// [27:24] loop_y (partHeight-1) + ;// [23:20] 
partWidth-1 + ;// [19:16] partHeight-1 + ;// [15:00] width + MOV count, width + SUB partW, partW, #1; + SUB partH, partH, #1; + ADD tmp5, partH, partW, LSL #4 + ADD count, count, tmp5, LSL #16 + + + LDR mult_20_01, = 0x00140001 ;// constant multipliers + LDR mult_20_m5, = 0x0014FFFB ;// constant multipliers + MOV plus16, #16 ;// constant for add + AND tmp4, count, #0x000F0000 ;// partHeight-1 + AND tmp6, count, #0x00F00000 ;// partWidth-1 + ADD count, count, tmp4, LSL #8 ;// partH-1 to lower part of top byte + +;// HORIZONTAL PART + +loop_y_hor + LDR x_3_1, [ref, #-8] + ADD count, count, tmp6, LSL #8 ;// partW-1 to upper part of top byte + LDR x_7_5, [ref, #-4] + UXTB16 x_2_0, x_3_1 + UXTB16 x_3_1, x_3_1, ROR #8 + UXTB16 x_6_4, x_7_5 + +loop_x_hor + UXTB16 x_7_5, x_7_5, ROR #8 + + SMLAD tmp4, x_2_0, mult_20_01, plus16 + SMLATB tmp6, x_2_0, mult_20_01, plus16 + SMLATB tmp5, x_2_0, mult_20_m5, plus16 + SMLATB tmpa, x_3_1, mult_20_01, plus16 + + SMLAD tmp4, x_3_1, mult_20_m5, tmp4 + SMLATB tmp6, x_3_1, mult_20_m5, tmp6 + SMLAD tmp5, x_3_1, mult_20_01, tmp5 + LDR x_3_1, [ref], #4 + SMLAD tmpa, x_6_4, mult_20_m5, tmpa + + SMLABB tmp4, x_6_4, mult_20_m5, tmp4 + SMLADX tmp6, x_6_4, mult_20_m5, tmp6 + SMLADX tmp5, x_6_4, mult_20_01, tmp5 + SMLADX tmpa, x_7_5, mult_20_m5, tmpa + + SMLABB tmp4, x_7_5, mult_20_01, tmp4 + UXTB16 x_2_0, x_3_1 + SMLABB tmp5, x_7_5, mult_20_m5, tmp5 + SMLADX tmp6, x_7_5, mult_20_01, tmp6 + SMLABB tmpa, x_2_0, mult_20_01, tmpa + + MOV tmp5, tmp5, ASR #5 + MOV tmp4, tmp4, ASR #5 + PKHBT tmp5, tmp5, tmpa, LSL #(16-5) + PKHBT tmp4, tmp4, tmp6, LSL #(16-5) + USAT16 tmp5, #8, tmp5 ;// clamp both halfwords to 0..255 + USAT16 tmp4, #8, tmp4 + + SUBS count, count, #4<<28 ;// four pixels done; C is clear once the row is finished + ORR tmp4, tmp4, tmp5, LSL #8 + STR tmp4, [mb], #4 ;// horizontal result goes to mb; the vertical part reads it back and averages + BCC next_y_hor + + UXTB16 x_3_1, x_3_1, ROR #8 + + SMLAD tmp4, x_6_4, mult_20_01, plus16 + SMLATB tmp6, x_6_4, mult_20_01, plus16 + SMLATB tmp5, x_6_4, mult_20_m5, plus16 + SMLATB tmpa, x_7_5, mult_20_01, plus16 + + SMLAD tmp4, x_7_5, mult_20_m5, tmp4 + SMLATB tmp6, 
x_7_5, mult_20_m5, tmp6 + SMLAD tmp5, x_7_5, mult_20_01, tmp5 + LDR x_7_5, [ref], #4 + SMLAD tmpa, x_2_0, mult_20_m5, tmpa + + SMLABB tmp4, x_2_0, mult_20_m5, tmp4 + SMLADX tmp6, x_2_0, mult_20_m5, tmp6 + SMLADX tmp5, x_2_0, mult_20_01, tmp5 + SMLADX tmpa, x_3_1, mult_20_m5, tmpa + + SMLABB tmp4, x_3_1, mult_20_01, tmp4 + UXTB16 x_6_4, x_7_5 + SMLABB tmp5, x_3_1, mult_20_m5, tmp5 + SMLADX tmp6, x_3_1, mult_20_01, tmp6 + SMLABB tmpa, x_6_4, mult_20_01, tmpa + + MOV tmp5, tmp5, ASR #5 + MOV tmp4, tmp4, ASR #5 + PKHBT tmp5, tmp5, tmpa, LSL #(16-5) + PKHBT tmp4, tmp4, tmp6, LSL #(16-5) + USAT16 tmp5, #8, tmp5 + USAT16 tmp4, #8, tmp4 + + SUBS count, count, #4<<28 + ORR tmp4, tmp4, tmp5, LSL #8 + STR tmp4, [mb], #4 + BCS loop_x_hor + +next_y_hor + AND tmp6, count, #0x00F00000 ;// partWidth-1 + SMLABB ref, count, mult_20_01, ref ;// +width + ADDS mb, mb, #16 ;// +16, Carry=0 + SBC mb, mb, tmp6, LSR #20 ;// -(partWidth-1)-1 + SBC ref, ref, tmp6, LSR #20 ;// -(partWidth-1)-1 + ADDS count, count, #(1<<28)-(1<<24) ;// decrement loop_y counter (bits 27:24) + BGE loop_y_hor + + + +;// VERTICAL PART +;// +;// Approach to vertical interpolation +;// +;// Interpolation is done by using 32-bit loads and stores +;// and by using 16 bit arithmetic. 4x4 block is processed +;// in each round. +;// +;// |a_11|a_11|a_11|a_11|...|a_1n|a_1n|a_1n|a_1n| +;// |b_11|b_11|b_11|b_11|...|b_1n|b_1n|b_1n|b_1n| +;// |c_11|c_11|c_11|c_11|...|c_1n|c_1n|c_1n|c_1n| +;// |d_11|d_11|d_11|d_11|...|d_1n|d_1n|d_1n|d_1n| +;// .. +;// .. +;// |a_m1|a_m1|a_m1|a_m1|... +;// |b_m1|b_m1|b_m1|b_m1|... +;// |c_m1|c_m1|c_m1|c_m1|... +;// |d_m1|d_m1|d_m1|d_m1|... + +;// Approach to bilinear interpolation to quarter pel position. +;// 4 bytes are processed in parallel +;// +;// algorithm (a+b+1)/2. Rounding upwards +1 can be achieved by +;// negating second operand to get one's complement (instead of 2's) +;// and using subtraction, EOR is used to correct sign. 
+;// +;// MVN b, b +;// UHSUB8 a, a, b +;// EOR a, a, 0x80808080 + + + LDR ref, [sp, #0x1e4] ;// ref + LDR tmpa, [sp, #0x228] ;// horVerOffset + LDR mb, [sp, #0x1e8] ;// mb + LDR width, [sp, #0x218] ;// width + ADD ref, ref, #2 ;// calculate correct position + AND tmpa, tmpa, #1 + ADD ref, ref, tmpa ;// ref += 2 + (horVerOffset & 1) in total + LDR plus16, = 0x00100010 ;// +16 to lower and upper halfwords + AND count, count, #0x00FFFFFF ;// clear the loop counters in the top byte + + AND tmpa, count, #0x000F0000 ;// partHeight-1 + ADD count, count, tmpa, LSL #8 ;// partHeight-1 into the y counter field [27:24] + +loop_y + ADD count, count, tmp6, LSL #8 ;// partWidth-1 + +loop_x + LDR tmp1, [ref], width ;// |a4|a3|a2|a1| + LDR tmp2, [ref], width ;// |c4|c3|c2|c1| + LDR tmp3, [ref], width ;// |g4|g3|g2|g1| + LDR tmp4, [ref], width ;// |m4|m3|m2|m1| + LDR tmp5, [ref], width ;// |r4|r3|r2|r1| + LDR tmp6, [ref], width ;// |t4|t3|t2|t1| + + ;// first four pixels (row 0 of the 4x4 block) + UXTB16 tmpa, tmp3 ;// |g3|g1| + UXTAB16 tmpa, tmpa, tmp4 ;// |g3+m3|g1+m1| + UXTB16 tmpb, tmp2 ;// |c3|c1| + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + + UXTAB16 tmpb, tmpb, tmp5 ;// |c3+r3|c1+r1| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpa, tmpa, tmp1 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp6 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate to 13 bits, i.e. 0..255 after the >>5 + LDR res, = 0x00FF00FF + UXTB16 tmpa, tmp3, ROR #8 ;// |g4|g2| + UXTAB16 tmpa, tmpa, tmp4, ROR #8 ;// |g4+m4|g2+m2| + AND res, res, tmpb, LSR #5 ;// mask and divide by 32 + + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTB16 tmpb, tmp2, ROR #8 ;// |c4|c2| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpb, tmpb, tmp5, ROR #8 ;// |c4+r4|c2+r2| + UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp6, ROR #8 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR tmp1, [mb] ;// horizontal result written by the horizontal part + LDR tmpa, = 0xFF00FF00 + MVN tmp1, tmp1 + AND tmpa, 
tmpa, tmpb, LSL #3 ;// mask and divide by 32 + ORR res, res, tmpa + + LDR tmpa, = 0x80808080 + UHSUB8 res, res, tmp1 ;// bilinear interpolation + LDR tmp1, [ref], width ;// load next row + EOR res, res, tmpa ;// correct sign + + STR res, [mb], #16 ;// next row (mb) + + + ;// tmp2 = |a4|a3|a2|a1| + ;// tmp3 = |c4|c3|c2|c1| + ;// tmp4 = |g4|g3|g2|g1| + ;// tmp5 = |m4|m3|m2|m1| + ;// tmp6 = |r4|r3|r2|r1| + ;// tmp1 = |t4|t3|t2|t1| + + ;// second four pixels + UXTB16 tmpa, tmp4 ;// |g3|g1| + UXTAB16 tmpa, tmpa, tmp5 ;// |g3+m3|g1+m1| + UXTB16 tmpb, tmp3 ;// |c3|c1| + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTAB16 tmpb, tmpb, tmp6 ;// |c3+r3|c1+r1| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpa, tmpa, tmp2 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp1 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR res, = 0x00FF00FF + UXTB16 tmpa, tmp4, ROR #8 ;// |g4|g2| + UXTAB16 tmpa, tmpa, tmp5, ROR #8 ;// |g4+m4|g2+m2| + AND res, res, tmpb, LSR #5 ;// mask and divide by 32 + + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTB16 tmpb, tmp3, ROR #8 ;// |c4|c2| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpb, tmpb, tmp6, ROR #8 ;// |c4+r4|c2+r2| + UXTAB16 tmpa, tmpa, tmp2, ROR #8 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR tmp2, [mb] + LDR tmpa, = 0xFF00FF00 + MVN tmp2, tmp2 + + AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32 + ORR res, res, tmpa + LDR tmpa, = 0x80808080 + UHSUB8 res, res, tmp2 ;// bilinear interpolation + LDR tmp2, [ref], width ;// load next row + EOR res, res, tmpa ;// correct sign + STR res, [mb], #16 ;// next row + + ;// tmp3 = |a4|a3|a2|a1| + ;// tmp4 = |c4|c3|c2|c1| + ;// tmp5 = |g4|g3|g2|g1| + ;// tmp6 = |m4|m3|m2|m1| + ;// tmp1 = |r4|r3|r2|r1| 
+ ;// tmp2 = |t4|t3|t2|t1| + + ;// third four pixels + UXTB16 tmpa, tmp5 ;// |g3|g1| + UXTAB16 tmpa, tmpa, tmp6 ;// |g3+m3|g1+m1| + UXTB16 tmpb, tmp4 ;// |c3|c1| + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTAB16 tmpb, tmpb, tmp1 ;// |c3+r3|c1+r1| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpa, tmpa, tmp3 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp2 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR res, = 0x00FF00FF + UXTB16 tmpa, tmp5, ROR #8 ;// |g4|g2| + UXTAB16 tmpa, tmpa, tmp6, ROR #8 ;// |g4+m4|g2+m2| + AND res, res, tmpb, LSR #5 ;// mask and divide by 32 + + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTB16 tmpb, tmp4, ROR #8 ;// |c4|c2| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpb, tmpb, tmp1, ROR #8 ;// |c4+r4|c2+r2| + UXTAB16 tmpa, tmpa, tmp3, ROR #8 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp2, ROR #8 ;// 16+20(G+M)+A+T + + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR tmp3, [mb] + LDR tmpa, = 0xFF00FF00 + MVN tmp3, tmp3 + + AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32 + ORR res, res, tmpa + LDR tmpa, = 0x80808080 + UHSUB8 res, res, tmp3 ;// bilinear interpolation + LDR tmp3, [ref] ;// load ninth row; ref is not advanced by this load + EOR res, res, tmpa ;// correct sign + STR res, [mb], #16 ;// next row + + ;// tmp4 = |a4|a3|a2|a1| + ;// tmp5 = |c4|c3|c2|c1| + ;// tmp6 = |g4|g3|g2|g1| + ;// tmp1 = |m4|m3|m2|m1| + ;// tmp2 = |r4|r3|r2|r1| + ;// tmp3 = |t4|t3|t2|t1| + + ;// fourth four pixels + UXTB16 tmpa, tmp6 ;// |g3|g1| + UXTAB16 tmpa, tmpa, tmp1 ;// |g3+m3|g1+m1| + UXTB16 tmpb, tmp5 ;// |c3|c1| + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTAB16 tmpb, tmpb, tmp2 ;// |c3+r3|c1+r1| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpa, tmpa, tmp4 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp3 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, 
tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR res, = 0x00FF00FF + UXTB16 tmpa, tmp6, ROR #8 ;// |g4|g2| + UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// |g4+m4|g2+m2| + AND res, res, tmpb, LSR #5 ;// mask and divide by 32 + + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTB16 tmpb, tmp5, ROR #8 ;// |c4|c2| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpb, tmpb, tmp2, ROR #8 ;// |c4+r4|c2+r2| + UXTAB16 tmpa, tmpa, tmp4, ROR #8 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp3, ROR #8 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR tmp5, [mb] + LDR tmp4, = 0xFF00FF00 + MVN tmp5, tmp5 + + AND tmpa, tmp4, tmpb, LSL #3 ;// mask and divide by 32 + ORR res, res, tmpa + LDR tmpa, = 0x80808080 + UHSUB8 res, res, tmp5 ;// bilinear interpolation + + ;// decrement loop_x counter + SUBS count, count, #4<<28 ;// decrement x loop counter + + ;// calculate "ref" address for next round + SUB ref, ref, width, LSL #3 ;// ref -= 8*width; + ADD ref, ref, #4 ;// next column (4 pixels) + + EOR res, res, tmpa ;// correct sign + STR res, [mb], #-44 ;// net +4 after the three #16 steps: first row of the next 4-pixel column + + BCS loop_x + + ADDS mb, mb, #64 ;// set Carry=0 + ADD ref, ref, width, LSL #2 ;// ref += 4*width + AND tmp6, count, #0x00F00000 ;// partWidth-1 + SBC ref, ref, tmp6, LSR #20 ;// -(partWidth-1)-1 + SBC mb, mb, tmp6, LSR #20 ;// -(partWidth-1)-1 + + ADDS count, count, #0xC << 24 ;// y loop counter: adds 12 to bits 27:24, i.e. -4 modulo 16 + BGE loop_y + + ADD sp, sp, #0x1f4 ;// drop the locals (0x1e4) and the saved r0-r3 (0x10) + LDMFD sp!, {r4-r11, pc} + + END diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s new file mode 100755 index 0000000000000000000000000000000000000000..a81aed77b04a008022dbeffeb48d72de09b65203 --- /dev/null +++ 
b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s @@ -0,0 +1,163 @@ +; Copyright (C) 2009 The Android Open Source Project +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. + +;------------------------------------------------------------------------------- +;-- +;-- Abstract : ARMv6 optimized version of the horizontal part of +;-- h264bsdInterpolateMid functions +;-- +;------------------------------------------------------------------------------- + + + IF :DEF: H264DEC_WINASM + ;// We don't use REQUIRE8 and PRESERVE8 for winasm + ELSE + REQUIRE8 + PRESERVE8 + ENDIF + + AREA |.text|, CODE + + +;// Register allocation + +ref RN 0 ;// pointer to current position in reference image +mb RN 1 ;// pointer to current position in the output buffer (32-bit sums are stored) +count RN 2 ;// bit-packed width and count values + +x_2_0 RN 4 +x_3_1 RN 5 +x_6_4 RN 6 +x_7_5 RN 7 + +tmp1 RN 8 +tmp2 RN 9 +tmp3 RN 10 +tmp4 RN 11 + +mult_20_01 RN 12 ;// [20, 1] +mult_20_m5 RN 14 ;// [20, -5] + + + EXPORT h264bsdInterpolateMidHorPart + +;// Horizontal filter approach +;// +;// Basic idea in horizontal filtering is to adjust coefficients +;// like below. Calculation is done with 16-bit maths. +;// +;// Reg x_2_0 x_3_1 x_6_4 x_7_5 x_2_0 +;// [ 2 0 ] [ 3 1 ] [ 6 4 ] [ 7 5 ] [ 10 8 ] ... 
+;// y_0 = 20 1 20 -5 -5 1 +;// y_1 = -5 20 1 1 20 -5 +;// y_2 = 1 -5 -5 20 1 20 +;// y_3 = 1 20 -5 -5 20 1 + + +h264bsdInterpolateMidHorPart + STMFD sp!, {r4-r11, lr} + + ;// layout of the count register as this routine uses it + ;// [31:28] loop_x counter, reloaded from [19:16] for every row + ;// [27:20] loop_y counter, decremented by 1<<20 per row + ;// [19:16] partWidth-1 + ;// [15:00] width + ;// (count arrives already packed in r2; nothing is packed here) + + + LDR mult_20_01, = 0x00140001 + LDR mult_20_m5, = 0x0014FFFB + AND tmp3, count, #0x000F0000 ;// partWidth-1 +loop_y + LDR x_3_1, [ref, #-8] + ADD count, count, tmp3, LSL #12 ;// partWidth-1 into the loop_x field [31:28] + LDR x_7_5, [ref, #-4] + UXTB16 x_2_0, x_3_1 + UXTB16 x_3_1, x_3_1, ROR #8 + UXTB16 x_6_4, x_7_5 + +loop_x + UXTB16 x_7_5, x_7_5, ROR #8 + + SMUAD tmp1, x_2_0, mult_20_01 + SMULTB tmp2, x_2_0, mult_20_m5 + SMULTB tmp3, x_2_0, mult_20_01 + SMULTB tmp4, x_3_1, mult_20_01 + + SMLAD tmp1, x_3_1, mult_20_m5, tmp1 + SMLAD tmp2, x_3_1, mult_20_01, tmp2 + SMLATB tmp3, x_3_1, mult_20_m5, tmp3 + LDR x_3_1, [ref], #4 + SMLAD tmp4, x_6_4, mult_20_m5, tmp4 + + SMLABB tmp1, x_6_4, mult_20_m5, tmp1 + SMLADX tmp2, x_6_4, mult_20_01, tmp2 + SMLADX tmp3, x_6_4, mult_20_m5, tmp3 + SMLADX tmp4, x_7_5, mult_20_m5, tmp4 + + SMLABB tmp1, x_7_5, mult_20_01, tmp1 + SMLABB tmp2, x_7_5, mult_20_m5, tmp2 + UXTB16 x_2_0, x_3_1 + SMLADX tmp3, x_7_5, mult_20_01, tmp3 + SMLABB tmp4, x_2_0, mult_20_01, tmp4 + + SUBS count, count, #4<<28 ;// four outputs done; C is clear once the row is finished + STR tmp1, [mb], #4 ;// raw 32-bit filter sums are stored: no rounding, shift or clamp here + STR tmp2, [mb], #4 + STR tmp3, [mb], #4 + STR tmp4, [mb], #4 + BCC next_y + + UXTB16 x_3_1, x_3_1, ROR #8 + + SMUAD tmp1, x_6_4, mult_20_01 + SMULTB tmp2, x_6_4, mult_20_m5 + SMULTB tmp3, x_6_4, mult_20_01 + SMULTB tmp4, x_7_5, mult_20_01 + + SMLAD tmp1, x_7_5, mult_20_m5, tmp1 + SMLAD tmp2, x_7_5, mult_20_01, tmp2 + SMLATB tmp3, x_7_5, mult_20_m5, tmp3 + LDR x_7_5, [ref], #4 + SMLAD tmp4, x_2_0, mult_20_m5, tmp4 + + SMLABB tmp1, x_2_0, mult_20_m5, tmp1 + SMLADX tmp2, x_2_0, mult_20_01, tmp2 + SMLADX tmp3, x_2_0, mult_20_m5, tmp3 + SMLADX tmp4, x_3_1, mult_20_m5, tmp4 + + SMLABB tmp1, x_3_1, mult_20_01, tmp1 + SMLABB tmp2, 
x_3_1, mult_20_m5, tmp2 + UXTB16 x_6_4, x_7_5 + SMLADX tmp3, x_3_1, mult_20_01, tmp3 + SMLABB tmp4, x_6_4, mult_20_01, tmp4 + + SUBS count, count, #4<<28 + STR tmp1, [mb], #4 + STR tmp2, [mb], #4 + STR tmp3, [mb], #4 + STR tmp4, [mb], #4 + BCS loop_x + +next_y + AND tmp3, count, #0x000F0000 ;// partWidth-1 + SMLABB ref, count, mult_20_01, ref ;// +width + SBC ref, ref, tmp3, LSR #16 ;// -(partWidth-1)-1 (C is clear on both paths into next_y) + ADDS count, count, #(1<<28)-(1<<20) ;// row counter -= 1 at bit 20 (and 1<<28 added to the loop_x nibble) + BGE loop_y + + LDMFD sp!, {r4-r11, pc} + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_half.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_half.s new file mode 100755 index 0000000000000000000000000000000000000000..244fc6ff7fcc19f396100cb0b35dd8ae43df2827 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_half.s @@ -0,0 +1,347 @@ +; Copyright (C) 2009 The Android Open Source Project +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. 
+ +;------------------------------------------------------------------------------- +;-- +;-- Abstract : ARMv6 optimized version of h264bsdInterpolateVerHalf function +;-- +;------------------------------------------------------------------------------- + + + IF :DEF: H264DEC_WINASM + ;// We dont use REQUIRE8 and PRESERVE8 for winasm + ELSE + REQUIRE8 + PRESERVE8 + ENDIF + + AREA |.text|, CODE + +;// h264bsdInterpolateVerHalf register allocation + +ref RN 0 + +mb RN 1 +buff RN 1 + +count RN 2 +x0 RN 2 + +res RN 3 +y0 RN 3 + +tmp1 RN 4 + +tmp2 RN 5 +height RN 5 + +tmp3 RN 6 +partW RN 6 + +tmp4 RN 7 +partH RN 7 + +tmp5 RN 8 +tmp6 RN 9 + +tmpa RN 10 +tmpb RN 11 +width RN 12 + +plus16 RN 14 + + +;// function exports and imports + + IMPORT h264bsdFillBlock + + EXPORT h264bsdInterpolateVerHalf + +;// Approach to vertical interpolation +;// +;// Interpolation is done by using 32-bit loads and stores +;// and by using 16 bit arithmetic. 4x4 block is processed +;// in each round. +;// +;// |a_11|a_11|a_11|a_11|...|a_1n|a_1n|a_1n|a_1n| +;// |b_11|b_11|b_11|b_11|...|b_1n|b_1n|b_1n|b_1n| +;// |c_11|c_11|c_11|c_11|...|c_1n|c_1n|c_1n|c_1n| +;// |d_11|d_11|d_11|d_11|...|d_1n|d_1n|d_1n|d_1n| +;// .. +;// .. +;// |a_m1|a_m1|a_m1|a_m1|... +;// |b_m1|b_m1|b_m1|b_m1|... +;// |c_m1|c_m1|c_m1|c_m1|... +;// |d_m1|d_m1|d_m1|d_m1|... 
+ +h264bsdInterpolateVerHalf + STMFD sp!, {r0-r11, lr} + SUB sp, sp, #0x1e4 + + CMP x0, #0 + BLT do_fill ;// (x0 < 0) + LDR partW, [sp,#0x220] ;// partWidth + ADD tmp5, x0, partW ;// (x0+partWidth) + LDR width, [sp,#0x218] ;// width + CMP tmp5, width + BHI do_fill ;// (x0+partW)>width + + CMP y0, #0 + BLT do_fill ;// (y0 < 0) + LDR partH, [sp,#0x224] ;// partHeight + ADD tmp6, y0, partH ;// (y0+partHeight) + ADD tmp6, tmp6, #5 ;// (y0+partH+5) + LDR height, [sp,#0x21c] ;// height + CMP tmp6, height + BLS skip_fill ;// no overfill needed + + +do_fill + LDR partH, [sp,#0x224] ;// partHeight + ADD tmp5, partH, #5 ;// r2 = partH + 5; + LDR height, [sp,#0x21c] ;// height + LDR partW, [sp,#0x220] ;// partWidth + STMIB sp, {height, partW} ;// sp+4 = height, sp+8 = partWidth + STR tmp5, [sp,#0xc] ;// sp+c partHeight+5 + STR partW, [sp,#0x10] ;// sp+10 = partWidth + LDR width, [sp,#0x218] ;// width + STR width, [sp,#0] ;// sp+0 = width + ADD buff, sp, #0x28 ;// buff = p1[21*21/4+1] + BL h264bsdFillBlock + + MOV x0, #0 + STR x0,[sp,#0x1ec] ;// x0 = 0 + STR x0,[sp,#0x1f0] ;// y0 = 0 + ADD ref,sp,#0x28 ;// ref = p1 + STR partW, [sp,#0x218] + + +skip_fill + LDR x0 ,[sp,#0x1ec] ;// x0 + LDR y0 ,[sp,#0x1f0] ;// y0 + LDR width, [sp,#0x218] ;// width + MLA tmp6, width, y0, x0 ;// y0*width+x0 + ADD ref, ref, tmp6 ;// ref += y0*width+x0 + LDR mb, [sp, #0x1e8] ;// mb + + ADD count, partW, partH, LSL #16 ;// |partH|partW| + LDR tmp5, = 0x00010001 + SSUB16 count, count, tmp5; ;// |partH-1|partW-1| + LDR plus16, = 0x00100010 + + AND tmp1, count, #0x000000FF ;// partWidth + + +loop_y + ADD count, count, tmp1, LSL #24 ;// partWidth-1 to top byte + +loop_x + LDR tmp1, [ref], width ;// |a4|a3|a2|a1| + LDR tmp2, [ref], width ;// |c4|c3|c2|c1| + LDR tmp3, [ref], width ;// |g4|g3|g2|g1| + LDR tmp4, [ref], width ;// |m4|m3|m2|m1| + LDR tmp5, [ref], width ;// |r4|r3|r2|r1| + LDR tmp6, [ref], width ;// |t4|t3|t2|t1| + + ;// first four pixels + UXTB16 tmpa, tmp3 ;// |g3|g1| + UXTAB16 tmpa, tmpa, 
tmp4 ;// |g3+m3|g1+m1| + UXTB16 tmpb, tmp2 ;// |c3|c1| + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + + UXTAB16 tmpb, tmpb, tmp5 ;// |c3+r3|c1+r1| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpa, tmpa, tmp1 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp6 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR res, = 0x00FF00FF + UXTB16 tmpa, tmp3, ROR #8 ;// |g4|g2| + UXTAB16 tmpa, tmpa, tmp4, ROR #8 ;// |g4+m4|g2+m2| + AND res, res, tmpb, LSR #5 ;// mask and divide by 32 + + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTB16 tmpb, tmp2, ROR #8 ;// |c4|c2| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpb, tmpb, tmp5, ROR #8 ;// |c4+r4|c2+r2| + UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp6, ROR #8 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR tmp1, [ref], width + LDR tmpa, = 0xFF00FF00 + + AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divede by 32 + ORR res, res, tmpa + STR res, [mb], #16 ;// next row (mb) + + ;// tmp2 = |a4|a3|a2|a1| + ;// tmp3 = |c4|c3|c2|c1| + ;// tmp4 = |g4|g3|g2|g1| + ;// tmp5 = |m4|m3|m2|m1| + ;// tmp6 = |r4|r3|r2|r1| + ;// tmp1 = |t4|t3|t2|t1| + + ;// second four pixels + UXTB16 tmpa, tmp4 ;// |g3|g1| + UXTAB16 tmpa, tmpa, tmp5 ;// |g3+m3|g1+m1| + UXTB16 tmpb, tmp3 ;// |c3|c1| + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTAB16 tmpb, tmpb, tmp6 ;// |c3+r3|c1+r1| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpa, tmpa, tmp2 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp1 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR res, = 0x00FF00FF + UXTB16 tmpa, tmp4, ROR #8 ;// |g4|g2| + UXTAB16 tmpa, tmpa, tmp5, ROR #8 ;// |g4+m4|g2+m2| + AND res, res, tmpb, LSR #5 
;// mask and divide by 32 + + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTB16 tmpb, tmp3, ROR #8 ;// |c4|c2| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpb, tmpb, tmp6, ROR #8 ;// |c4+r4|c2+r2| + UXTAB16 tmpa, tmpa, tmp2, ROR #8 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR tmp2, [ref], width + LDR tmpa, = 0xFF00FF00 + + AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32 + ORR res, res, tmpa + STR res, [mb], #16 ;// next row + + ;// tmp3 = |a4|a3|a2|a1| + ;// tmp4 = |c4|c3|c2|c1| + ;// tmp5 = |g4|g3|g2|g1| + ;// tmp6 = |m4|m3|m2|m1| + ;// tmp1 = |r4|r3|r2|r1| + ;// tmp2 = |t4|t3|t2|t1| + + ;// third four pixels + UXTB16 tmpa, tmp5 ;// |g3|g1| + UXTAB16 tmpa, tmpa, tmp6 ;// |g3+m3|g1+m1| + UXTB16 tmpb, tmp4 ;// |c3|c1| + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTAB16 tmpb, tmpb, tmp1 ;// |c3+r3|c1+r1| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpa, tmpa, tmp3 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp2 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR res, = 0x00FF00FF + UXTB16 tmpa, tmp5, ROR #8 ;// |g4|g2| + UXTAB16 tmpa, tmpa, tmp6, ROR #8 ;// |g4+m4|g2+m2| + AND res, res, tmpb, LSR #5 ;// mask and divide by 32 + + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTB16 tmpb, tmp4, ROR #8 ;// |c4|c2| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpb, tmpb, tmp1, ROR #8 ;// |c4+r4|c2+r2| + UXTAB16 tmpa, tmpa, tmp3, ROR #8 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp2, ROR #8 ;// 16+20(G+M)+A+T + + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR tmp3, [ref] + LDR tmpa, = 0xFF00FF00 + + ;// decrement loop_x counter + SUBS count, count, #4<<24 ;// 
(partWidth-1) -= 4; + + AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32 + ORR res, res, tmpa + STR res, [mb], #16 ;// next row + + ;// tmp4 = |a4|a3|a2|a1| + ;// tmp5 = |c4|c3|c2|c1| + ;// tmp6 = |g4|g3|g2|g1| + ;// tmp1 = |m4|m3|m2|m1| + ;// tmp2 = |r4|r3|r2|r1| + ;// tmp3 = |t4|t3|t2|t1| + + ;// fourth four pixels + UXTB16 tmpa, tmp6 ;// |g3|g1| + UXTAB16 tmpa, tmpa, tmp1 ;// |g3+m3|g1+m1| + UXTB16 tmpb, tmp5 ;// |c3|c1| + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTAB16 tmpb, tmpb, tmp2 ;// |c3+r3|c1+r1| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpa, tmpa, tmp4 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp3 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR res, = 0x00FF00FF + UXTB16 tmpa, tmp6, ROR #8 ;// |g4|g2| + UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// |g4+m4|g2+m2| + AND res, res, tmpb, LSR #5 ;// mask and divide by 32 + + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTB16 tmpb, tmp5, ROR #8 ;// |c4|c2| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpb, tmpb, tmp2, ROR #8 ;// |c4+r4|c2+r2| + UXTAB16 tmpa, tmpa, tmp4, ROR #8 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp3, ROR #8 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR tmp4, = 0xFF00FF00 + + ;// calculate "ref" address for next round + SUB ref, ref, width, LSL #3 ;// ref -= 8*width; + ADD ref, ref, #4; ;// next column (4 pixels) + AND tmpa, tmp4, tmpb, LSL #3 ;// mask and divide by 32 + ORR res, res, tmpa + STR res, [mb], #-44 + + BCS loop_x + + ADDS count, count, #252<<16 ;// (partHeight-1) -= 4; + ADD ref, ref, width, LSL #2 ;// ref += 4*width + AND tmp1, count, #0x000000FF ;// partWidth-1 + ADD tmp2, tmp1, #1 ;// partWidth + SUB ref, ref, tmp2 ;// ref -= partWidth + ADD mb, mb, #64; + SUB mb, mb, tmp2; ;// mb -= partWidth + BGE loop_y + + ADD 
sp,sp,#0x1f4 + LDMFD sp!, {r4-r11, pc} + + END diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_quarter.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_quarter.s new file mode 100755 index 0000000000000000000000000000000000000000..5266c85600e3257e80b4dbcd2dca485275ef6101 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_quarter.s @@ -0,0 +1,374 @@ +; Copyright (C) 2009 The Android Open Source Project +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. 
+ +;------------------------------------------------------------------------------- +;-- +;-- Abstract : ARMv6 optimized version of h264bsdInterpolateVerQuarter function +;-- +;------------------------------------------------------------------------------- + + IF :DEF: H264DEC_WINASM + ;// We don't use REQUIRE8 and PRESERVE8 for winasm + ELSE + REQUIRE8 + PRESERVE8 + ENDIF + + AREA |.text|, CODE + +;// h264bsdInterpolateVerQuarter register allocation + +ref RN 0 + +mb RN 1 +buff RN 1 + +count RN 2 +x0 RN 2 + +res RN 3 +y0 RN 3 + +tmp1 RN 4 + +tmp2 RN 5 +height RN 5 + +tmp3 RN 6 +partW RN 6 + +tmp4 RN 7 +partH RN 7 + +tmp5 RN 8 +tmp6 RN 9 + +tmpa RN 10 +tmpb RN 11 +width RN 12 + +plus16 RN 14 + + +;// function exports and imports + + IMPORT h264bsdFillBlock + + EXPORT h264bsdInterpolateVerQuarter + +;// Approach to vertical interpolation +;// +;// Interpolation is done by using 32-bit loads and stores +;// and by using 16 bit arithmetic. A 4x4 block is processed +;// in each round. +;// +;// |a_11|a_11|a_11|a_11|...|a_1n|a_1n|a_1n|a_1n| +;// |b_11|b_11|b_11|b_11|...|b_1n|b_1n|b_1n|b_1n| +;// |c_11|c_11|c_11|c_11|...|c_1n|c_1n|c_1n|c_1n| +;// |d_11|d_11|d_11|d_11|...|d_1n|d_1n|d_1n|d_1n| +;// .. +;// .. +;// |a_m1|a_m1|a_m1|a_m1|... +;// |b_m1|b_m1|b_m1|b_m1|... +;// |c_m1|c_m1|c_m1|c_m1|... +;// |d_m1|d_m1|d_m1|d_m1|... 
+ +h264bsdInterpolateVerQuarter + STMFD sp!, {r0-r11, lr} ;// save args r0-r3, r4-r11 and lr (13 words) + SUB sp, sp, #0x1e4 ;// locals; fill buffer starts at sp+0x28 + + CMP x0, #0 + BLT do_fill ;// (x0 < 0) + LDR partW, [sp,#0x220] ;// partWidth + ADD tmp5, x0, partW ;// (x0+partWidth) + LDR width, [sp,#0x218] ;// width + CMP tmp5, width + BHI do_fill ;// (x0+partW)>width + + CMP y0, #0 + BLT do_fill ;// (y0 < 0) + LDR partH, [sp,#0x224] ;// partHeight + ADD tmp6, y0, partH ;// (y0+partHeight) + ADD tmp6, tmp6, #5 ;// (y0+partH+5) + LDR height, [sp,#0x21c] ;// height + CMP tmp6, height + BLS skip_fill ;// no overfill needed + + +do_fill + LDR partH, [sp,#0x224] ;// partHeight + ADD tmp5, partH, #5 ;// tmp5 = partHeight+5 + LDR height, [sp,#0x21c] ;// height + LDR partW, [sp,#0x220] ;// partWidth + STMIB sp, {height, partW} ;// sp+4 = height, sp+8 = partWidth + STR tmp5, [sp,#0xc] ;// sp+c partHeight+5 + STR partW, [sp,#0x10] ;// sp+10 = partWidth + LDR width, [sp,#0x218] ;// width + STR width, [sp,#0] ;// sp+0 = width + ADD buff, sp, #0x28 ;// buff = p1[21*21/4+1] + BL h264bsdFillBlock + + MOV x0, #0 + STR x0,[sp,#0x1ec] ;// x0 = 0 + STR x0,[sp,#0x1f0] ;// y0 = 0 + ADD ref,sp,#0x28 ;// ref = p1 + STR partW, [sp,#0x218] ;// width = partWidth (reloaded at skip_fill) + + +skip_fill + LDR x0 ,[sp,#0x1ec] ;// x0 + LDR y0 ,[sp,#0x1f0] ;// y0 + LDR width, [sp,#0x218] ;// width + MLA tmp6, width, y0, x0 ;// y0*width+x0 + ADD ref, ref, tmp6 ;// ref += y0*width+x0 + LDR mb, [sp, #0x1e8] ;// mb + + ADD count, partW, partH, LSL #8 ;// |xx|xx|partH|partW| + LDR tmp5, = 0x00010100 + RSB count, tmp5, count, LSL #8 ;// |xx|partH-1|partW-1|xx| + LDR tmp2, [sp, #0x228] ;// verOffset + ADD count, count, tmp2 ;// |xx|partH-1|partW-1|verOffset| + LDR plus16, = 0x00100010 + + AND tmp1, count, #0x0000FF00 ;// (partWidth-1)<<8 + + +loop_y + ADD count, count, tmp1, LSL #16 ;// partWidth-1 to top byte + +loop_x + LDR tmp1, [ref], width ;// |a4|a3|a2|a1| + LDR tmp2, [ref], width ;// |c4|c3|c2|c1| + LDR tmp3, [ref], width ;// |g4|g3|g2|g1| + LDR tmp4, [ref], width ;// |m4|m3|m2|m1| + LDR tmp5, [ref], width ;// |r4|r3|r2|r1| + LDR 
tmp6, [ref], width ;// |t4|t3|t2|t1| + + ;// first four pixels + UXTB16 tmpa, tmp3 ;// |g3|g1| + UXTAB16 tmpa, tmpa, tmp4 ;// |g3+m3|g1+m1| + UXTB16 tmpb, tmp2 ;// |c3|c1| + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + + UXTAB16 tmpb, tmpb, tmp5 ;// |c3+r3|c1+r1| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpa, tmpa, tmp1 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp6 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR res, = 0x00FF00FF + UXTB16 tmpa, tmp3, ROR #8 ;// |g4|g2| + UXTAB16 tmpa, tmpa, tmp4, ROR #8 ;// |g4+m4|g2+m2| + AND res, res, tmpb, LSR #5 ;// mask and divide by 32 + + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTB16 tmpb, tmp2, ROR #8 ;// |c4|c2| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpb, tmpb, tmp5, ROR #8 ;// |c4+r4|c2+r2| + UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp6, ROR #8 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + MOVS tmp1, count, LSL #31 ;// update flags (verOffset) + LDR tmpa, = 0xFF00FF00 + MVNEQ tmp1, tmp3 ;// select verOffset=0: tmp1 = ~G row + MVNNE tmp1, tmp4 ;// select verOffset=1: tmp1 = ~M row + AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32 + ORR res, res, tmpa + + LDR tmpa, = 0x80808080 + UHSUB8 res, res, tmp1 ;// bilinear interpolation: (res - ~row)>>1 per byte + LDR tmp1, [ref], width ;// load next row + EOR res, res, tmpa ;// correct sign: yields (res+row+1)>>1 per byte + + STR res, [mb], #16 ;// next row (mb) + + + ;// tmp2 = |a4|a3|a2|a1| + ;// tmp3 = |c4|c3|c2|c1| + ;// tmp4 = |g4|g3|g2|g1| + ;// tmp5 = |m4|m3|m2|m1| + ;// tmp6 = |r4|r3|r2|r1| + ;// tmp1 = |t4|t3|t2|t1| + + ;// second four pixels + UXTB16 tmpa, tmp4 ;// |g3|g1| + UXTAB16 tmpa, tmpa, tmp5 ;// |g3+m3|g1+m1| + UXTB16 tmpb, tmp3 ;// |c3|c1| + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTAB16 tmpb, tmpb, tmp6 ;// |c3+r3|c1+r1| + ADD tmpa, plus16, tmpa, 
LSL #2 ;// 16+20(G+M) + UXTAB16 tmpa, tmpa, tmp2 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp1 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR res, = 0x00FF00FF + UXTB16 tmpa, tmp4, ROR #8 ;// |g4|g2| + UXTAB16 tmpa, tmpa, tmp5, ROR #8 ;// |g4+m4|g2+m2| + AND res, res, tmpb, LSR #5 ;// mask and divide by 32 + + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTB16 tmpb, tmp3, ROR #8 ;// |c4|c2| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpb, tmpb, tmp6, ROR #8 ;// |c4+r4|c2+r2| + UXTAB16 tmpa, tmpa, tmp2, ROR #8 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR tmpa, = 0xFF00FF00 + MVNEQ tmp2, tmp4 ;// select verOffset=0 (Z flag still from the MOVS above) + MVNNE tmp2, tmp5 ;// select verOffset=1 + + AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32 + ORR res, res, tmpa + LDR tmpa, = 0x80808080 + UHSUB8 res, res, tmp2 ;// bilinear interpolation + LDR tmp2, [ref], width ;// load next row + EOR res, res, tmpa ;// correct sign + STR res, [mb], #16 ;// next row + + ;// tmp3 = |a4|a3|a2|a1| + ;// tmp4 = |c4|c3|c2|c1| + ;// tmp5 = |g4|g3|g2|g1| + ;// tmp6 = |m4|m3|m2|m1| + ;// tmp1 = |r4|r3|r2|r1| + ;// tmp2 = |t4|t3|t2|t1| + + ;// third four pixels + UXTB16 tmpa, tmp5 ;// |g3|g1| + UXTAB16 tmpa, tmpa, tmp6 ;// |g3+m3|g1+m1| + UXTB16 tmpb, tmp4 ;// |c3|c1| + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTAB16 tmpb, tmpb, tmp1 ;// |c3+r3|c1+r1| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpa, tmpa, tmp3 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp2 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR res, = 0x00FF00FF + UXTB16 tmpa, tmp5, ROR #8 ;// |g4|g2| + UXTAB16 tmpa, tmpa, tmp6, ROR #8 ;// 
|g4+m4|g2+m2| + AND res, res, tmpb, LSR #5 ;// mask and divide by 32 + + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTB16 tmpb, tmp4, ROR #8 ;// |c4|c2| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpb, tmpb, tmp1, ROR #8 ;// |c4+r4|c2+r2| + UXTAB16 tmpa, tmpa, tmp3, ROR #8 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp2, ROR #8 ;// 16+20(G+M)+A+T + + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR tmpa, = 0xFF00FF00 + MVNEQ tmp3, tmp5 ;// select verOffset=0 + MVNNE tmp3, tmp6 ;// select verOffset=1 + + AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32 + ORR res, res, tmpa + LDR tmpa, = 0x80808080 + UHSUB8 res, res, tmp3 ;// bilinear interpolation + LDR tmp3, [ref] ;// load last row (ref is not advanced here) + EOR res, res, tmpa ;// correct sign + STR res, [mb], #16 ;// next row + + ;// tmp4 = |a4|a3|a2|a1| + ;// tmp5 = |c4|c3|c2|c1| + ;// tmp6 = |g4|g3|g2|g1| + ;// tmp1 = |m4|m3|m2|m1| + ;// tmp2 = |r4|r3|r2|r1| + ;// tmp3 = |t4|t3|t2|t1| + + ;// fourth four pixels + UXTB16 tmpa, tmp6 ;// |g3|g1| + UXTAB16 tmpa, tmpa, tmp1 ;// |g3+m3|g1+m1| + UXTB16 tmpb, tmp5 ;// |c3|c1| + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTAB16 tmpb, tmpb, tmp2 ;// |c3+r3|c1+r1| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpa, tmpa, tmp4 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp3 ;// 16+20(G+M)+A+T + + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR res, = 0x00FF00FF + UXTB16 tmpa, tmp6, ROR #8 ;// |g4|g2| + UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// |g4+m4|g2+m2| + AND res, res, tmpb, LSR #5 ;// mask and divide by 32 + + ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) + UXTB16 tmpb, tmp5, ROR #8 ;// |c4|c2| + ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) + UXTAB16 tmpb, tmpb, tmp2, ROR #8 ;// |c4+r4|c2+r2| + UXTAB16 tmpa, tmpa, tmp4, ROR #8 ;// 16+20(G+M)+A + UXTAB16 tmpa, tmpa, tmp3, ROR #8 ;// 16+20(G+M)+A+T 
+ + ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) + SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) + + USAT16 tmpb, #13, tmpa ;// saturate + LDR tmp4, = 0xFF00FF00 + MVNEQ tmp5, tmp6 ;// select verOffset=0 + MVNNE tmp5, tmp1 ;// select verOffset=1 + + AND tmpa, tmp4, tmpb, LSL #3 ;// mask and divide by 32 + ORR res, res, tmpa + LDR tmpa, = 0x80808080 + UHSUB8 res, res, tmp5 ;// bilinear interpolation + + ;// decrement loop_x counter + SUBS count, count, #4<<24 ;// (partWidth-1) -= 4; + + ;// calculate "ref" address for next round + SUB ref, ref, width, LSL #3 ;// ref -= 8*width; + ADD ref, ref, #4; ;// next column (4 pixels) + + EOR res, res, tmpa ;// correct sign + STR res, [mb], #-44 ;// back up 3 rows, step 4 columns (-3*16+4) + + BCS loop_x + + ADDS count, count, #252<<16 ;// (partHeight-1) -= 4; + ADD ref, ref, width, LSL #2 ;// ref += 4*width + AND tmp1, count, #0x0000FF00 ;// partWidth-1 + MOV tmp2, #1 + ADD tmp2, tmp2, tmp1, LSR #8 ;// partWidth + SUB ref, ref, tmp2 ;// ref -= partWidth + ADD mb, mb, #64; ;// mb += 4 rows (4*16) + SUB mb, mb, tmp2; ;// mb -= partWidth + BGE loop_y + + ADD sp,sp,#0x1f4 ;// drop locals (0x1e4) and saved r0-r3 (0x10) + LDMFD sp!, {r4-r11, pc} + + END diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/win_asm.bat b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/win_asm.bat new file mode 100644 index 0000000000000000000000000000000000000000..1b8d88c0858afe11357ff090a358a01d7c369e5f --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/win_asm.bat @@ -0,0 +1,15 @@ +echo off +set ASMFLAGS= -checkreglist -CPU ARM1136 -PreDefine "H264DEC_WINASM SETL {TRUE}" +set ASM="D:\Program Files\Microsoft Visual Studio 8\VC\ce\bin\x86_arm\armasm" +echo on + +%ASM% %ASMFLAGS% h264bsd_interpolate_chroma_ver.s +%ASM% %ASMFLAGS% h264bsd_interpolate_chroma_hor.s +%ASM% %ASMFLAGS% h264bsd_interpolate_hor_half.s +%ASM% %ASMFLAGS% h264bsd_interpolate_hor_quarter.s +%ASM% %ASMFLAGS% h264bsd_interpolate_hor_ver_quarter.s +%ASM% %ASMFLAGS% h264bsd_interpolate_ver_half.s +%ASM% %ASMFLAGS% 
h264bsd_interpolate_ver_quarter.s + +rem %ASM% %ASMFLAGS% h264bsd_interpolate_chroma_hor_ver.s +rem %ASM% %ASMFLAGS% h264bsd_interpolate_mid_hor.s diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdClearMbLayer.s b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdClearMbLayer.s new file mode 100644 index 0000000000000000000000000000000000000000..db1165426ed5b538d9912530c4598ec645426b2c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdClearMbLayer.s @@ -0,0 +1,66 @@ +; +; Copyright (C) 2009 The Android Open Source Project +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. 
+; + + REQUIRE8 + PRESERVE8 + + AREA |.text|, CODE + + EXPORT h264bsdClearMbLayer + +; Input / output registers +pMbLayer RN 0 +size RN 1 +pTmp RN 2 +step RN 3 + +; -- NEON registers -- + +qZero QN Q0.U8 + +;/*------------------------------------------------------------------------------ +; +; Function: h264bsdClearMbLayer +; +; Functional description: +; Writes zeros to memory starting at pMbLayer, 64 bytes (four 16-byte stores) per loop pass. +; Inputs: +; pMbLayer (r0) = start address, size (r1) = byte count; size must be >= 64 because the first SUBS result is never tested and a smaller value wraps around. +; Outputs: +; floor(size/64)*64 bytes starting at pMbLayer are cleared. +; Returns: +; nothing is set up as a result; r0 is left pointing just past the cleared bytes. +;------------------------------------------------------------------------------*/ + +h264bsdClearMbLayer + + VMOV qZero, #0 + ADD pTmp, pMbLayer, #16 + MOV step, #32 + SUBS size, size, #64 + +loop + VST1 qZero, [pMbLayer], step + SUBS size, size, #64 + VST1 qZero, [pTmp], step + VST1 qZero, [pMbLayer], step + VST1 qZero, [pTmp], step + BCS loop + + BX lr + END + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdCountLeadingZeros.s b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdCountLeadingZeros.s new file mode 100644 index 0000000000000000000000000000000000000000..c7bd73e1bc928ce08c995d87a3b56c1056766dcf --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdCountLeadingZeros.s @@ -0,0 +1,49 @@ +; +; Copyright (C) 2009 The Android Open Source Project +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. 
+; + + REQUIRE8 + PRESERVE8 + + AREA |.text|, CODE + + EXPORT h264bsdCountLeadingZeros + +; Input / output registers +value RN 0 + +; -- NEON registers -- + +;/*------------------------------------------------------------------------------ +; +; Function: h264bsdCountLeadingZeros +; +; Functional description: +; Counts the leading zero bits of a 32-bit word with a single CLZ instruction. +; Inputs: +; value (r0) = word to examine. +; Outputs: +; none; no memory is written. +; Returns: +; r0 = number of leading zero bits (CLZ yields 32 when value is 0). +;------------------------------------------------------------------------------*/ + +h264bsdCountLeadingZeros + + CLZ value, value + BX lr + END + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFillRow7.s b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFillRow7.s new file mode 100644 index 0000000000000000000000000000000000000000..5bfac9229a8743d5ff4d980dd44e5612898ac602 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFillRow7.s @@ -0,0 +1,180 @@ +; +; Copyright (C) 2009 The Android Open Source Project +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. 
+; + + REQUIRE8 + PRESERVE8 + + AREA |.text|, CODE + + EXPORT h264bsdFillRow7 + +; Input / output registers + +ref RN 0 +fill RN 1 +left RN 2 +tmp2 RN 2 +center RN 3 +right RN 4 +tmp1 RN 5 + +; -- NEON registers -- + +qTmp0 QN Q0.U8 +qTmp1 QN Q1.U8 +dTmp0 DN D0.U8 +dTmp1 DN D1.U8 +dTmp2 DN D2.U8 +dTmp3 DN D3.U8 + + +;/*------------------------------------------------------------------------------ +; +; Function: h264bsdFillRow7 +; +; Functional description: +; Builds one row at fill: left copies of ref[0], then center bytes copied from ref, then right copies of the last byte copied. +; Inputs: +; ref (r0) = source row, left (r2) and center (r3) = byte counts, right = first stack argument. +; Outputs: +; fill (r1) receives left, center and right bytes in that order. +; Returns: +; nothing is set up as a result before returning. +;------------------------------------------------------------------------------*/ + +h264bsdFillRow7 + PUSH {r4-r6,lr} + CMP left, #0 + LDR right, [sp,#0x10] ; right = first stack argument, above the 4 pushed registers + BEQ switch_center + LDRB tmp1, [ref,#0] + +loop_left + SUBS left, left, #1 + STRB tmp1, [fill], #1 + BNE loop_left + +switch_center + ASR tmp2,center,#2 + CMP tmp2,#9 + ADDCC pc,pc,tmp2,LSL #2 ; center/4 in 0..8: index into the branch table below + B loop_center ; reached when center/4 >= 9: copy everything bytewise + B loop_center ; index 0: fewer than 4 bytes + B case_1 + B case_2 + B case_3 + B case_4 + B case_5 + B case_6 + B case_7 + B case_8 +;case_8 +; LDR tmp2, [ref], #4 +; SUB center, center, #4 +; STR tmp2, [fill], #4 +;case_7 +; LDR tmp2, [ref], #4 +; SUB center, center, #4 +; STR tmp2, [fill], #4 +;case_6 +; LDR tmp2, [ref], #4 +; SUB center, center, #4 +; STR tmp2, [fill],#4 +;case_5 +; LDR tmp2, [ref], #4 +; SUB center, center, #4 +; STR tmp2, [fill],#4 +;case_4 +; LDR tmp2, [ref],#4 +; SUB center, center, #4 +; STR tmp2, [fill], #4 +;case_3 +; LDR tmp2, [ref],#4 +; SUB center, center, #4 +; STR tmp2, [fill], #4 +;case_2 +; LDR tmp2, [ref],#4 +; SUB center, center, #4 +; STR tmp2, [fill], #4 +;case_1 +; LDR tmp2, [ref],#4 +; SUB center, center, #4 +; STR tmp2, [fill], #4 + +case_8 + VLD1 {qTmp0, qTmp1}, [ref]! + SUB center, center, #32 + VST1 qTmp0, [fill]! + VST1 qTmp1, [fill]! + B loop_center +case_7 + VLD1 {dTmp0,dTmp1,dTmp2}, [ref]! + SUB center, center, #28 + LDR tmp2, [ref], #4 + VST1 {dTmp0,dTmp1,dTmp2}, [fill]! + STR tmp2, [fill],#4 + B loop_center +case_6 + VLD1 {dTmp0,dTmp1,dTmp2}, [ref]! 
+ SUB center, center, #24 + VST1 {dTmp0,dTmp1,dTmp2}, [fill]! + B loop_center +case_5 + VLD1 qTmp0, [ref]! + SUB center, center, #20 + LDR tmp2, [ref], #4 + VST1 qTmp0, [fill]! + STR tmp2, [fill],#4 + B loop_center +case_4 + VLD1 qTmp0, [ref]! + SUB center, center, #16 + VST1 qTmp0, [fill]! + B loop_center +case_3 + VLD1 dTmp0, [ref]! + SUB center, center, #12 + LDR tmp2, [ref], #4 + VST1 dTmp0, [fill]! + STR tmp2, [fill],#4 + B loop_center +case_2 + LDR tmp2, [ref],#4 + SUB center, center, #4 + STR tmp2, [fill], #4 ; falls through to case_1 for the second word +case_1 + LDR tmp2, [ref],#4 + SUB center, center, #4 + STR tmp2, [fill], #4 + +loop_center + CMP center, #0 + LDRBNE tmp2, [ref], #1 + SUBNE center, center, #1 + STRBNE tmp2, [fill], #1 + BNE loop_center + CMP right,#0 + POPEQ {r4-r6,pc} + LDRB tmp2, [ref,#-1] ; byte just before the advanced ref + +loop_right + STRB tmp2, [fill], #1 + SUBS right, right, #1 + BNE loop_right + + POP {r4-r6,pc} + END + diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFlushBits.s b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFlushBits.s new file mode 100644 index 0000000000000000000000000000000000000000..21335b835a10968517bdf531c860c445c35c9be6 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFlushBits.s @@ -0,0 +1,82 @@ +; +; Copyright (C) 2009 The Android Open Source Project +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. 
+; + + REQUIRE8 + PRESERVE8 + + AREA |.text|, CODE + + EXPORT h264bsdFlushBits + +; Input / output registers +pStrmData RN 0 +numBits RN 1 +readBits RN 2 +strmBuffSize RN 3 +pStrmBuffStart RN 1 +pStrmCurrPos RN 2 +bitPosInWord RN 1 + +; -- NEON registers -- + + + +;/*------------------------------------------------------------------------------ +; +; Function: h264bsdFlushBits +; +; Functional description: +; Advances the read position of the stream structure by numBits bits. +; Inputs: +; pStrmData (r0) = stream structure, numBits (r1) = number of bits to skip. +; Outputs: +; words at offsets 0x10 (readBits) and 0x8 (bitPosInWord) are always rewritten; 0x4 (pStrmCurrPos) only on success. +; Returns: +; r0 = 0 on success, 0xFFFFFFFF when the new readBits exceeds 8*strmBuffSize. +;------------------------------------------------------------------------------*/ + +h264bsdFlushBits +;// PUSH {r4-r6,lr} + + LDR readBits, [pStrmData, #0x10] + LDR strmBuffSize, [pStrmData, #0xC] + + ADD readBits, readBits, numBits + AND bitPosInWord, readBits, #7 + + STR readBits, [pStrmData, #0x10] + STR bitPosInWord, [pStrmData, #0x8] + + LDR pStrmBuffStart, [pStrmData, #0x0] + + CMP readBits, strmBuffSize, LSL #3 ; compare against the buffer size in bits + + BHI end_of_stream + + ADD pStrmCurrPos, pStrmBuffStart, readBits, LSR #3 + STR pStrmCurrPos, [pStrmData, #0x4] + MOV r0, #0 + BX lr +;// POP {r4-r6,pc} + +end_of_stream + MVN r0, #0 + BX lr +;// POP {r4-r6,pc} + + END + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdWriteMacroblock.s b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdWriteMacroblock.s new file mode 100644 index 0000000000000000000000000000000000000000..38a078132ea07b5f2b6e568e8c92bf41ba50d697 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdWriteMacroblock.s @@ -0,0 +1,152 @@ +; +; Copyright (C) 2009 The Android Open Source Project +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. 
+; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; + + REQUIRE8 + PRESERVE8 + + AREA |.text|, CODE + + EXPORT h264bsdWriteMacroblock + +; Input / output registers +image RN 0 +data RN 1 +width RN 2 +luma RN 3 +cb RN 4 +cr RN 5 +cwidth RN 6 + +; -- NEON registers -- + +qRow0 QN Q0.U8 +qRow1 QN Q1.U8 +qRow2 QN Q2.U8 +qRow3 QN Q3.U8 +qRow4 QN Q4.U8 +qRow5 QN Q5.U8 +qRow6 QN Q6.U8 +qRow7 QN Q7.U8 +qRow8 QN Q8.U8 +qRow9 QN Q9.U8 +qRow10 QN Q10.U8 +qRow11 QN Q11.U8 +qRow12 QN Q12.U8 +qRow13 QN Q13.U8 +qRow14 QN Q14.U8 +qRow15 QN Q15.U8 +; D0-D15 below are the halves of Q0-Q7; after the chroma loads dRow0-dRow7 hold the cb rows and dRow8-dRow15 the cr rows +dRow0 DN D0.U8 +dRow1 DN D1.U8 +dRow2 DN D2.U8 +dRow3 DN D3.U8 +dRow4 DN D4.U8 +dRow5 DN D5.U8 +dRow6 DN D6.U8 +dRow7 DN D7.U8 +dRow8 DN D8.U8 +dRow9 DN D9.U8 +dRow10 DN D10.U8 +dRow11 DN D11.U8 +dRow12 DN D12.U8 +dRow13 DN D13.U8 +dRow14 DN D14.U8 +dRow15 DN D15.U8 + +;/*------------------------------------------------------------------------------ +; +; Function: h264bsdWriteMacroblock +; +; Functional description: +; Write one macroblock into the image. Both luma and chroma +; components will be written at the same time. +; +; Inputs: +; data pointer to macroblock data to be written, 256 values for +; luma followed by 64 values for both chroma components +; +; Outputs: +; image pointer to the image where the macroblock will be written +; +; Returns: +; none +; +;------------------------------------------------------------------------------*/ + +h264bsdWriteMacroblock + PUSH {r4-r6,lr} + VPUSH {q4-q7} ; q4-q7 are overwritten below and restored by the VPOP on exit + + LDR width, [image, #4] + LDR luma, [image, #0xC] + LDR cb, [image, #0x10] + LDR cr, [image, #0x14] + + +; Write luma + VLD1 {qRow0, qRow1}, [data]! 
+ LSL width, width, #4 ; luma row stride in bytes = width*16 + VLD1 {qRow2, qRow3}, [data]! + LSR cwidth, width, #1 ; chroma row stride = half the luma stride + VST1 {qRow0}, [luma@128], width + VLD1 {qRow4, qRow5}, [data]! + VST1 {qRow1}, [luma@128], width + VLD1 {qRow6, qRow7}, [data]! + VST1 {qRow2}, [luma@128], width + VLD1 {qRow8, qRow9}, [data]! + VST1 {qRow3}, [luma@128], width + VLD1 {qRow10, qRow11}, [data]! + VST1 {qRow4}, [luma@128], width + VLD1 {qRow12, qRow13}, [data]! + VST1 {qRow5}, [luma@128], width + VLD1 {qRow14, qRow15}, [data]! + VST1 {qRow6}, [luma@128], width + + VLD1 {qRow0, qRow1}, [data]! ;cb rows 0,1,2,3 + VST1 {qRow7}, [luma@128], width + VLD1 {qRow2, qRow3}, [data]! ;cb rows 4,5,6,7 + VST1 {qRow8}, [luma@128], width + VLD1 {qRow4, qRow5}, [data]! ;cr rows 0,1,2,3 + VST1 {qRow9}, [luma@128], width + VLD1 {qRow6, qRow7}, [data]! ;cr rows 4,5,6,7 + VST1 {qRow10}, [luma@128], width + VST1 {dRow0}, [cb@64], cwidth + VST1 {dRow8}, [cr@64], cwidth + VST1 {qRow11}, [luma@128], width + VST1 {dRow1}, [cb@64], cwidth + VST1 {dRow9}, [cr@64], cwidth + VST1 {qRow12}, [luma@128], width + VST1 {dRow2}, [cb@64], cwidth + VST1 {dRow10}, [cr@64], cwidth + VST1 {qRow13}, [luma@128], width + VST1 {dRow3}, [cb@64], cwidth + VST1 {dRow11}, [cr@64], cwidth + VST1 {qRow14}, [luma@128], width + VST1 {dRow4}, [cb@64], cwidth + VST1 {dRow12}, [cr@64], cwidth + VST1 {qRow15}, [luma] + VST1 {dRow5}, [cb@64], cwidth + VST1 {dRow13}, [cr@64], cwidth + VST1 {dRow6}, [cb@64], cwidth + VST1 {dRow14}, [cr@64], cwidth + VST1 {dRow7}, [cb@64] + VST1 {dRow15}, [cr@64] + + VPOP {q4-q7} + POP {r4-r6,pc} + END + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S new file mode 100644 index 0000000000000000000000000000000000000000..f39f5c497367c694aa5d9fa4d4c92a820dec8b0e --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S @@ -0,0 +1,41 @@ +@ +@ Copyright (C) 2009 The Android Open Source Project 
+@ +@ Licensed under the Apache License, Version 2.0 (the "License"); +@ you may not use this file except in compliance with the License. +@ You may obtain a copy of the License at +@ +@ http://www.apache.org/licenses/LICENSE-2.0 +@ +@ Unless required by applicable law or agreed to in writing, software +@ distributed under the License is distributed on an "AS IS" BASIS, +@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ See the License for the specific language governing permissions and +@ limitations under the License. +@ + + + +@ REQUIRE8: emits EABI build attribute 24 (Tag_ABI_align_needed) = 1, standing in for the armasm REQUIRE8 directive + .macro REQUIRE8 + .eabi_attribute 24, 1 + .endm +@ PRESERVE8: emits EABI build attribute 25 (Tag_ABI_align_preserved) = 1, standing in for the armasm PRESERVE8 directive + .macro PRESERVE8 + .eabi_attribute 25, 1 + .endm + +@ function name, export=0: defines the label name typed as a function; a non-zero export also makes it .global + .macro function name, export=0 +.if \export + .global \name +.endif + .type \name, %function + .func \name +\name: + .endm +@ endfunction: emits .endfunc to close the .func opened by the function macro + .macro endfunction + .endfunc + .endm + diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S new file mode 100644 index 0000000000000000000000000000000000000000..c8a940e0f3847668c487c496d7d21272a1557689 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S @@ -0,0 +1,68 @@ +@ +@ Copyright (C) 2009 The Android Open Source Project +@ +@ Licensed under the Apache License, Version 2.0 (the "License"); +@ you may not use this file except in compliance with the License. +@ You may obtain a copy of the License at +@ +@ http://www.apache.org/licenses/LICENSE-2.0 +@ +@ Unless required by applicable law or agreed to in writing, software +@ distributed under the License is distributed on an "AS IS" BASIS, +@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ See the License for the specific language governing permissions and +@ limitations under the License. 
+@ + +#include "asm_common.S" + + preserve8 + + .fpu neon + .text + +/* Input / output registers */ +#define pMbLayer r0 +#define size r1 +#define pTmp r2 +#define step r3 + +/* -- NEON registers -- */ + +#define qZero Q0.U8 + +/*------------------------------------------------------------------------------ + + Function: h264bsdClearMbLayer + + Functional description: +         Zero-fill a buffer with 16-byte NEON stores, 64 bytes per loop pass. + Inputs: +         pMbLayer (r0) buffer start; size (r1) byte count, must be >= 64 + Outputs: +         whole 64-byte chunks at pMbLayer are zeroed; size % 64 tail is not + Returns: +         nothing meaningful; r0-r3 and q0 are overwritten +------------------------------------------------------------------------------*/ + +function h264bsdClearMbLayer, export=1 + + VMOV qZero, #0 + ADD pTmp, pMbLayer, #16 + MOV step, #32 + SUBS size, size, #64 + +loop: + VST1 {qZero}, [pMbLayer], step + SUBS size, size, #64 + VST1 {qZero}, [pTmp], step + VST1 {qZero}, [pMbLayer], step + VST1 {qZero}, [pTmp], step + BCS loop + + BX lr + +endfunction + + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S new file mode 100644 index 0000000000000000000000000000000000000000..05253d0d4ae184b41bbf8999ee10e7c43dc75221 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S @@ -0,0 +1,48 @@ +@ +@ Copyright (C) 2009 The Android Open Source Project +@ +@ Licensed under the Apache License, Version 2.0 (the "License"); +@ you may not use this file except in compliance with the License. +@ You may obtain a copy of the License at +@ +@ http://www.apache.org/licenses/LICENSE-2.0 +@ +@ Unless required by applicable law or agreed to in writing, software +@ distributed under the License is distributed on an "AS IS" BASIS, +@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ See the License for the specific language governing permissions and +@ limitations under the License. 
+@ +#include "asm_common.S" + + preserve8 + .arm + .text + + +/* Input / output registers */ +#define value r0 + +/* -- NEON registers -- */ + +/*------------------------------------------------------------------------------ + + Function: h264bsdCountLeadingZeros + + Functional description: +         Count the leading zero bits of a 32-bit word with one CLZ instruction. + Inputs: +         value (r0) word to examine + Outputs: +         none + Returns: +         r0 = number of leading zero bits in value +------------------------------------------------------------------------------*/ + +function h264bsdCountLeadingZeros, export=1 + + CLZ value, value + BX lr + +endfunction + diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S new file mode 100644 index 0000000000000000000000000000000000000000..6955b9a463cd9066a252bf55a50ce04e28598d6d --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S @@ -0,0 +1,143 @@ +@ +@ Copyright (C) 2009 The Android Open Source Project +@ +@ Licensed under the Apache License, Version 2.0 (the "License"); +@ you may not use this file except in compliance with the License. +@ You may obtain a copy of the License at +@ +@ http://www.apache.org/licenses/LICENSE-2.0 +@ +@ Unless required by applicable law or agreed to in writing, software +@ distributed under the License is distributed on an "AS IS" BASIS, +@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ See the License for the specific language governing permissions and +@ limitations under the License. 
+@ + +#include "asm_common.S" + + preserve8 + .arm @ the computed jump below needs ARM-state PC semantics + .fpu neon + .text + +/* Input / output registers */ + +#define ref r0 +#define fill r1 +#define left r2 +#define tmp2 r2 +#define center r3 +#define right r4 +#define tmp1 r5 + +/* -- NEON registers -- */ + +#define qTmp0 Q0.U8 +#define qTmp1 Q1.U8 +#define dTmp0 D0.U8 +#define dTmp1 D1.U8 +#define dTmp2 D2.U8 +#define dTmp3 D3.U8 + +/* +void h264bsdFillRow7(const u8 * ref, u8 * fill, i32 left, i32 center, + i32 right); +*/ + +function h264bsdFillRow7, export=1 + + PUSH {r4-r6,lr} + CMP left, #0 + LDR right, [sp,#0x10] @ fifth argument, above the four pushed registers + BEQ switch_center + LDRB tmp1, [ref,#0] + +loop_left: + SUBS left, left, #1 + STRB tmp1, [fill], #1 + BNE loop_left + +switch_center: + ASR tmp2,center,#2 + CMP tmp2,#9 + ADDCC pc,pc,tmp2,LSL #2 @ jump table: pc reads as this insn + 8 + B loop_center + B loop_center + B case_1 + B case_2 + B case_3 + B case_4 + B case_5 + B case_6 + B case_7 + B case_8 + +case_8: + VLD1 {qTmp0, qTmp1}, [ref]! + SUB center, center, #32 + VST1 {qTmp0}, [fill]! + VST1 {qTmp1}, [fill]! + B loop_center +case_7: + VLD1 {dTmp0,dTmp1,dTmp2}, [ref]! + SUB center, center, #28 + LDR tmp2, [ref], #4 + VST1 {dTmp0,dTmp1,dTmp2}, [fill]! + STR tmp2, [fill],#4 + B loop_center +case_6: + VLD1 {dTmp0,dTmp1,dTmp2}, [ref]! + SUB center, center, #24 + VST1 {dTmp0,dTmp1,dTmp2}, [fill]! + B loop_center +case_5: + VLD1 {qTmp0}, [ref]! + SUB center, center, #20 + LDR tmp2, [ref], #4 + VST1 {qTmp0}, [fill]! + STR tmp2, [fill],#4 + B loop_center +case_4: + VLD1 {qTmp0}, [ref]! + SUB center, center, #16 + VST1 {qTmp0}, [fill]! + B loop_center +case_3: + VLD1 {dTmp0}, [ref]! + SUB center, center, #12 + LDR tmp2, [ref], #4 + VST1 {dTmp0}, [fill]! 
+ STR tmp2, [fill],#4 + B loop_center +case_2: + LDR tmp2, [ref],#4 + SUB center, center, #4 + STR tmp2, [fill], #4 +case_1: + LDR tmp2, [ref],#4 + SUB center, center, #4 + STR tmp2, [fill], #4 + +loop_center: + CMP center, #0 + BEQ jump + LDRB tmp2, [ref], #1 + SUB center, center, #1 + STRB tmp2, [fill], #1 + BNE loop_center @ flags still come from the CMP above, so always taken +jump: + CMP right,#0 + POPEQ {r4-r6,pc} + LDRB tmp2, [ref,#-1] + +loop_right: + STRB tmp2, [fill], #1 + SUBS right, right, #1 + BNE loop_right + + POP {r4-r6,pc} + +endfunction + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S new file mode 100644 index 0000000000000000000000000000000000000000..b3f3191a114a219d44a64125a14765981c7ba7d8 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S @@ -0,0 +1,78 @@ +@ +@ Copyright (C) 2009 The Android Open Source Project +@ +@ Licensed under the Apache License, Version 2.0 (the "License"); +@ you may not use this file except in compliance with the License. +@ You may obtain a copy of the License at +@ +@ http://www.apache.org/licenses/LICENSE-2.0 +@ +@ Unless required by applicable law or agreed to in writing, software +@ distributed under the License is distributed on an "AS IS" BASIS, +@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ See the License for the specific language governing permissions and +@ limitations under the License. 
+@ + +#include "asm_common.S" + + preserve8 + + .arm + .text + +/* Input / output registers */ +#define pStrmData r0 +#define numBits r1 +#define readBits r2 +#define strmBuffSize r3 +#define pStrmBuffStart r1 +#define pStrmCurrPos r2 +#define bitPosInWord r1 + +/* Input / output registers */ + + + +/*------------------------------------------------------------------------------ + + Function: h264bsdFlushBits + + Functional description: +         Advance the stream read position by numBits and update the stream state. + Inputs: +         pStrmData (r0) stream state block, numBits (r1) number of bits to skip + Outputs: +         bit counter (+0x10), bit position (+0x8) always; byte pointer (+0x4) on success + Returns: +         0 on success, 0xFFFFFFFF when the new position exceeds strmBuffSize * 8 bits +------------------------------------------------------------------------------*/ +function h264bsdFlushBits, export=1 + + LDR readBits, [pStrmData, #0x10] + LDR strmBuffSize, [pStrmData, #0xC] + + ADD readBits, readBits, numBits + AND bitPosInWord, readBits, #7 + + STR readBits, [pStrmData, #0x10] + STR bitPosInWord, [pStrmData, #0x8] + + LDR pStrmBuffStart, [pStrmData, #0x0] + + CMP readBits, strmBuffSize, LSL #3 + + BHI end_of_stream + + ADD pStrmCurrPos, pStrmBuffStart, readBits, LSR #3 + STR pStrmCurrPos, [pStrmData, #0x4] + MOV r0, #0 + BX lr + +end_of_stream: + MVN r0, #0 + BX lr + +endfunction + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S new file mode 100644 index 0000000000000000000000000000000000000000..495d560181299376e17c80bb5d70ec888bc7c3cb --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S @@ -0,0 +1,157 @@ +@ +@ Copyright (C) 2009 The Android Open Source Project +@ +@ Licensed under the Apache License, Version 2.0 (the "License"); +@ you may not use this file except in compliance with the License. 
+@ You may obtain a copy of the License at +@ +@ http://www.apache.org/licenses/LICENSE-2.0 +@ +@ Unless required by applicable law or agreed to in writing, software +@ distributed under the License is distributed on an "AS IS" BASIS, +@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ See the License for the specific language governing permissions and +@ limitations under the License. +@ + +#include "asm_common.S" + + require8 + preserve8 + + .arm + .fpu neon + .text + +/* Input / output registers */ +#define image r0 +#define data r1 +#define width r2 +#define luma r3 +#define cb r4 +#define cr r5 +#define cwidth r6 + +/* -- NEON registers -- */ + +#define qRow0 Q0.U8 +#define qRow1 Q1.U8 +#define qRow2 Q2.U8 +#define qRow3 Q3.U8 +#define qRow4 Q4.U8 +#define qRow5 Q5.U8 +#define qRow6 Q6.U8 +#define qRow7 Q7.U8 +#define qRow8 Q8.U8 +#define qRow9 Q9.U8 +#define qRow10 Q10.U8 +#define qRow11 Q11.U8 +#define qRow12 Q12.U8 +#define qRow13 Q13.U8 +#define qRow14 Q14.U8 +#define qRow15 Q15.U8 + +#define dRow0 D0.U8 +#define dRow1 D1.U8 +#define dRow2 D2.U8 +#define dRow3 D3.U8 +#define dRow4 D4.U8 +#define dRow5 D5.U8 +#define dRow6 D6.U8 +#define dRow7 D7.U8 +#define dRow8 D8.U8 +#define dRow9 D9.U8 +#define dRow10 D10.U8 +#define dRow11 D11.U8 +#define dRow12 D12.U8 +#define dRow13 D13.U8 +#define dRow14 D14.U8 +#define dRow15 D15.U8 + +/*------------------------------------------------------------------------------ + + Function: h264bsdWriteMacroblock + + Functional description: + Write one macroblock into the image. Both luma and chroma + components will be written at the same time. 
+ + Inputs: + data pointer to macroblock data to be written, 256 values for + luma followed by 64 values for both chroma components + + Outputs: + image pointer to the image where the macroblock will be written + + Returns: + none + +------------------------------------------------------------------------------*/ + +function h264bsdWriteMacroblock, export=1 + PUSH {r4-r6,lr} + VPUSH {q4-q7} + + LDR width, [image, #4] + LDR luma, [image, #0xC] + LDR cb, [image, #0x10] + LDR cr, [image, #0x14] + + +@ Write luma + VLD1 {qRow0, qRow1}, [data]! + LSL width, width, #4 + VLD1 {qRow2, qRow3}, [data]! + LSR cwidth, width, #1 + VST1 {qRow0}, [luma,:128], width + VLD1 {qRow4, qRow5}, [data]! + VST1 {qRow1}, [luma,:128], width + VLD1 {qRow6, qRow7}, [data]! + VST1 {qRow2}, [luma,:128], width + VLD1 {qRow8, qRow9}, [data]! + VST1 {qRow3}, [luma,:128], width + VLD1 {qRow10, qRow11}, [data]! + VST1 {qRow4}, [luma,:128], width + VLD1 {qRow12, qRow13}, [data]! + VST1 {qRow5}, [luma,:128], width + VLD1 {qRow14, qRow15}, [data]! + VST1 {qRow6}, [luma,:128], width + + VLD1 {qRow0, qRow1}, [data]! ;//cb rows 0,1,2,3 + VST1 {qRow7}, [luma,:128], width + VLD1 {qRow2, qRow3}, [data]! ;//cb rows 4,5,6,7 + VST1 {qRow8}, [luma,:128], width + VLD1 {qRow4, qRow5}, [data]! ;//cr rows 0,1,2,3 + VST1 {qRow9}, [luma,:128], width + VLD1 {qRow6, qRow7}, [data]! 
;//cr rows 4,5,6,7 + VST1 {qRow10}, [luma,:128], width + VST1 {dRow0}, [cb,:64], cwidth + VST1 {dRow8}, [cr,:64], cwidth + VST1 {qRow11}, [luma,:128], width + VST1 {dRow1}, [cb,:64], cwidth + VST1 {dRow9}, [cr,:64], cwidth + VST1 {qRow12}, [luma,:128], width + VST1 {dRow2}, [cb,:64], cwidth + VST1 {dRow10}, [cr,:64], cwidth + VST1 {qRow13}, [luma,:128], width + VST1 {dRow3}, [cb,:64], cwidth + VST1 {dRow11}, [cr,:64], cwidth + VST1 {qRow14}, [luma,:128], width + VST1 {dRow4}, [cb,:64], cwidth + VST1 {dRow12}, [cr,:64], cwidth + VST1 {qRow15}, [luma] + VST1 {dRow5}, [cb,:64], cwidth + VST1 {dRow13}, [cr,:64], cwidth + VST1 {dRow6}, [cb,:64], cwidth + VST1 {dRow14}, [cr,:64], cwidth + VST1 {dRow7}, [cb,:64] + VST1 {dRow15}, [cr,:64] + + VPOP {q4-q7} + POP {r4-r6,pc} +@ BX lr + + .endfunc + + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.c new file mode 100755 index 0000000000000000000000000000000000000000..db77f8c6723fc200b8b814dde8e1c6318a593f98 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.c @@ -0,0 +1,237 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. 
Local function prototypes + 5. Functions + ExtractNalUnit + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_byte_stream.h" +#include "h264bsd_util.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +#define BYTE_STREAM_ERROR 0xFFFFFFFF + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + + Function name: ExtractNalUnit + + Functional description: + Extracts one NAL unit from the byte stream buffer. Removes + emulation prevention bytes if present. The original stream buffer + is used directly and is therefore modified if emulation prevention + bytes are present in the stream. + + Stream buffer is assumed to contain either exactly one NAL unit + and nothing else, or one or more NAL units embedded in byte + stream format described in the Annex B of the standard. Function + detects which one is used based on the first bytes in the buffer. 
+ + Inputs: + pByteStream pointer to byte stream buffer + len length of the stream buffer (in bytes) + + Outputs: + pStrmData stream information is stored here + readBytes number of bytes "consumed" from the stream buffer + + Returns: + HANTRO_OK success + HANTRO_NOK error in byte stream + +------------------------------------------------------------------------------*/ + +u32 h264bsdExtractNalUnit(u8 *pByteStream, u32 len, strmData_t *pStrmData, + u32 *readBytes) +{ + +/* Variables */ + + u32 i, tmp; + u32 byteCount,initByteCount; + u32 zeroCount; + u8 byte; + u32 hasEmulation = HANTRO_FALSE; + u32 invalidStream = HANTRO_FALSE; + u8 *readPtr, *writePtr; + +/* Code */ + + ASSERT(pByteStream); + ASSERT(len); + ASSERT(len < BYTE_STREAM_ERROR); + ASSERT(pStrmData); + + /* byte stream format if starts with 0x000001 or 0x000000 */ + if (len > 3 && pByteStream[0] == 0x00 && pByteStream[1] == 0x00 && + (pByteStream[2]&0xFE) == 0x00) + { + /* search for NAL unit start point, i.e. point after first start code + * prefix in the stream */ + zeroCount = byteCount = 2; + readPtr = pByteStream + 2; + /*lint -e(716) while(1) used consciously */ + while (1) + { + byte = *readPtr++; + byteCount++; + + if (byteCount == len) + { + /* no start code prefix found -> error */ + *readBytes = len; + return(HANTRO_NOK); + } + + if (!byte) + zeroCount++; + else if ((byte == 0x01) && (zeroCount >= 2)) + break; + else + zeroCount = 0; + } + + initByteCount = byteCount; + + /* determine size of the NAL unit. 
Search for next start code prefix + * or end of stream and ignore possible trailing zero bytes */ + zeroCount = 0; + /*lint -e(716) while(1) used consciously */ + while (1) + { + byte = *readPtr++; + byteCount++; + if (!byte) + zeroCount++; + + if ( (byte == 0x03) && (zeroCount == 2) ) + { + hasEmulation = HANTRO_TRUE; + } + + if ( (byte == 0x01) && (zeroCount >= 2 ) ) + { + pStrmData->strmBuffSize = + byteCount - initByteCount - zeroCount - 1; + zeroCount -= MIN(zeroCount, 3); + break; + } + else if (byte) + { + if (zeroCount >= 3) + invalidStream = HANTRO_TRUE; + zeroCount = 0; + } + + if (byteCount == len) + { + pStrmData->strmBuffSize = byteCount - initByteCount - zeroCount; + break; + } + + } + } + /* separate NAL units as input -> just set stream params */ + else + { + initByteCount = 0; + zeroCount = 0; + pStrmData->strmBuffSize = len; + hasEmulation = HANTRO_TRUE; + } + + pStrmData->pStrmBuffStart = pByteStream + initByteCount; + pStrmData->pStrmCurrPos = pStrmData->pStrmBuffStart; + pStrmData->bitPosInWord = 0; + pStrmData->strmBuffReadBits = 0; + + /* return number of bytes "consumed" */ + *readBytes = pStrmData->strmBuffSize + initByteCount + zeroCount; + + if (invalidStream) + { + return(HANTRO_NOK); + } + + /* remove emulation prevention bytes before rbsp processing */ + if (hasEmulation) + { + tmp = pStrmData->strmBuffSize; + readPtr = writePtr = pStrmData->pStrmBuffStart; + zeroCount = 0; + for (i = tmp; i--;) + { + if ((zeroCount == 2) && (*readPtr == 0x03)) + { + /* emulation prevention byte shall be followed by one of the + * following bytes: 0x00, 0x01, 0x02, 0x03. This implies that + * emulation prevention 0x03 byte shall not be the last byte + * of the stream. 
*/ + if ( (i == 0) || (*(readPtr+1) > 0x03) ) + return(HANTRO_NOK); + + /* do not write emulation prevention byte */ + readPtr++; + zeroCount = 0; + } + else + { + /* NAL unit shall not contain byte sequences 0x000000, + * 0x000001 or 0x000002 */ + if ( (zeroCount == 2) && (*readPtr <= 0x02) ) + return(HANTRO_NOK); + + if (*readPtr == 0) + zeroCount++; + else + zeroCount = 0; + + *writePtr++ = *readPtr++; + } + } + + /* (readPtr - writePtr) indicates number of "removed" emulation + * prevention bytes -> subtract from stream buffer size */ + pStrmData->strmBuffSize -= (u32)(readPtr - writePtr); + } + + return(HANTRO_OK); + +} + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.h new file mode 100755 index 0000000000000000000000000000000000000000..36aec76e9a679bf82beac3e1ae25fef3a606844c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. 
Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_BYTE_STREAM_H +#define H264SWDEC_BYTE_STREAM_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_stream.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. Function prototypes +------------------------------------------------------------------------------*/ + +u32 h264bsdExtractNalUnit(u8 *pByteStream, u32 len, strmData_t *pStrmData, + u32 *readBytes); + +#endif /* #ifndef H264SWDEC_BYTE_STREAM_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.c new file mode 100755 index 0000000000000000000000000000000000000000..91d78bd466c2cacd4ad1b4c39e818eec1133f1c5 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.c @@ -0,0 +1,916 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + DecodeCoeffToken + DecodeLevelPrefix + DecodeTotalZeros + DecodeRunBefore + DecodeResidualBlockCavlc + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_cavlc.h" +#include "h264bsd_util.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/* Following descriptions use term "information field" to represent combination + * of certain decoded symbol value and the length of the corresponding variable + * length code word. 
For example, total_zeros information field consists of + * 4 bits symbol value (bits [4,7]) along with four bits to represent length + * of the VLC code word (bits [0,3]) */ + +/* macro to obtain length of the coeff token information field, bits [0,4] */ +#define LENGTH_TC(vlc) ((vlc) & 0x1F) +/* macro to obtain length of the other information fields, bits [0,3] */ +#define LENGTH(vlc) ((vlc) & 0xF) +/* macro to obtain code word from the information fields, bits [4,7] */ +#define INFO(vlc) (((vlc) >> 4) & 0xF) /* 4 MSB bits contain information */ +/* macro to obtain trailing ones from the coeff token information word, + * bits [5,10] */ +#define TRAILING_ONES(coeffToken) (((coeffToken) >> 5) & 0x3F) +/* macro to obtain total coeff from the coeff token information word, + * bits [11,15] */ +#define TOTAL_COEFF(coeffToken) (((coeffToken) >> 11) & 0x1F) + +#define VLC_NOT_FOUND 0xFFFFFFFEU + +/* VLC tables for coeff_token. Because of long codes (max. 16 bits) some of the + * tables have been split into multiple separate tables. Each array/table + * element has the following structure: + * [5 bits for tot.coeff.] [6 bits for tr.ones] [5 bits for VLC length] + * If there is a 0x0000 value, it means that there is no corresponding VLC + * codeword for that index.
*/ + +/* VLC lengths up to 6 bits, 0 <= nC < 2 */ +static const u16 coeffToken0_0[32] = { + 0x0000,0x0000,0x0000,0x2066,0x1026,0x0806,0x1865,0x1865, + 0x1043,0x1043,0x1043,0x1043,0x1043,0x1043,0x1043,0x1043, + 0x0822,0x0822,0x0822,0x0822,0x0822,0x0822,0x0822,0x0822, + 0x0822,0x0822,0x0822,0x0822,0x0822,0x0822,0x0822,0x0822}; + +/* VLC lengths up to 10 bits, 0 <= nC < 2 */ +static const u16 coeffToken0_1[48] = { + 0x0000,0x0000,0x0000,0x0000,0x406a,0x304a,0x282a,0x200a, + 0x3869,0x3869,0x2849,0x2849,0x2029,0x2029,0x1809,0x1809, + 0x3068,0x3068,0x3068,0x3068,0x2048,0x2048,0x2048,0x2048, + 0x1828,0x1828,0x1828,0x1828,0x1008,0x1008,0x1008,0x1008, + 0x2867,0x2867,0x2867,0x2867,0x2867,0x2867,0x2867,0x2867, + 0x1847,0x1847,0x1847,0x1847,0x1847,0x1847,0x1847,0x1847}; + +/* VLC lengths up to 14 bits, 0 <= nC < 2 */ +static const u16 coeffToken0_2[56] = { + 0x606e,0x584e,0x502e,0x500e,0x586e,0x504e,0x482e,0x480e, + 0x400d,0x400d,0x484d,0x484d,0x402d,0x402d,0x380d,0x380d, + 0x506d,0x506d,0x404d,0x404d,0x382d,0x382d,0x300d,0x300d, + 0x486b,0x486b,0x486b,0x486b,0x486b,0x486b,0x486b,0x486b, + 0x384b,0x384b,0x384b,0x384b,0x384b,0x384b,0x384b,0x384b, + 0x302b,0x302b,0x302b,0x302b,0x302b,0x302b,0x302b,0x302b, + 0x280b,0x280b,0x280b,0x280b,0x280b,0x280b,0x280b,0x280b}; + +/* VLC lengths up to 16 bits, 0 <= nC < 2 */ +static const u16 coeffToken0_3[32] = { + 0x0000,0x0000,0x682f,0x682f,0x8010,0x8050,0x8030,0x7810, + 0x8070,0x7850,0x7830,0x7010,0x7870,0x7050,0x7030,0x6810, + 0x706f,0x706f,0x684f,0x684f,0x602f,0x602f,0x600f,0x600f, + 0x686f,0x686f,0x604f,0x604f,0x582f,0x582f,0x580f,0x580f}; + +/* VLC lengths up to 6 bits, 2 <= nC < 4 */ +static const u16 coeffToken2_0[32] = { + 0x0000,0x0000,0x0000,0x0000,0x3866,0x2046,0x2026,0x1006, + 0x3066,0x1846,0x1826,0x0806,0x2865,0x2865,0x1025,0x1025, + 0x2064,0x2064,0x2064,0x2064,0x1864,0x1864,0x1864,0x1864, + 0x1043,0x1043,0x1043,0x1043,0x1043,0x1043,0x1043,0x1043}; + +/* VLC lengths up to 9 bits, 2 <= nC < 4 */ +static const u16 
coeffToken2_1[32] = { + 0x0000,0x0000,0x0000,0x0000,0x4869,0x3849,0x3829,0x3009, + 0x2808,0x2808,0x3048,0x3048,0x3028,0x3028,0x2008,0x2008, + 0x4067,0x4067,0x4067,0x4067,0x2847,0x2847,0x2847,0x2847, + 0x2827,0x2827,0x2827,0x2827,0x1807,0x1807,0x1807,0x1807}; + +/* VLC lengths up to 14 bits, 2 <= nC < 4 */ +static const u16 coeffToken2_2[128] = { + 0x0000,0x0000,0x786d,0x786d,0x806e,0x804e,0x802e,0x800e, + 0x782e,0x780e,0x784e,0x702e,0x704d,0x704d,0x700d,0x700d, + 0x706d,0x706d,0x684d,0x684d,0x682d,0x682d,0x680d,0x680d, + 0x686d,0x686d,0x604d,0x604d,0x602d,0x602d,0x600d,0x600d, + 0x580c,0x580c,0x580c,0x580c,0x584c,0x584c,0x584c,0x584c, + 0x582c,0x582c,0x582c,0x582c,0x500c,0x500c,0x500c,0x500c, + 0x606c,0x606c,0x606c,0x606c,0x504c,0x504c,0x504c,0x504c, + 0x502c,0x502c,0x502c,0x502c,0x480c,0x480c,0x480c,0x480c, + 0x586b,0x586b,0x586b,0x586b,0x586b,0x586b,0x586b,0x586b, + 0x484b,0x484b,0x484b,0x484b,0x484b,0x484b,0x484b,0x484b, + 0x482b,0x482b,0x482b,0x482b,0x482b,0x482b,0x482b,0x482b, + 0x400b,0x400b,0x400b,0x400b,0x400b,0x400b,0x400b,0x400b, + 0x506b,0x506b,0x506b,0x506b,0x506b,0x506b,0x506b,0x506b, + 0x404b,0x404b,0x404b,0x404b,0x404b,0x404b,0x404b,0x404b, + 0x402b,0x402b,0x402b,0x402b,0x402b,0x402b,0x402b,0x402b, + 0x380b,0x380b,0x380b,0x380b,0x380b,0x380b,0x380b,0x380b}; + +/* VLC lengths up to 6 bits, 4 <= nC < 8 */ +static const u16 coeffToken4_0[64] = { + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0x1806,0x3846,0x3826,0x1006,0x4866,0x3046,0x3026,0x0806, + 0x2825,0x2825,0x2845,0x2845,0x2025,0x2025,0x2045,0x2045, + 0x1825,0x1825,0x4065,0x4065,0x1845,0x1845,0x1025,0x1025, + 0x3864,0x3864,0x3864,0x3864,0x3064,0x3064,0x3064,0x3064, + 0x2864,0x2864,0x2864,0x2864,0x2064,0x2064,0x2064,0x2064, + 0x1864,0x1864,0x1864,0x1864,0x1044,0x1044,0x1044,0x1044, + 0x0824,0x0824,0x0824,0x0824,0x0004,0x0004,0x0004,0x0004}; + +/* VLC lengths up to 10 bits, 4 <= nC < 8 */ +static const u16 coeffToken4_1[128] = { + 
0x0000,0x800a,0x806a,0x804a,0x802a,0x780a,0x786a,0x784a, + 0x782a,0x700a,0x706a,0x704a,0x702a,0x680a,0x6829,0x6829, + 0x6009,0x6009,0x6849,0x6849,0x6029,0x6029,0x5809,0x5809, + 0x6869,0x6869,0x6049,0x6049,0x5829,0x5829,0x5009,0x5009, + 0x6068,0x6068,0x6068,0x6068,0x5848,0x5848,0x5848,0x5848, + 0x5028,0x5028,0x5028,0x5028,0x4808,0x4808,0x4808,0x4808, + 0x5868,0x5868,0x5868,0x5868,0x5048,0x5048,0x5048,0x5048, + 0x4828,0x4828,0x4828,0x4828,0x4008,0x4008,0x4008,0x4008, + 0x3807,0x3807,0x3807,0x3807,0x3807,0x3807,0x3807,0x3807, + 0x3007,0x3007,0x3007,0x3007,0x3007,0x3007,0x3007,0x3007, + 0x4847,0x4847,0x4847,0x4847,0x4847,0x4847,0x4847,0x4847, + 0x2807,0x2807,0x2807,0x2807,0x2807,0x2807,0x2807,0x2807, + 0x5067,0x5067,0x5067,0x5067,0x5067,0x5067,0x5067,0x5067, + 0x4047,0x4047,0x4047,0x4047,0x4047,0x4047,0x4047,0x4047, + 0x4027,0x4027,0x4027,0x4027,0x4027,0x4027,0x4027,0x4027, + 0x2007,0x2007,0x2007,0x2007,0x2007,0x2007,0x2007,0x2007}; + +/* fixed 6 bit length VLC, nC <= 8 */ +static const u16 coeffToken8[64] = { + 0x0806,0x0826,0x0000,0x0006,0x1006,0x1026,0x1046,0x0000, + 0x1806,0x1826,0x1846,0x1866,0x2006,0x2026,0x2046,0x2066, + 0x2806,0x2826,0x2846,0x2866,0x3006,0x3026,0x3046,0x3066, + 0x3806,0x3826,0x3846,0x3866,0x4006,0x4026,0x4046,0x4066, + 0x4806,0x4826,0x4846,0x4866,0x5006,0x5026,0x5046,0x5066, + 0x5806,0x5826,0x5846,0x5866,0x6006,0x6026,0x6046,0x6066, + 0x6806,0x6826,0x6846,0x6866,0x7006,0x7026,0x7046,0x7066, + 0x7806,0x7826,0x7846,0x7866,0x8006,0x8026,0x8046,0x8066}; + +/* VLC lengths up to 3 bits, nC == -1 */ +static const u16 coeffTokenMinus1_0[8] = { + 0x0000,0x1043,0x0002,0x0002,0x0821,0x0821,0x0821,0x0821}; + +/* VLC lengths up to 8 bits, nC == -1 */ +static const u16 coeffTokenMinus1_1[32] = { + 0x2067,0x2067,0x2048,0x2028,0x1847,0x1847,0x1827,0x1827, + 0x2006,0x2006,0x2006,0x2006,0x1806,0x1806,0x1806,0x1806, + 0x1006,0x1006,0x1006,0x1006,0x1866,0x1866,0x1866,0x1866, + 0x1026,0x1026,0x1026,0x1026,0x0806,0x0806,0x0806,0x0806}; + +/* VLC tables for 
total_zeros. One table containing longer code, totalZeros_1, + * has been broken into two separate tables. Table elements have the + * following structure: + * [4 bits for info] [4 bits for VLC length] */ + +/* VLC lengths up to 5 bits */ +static const u8 totalZeros_1_0[32] = { + 0x00,0x00,0x65,0x55,0x44,0x44,0x34,0x34, + 0x23,0x23,0x23,0x23,0x13,0x13,0x13,0x13, + 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, + 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01}; + +/* VLC lengths up to 9 bits */ +static const u8 totalZeros_1_1[32] = { + 0x00,0xf9,0xe9,0xd9,0xc8,0xc8,0xb8,0xb8, + 0xa7,0xa7,0xa7,0xa7,0x97,0x97,0x97,0x97, + 0x86,0x86,0x86,0x86,0x86,0x86,0x86,0x86, + 0x76,0x76,0x76,0x76,0x76,0x76,0x76,0x76}; + +static const u8 totalZeros_2[64] = { + 0xe6,0xd6,0xc6,0xb6,0xa5,0xa5,0x95,0x95, + 0x84,0x84,0x84,0x84,0x74,0x74,0x74,0x74, + 0x64,0x64,0x64,0x64,0x54,0x54,0x54,0x54, + 0x43,0x43,0x43,0x43,0x43,0x43,0x43,0x43, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23, + 0x13,0x13,0x13,0x13,0x13,0x13,0x13,0x13, + 0x03,0x03,0x03,0x03,0x03,0x03,0x03,0x03}; + +static const u8 totalZeros_3[64] = { + 0xd6,0xb6,0xc5,0xc5,0xa5,0xa5,0x95,0x95, + 0x84,0x84,0x84,0x84,0x54,0x54,0x54,0x54, + 0x44,0x44,0x44,0x44,0x04,0x04,0x04,0x04, + 0x73,0x73,0x73,0x73,0x73,0x73,0x73,0x73, + 0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23, + 0x13,0x13,0x13,0x13,0x13,0x13,0x13,0x13}; + +static const u8 totalZeros_4[32] = { + 0xc5,0xb5,0xa5,0x05,0x94,0x94,0x74,0x74, + 0x34,0x34,0x24,0x24,0x83,0x83,0x83,0x83, + 0x63,0x63,0x63,0x63,0x53,0x53,0x53,0x53, + 0x43,0x43,0x43,0x43,0x13,0x13,0x13,0x13}; + +static const u8 totalZeros_5[32] = { + 0xb5,0x95,0xa4,0xa4,0x84,0x84,0x24,0x24, + 0x14,0x14,0x04,0x04,0x73,0x73,0x73,0x73, + 0x63,0x63,0x63,0x63,0x53,0x53,0x53,0x53, + 0x43,0x43,0x43,0x43,0x33,0x33,0x33,0x33}; + +static const u8 totalZeros_6[64] = { + 0xa6,0x06,0x15,0x15,0x84,0x84,0x84,0x84, + 
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x73,0x73,0x73,0x73,0x73,0x73,0x73,0x73, + 0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63, + 0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53, + 0x43,0x43,0x43,0x43,0x43,0x43,0x43,0x43, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23}; + +static const u8 totalZeros_7[64] = { + 0x96,0x06,0x15,0x15,0x74,0x74,0x74,0x74, + 0x83,0x83,0x83,0x83,0x83,0x83,0x83,0x83, + 0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63, + 0x43,0x43,0x43,0x43,0x43,0x43,0x43,0x43, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23, + 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, + 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52}; + +static const u8 totalZeros_8[64] = { + 0x86,0x06,0x25,0x25,0x14,0x14,0x14,0x14, + 0x73,0x73,0x73,0x73,0x73,0x73,0x73,0x73, + 0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, + 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, + 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42, + 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42}; + +static const u8 totalZeros_9[64] = { + 0x16,0x06,0x75,0x75,0x24,0x24,0x24,0x24, + 0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53, + 0x62,0x62,0x62,0x62,0x62,0x62,0x62,0x62, + 0x62,0x62,0x62,0x62,0x62,0x62,0x62,0x62, + 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42, + 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42, + 0x32,0x32,0x32,0x32,0x32,0x32,0x32,0x32, + 0x32,0x32,0x32,0x32,0x32,0x32,0x32,0x32}; + +static const u8 totalZeros_10[32] = { + 0x15,0x05,0x64,0x64,0x23,0x23,0x23,0x23, + 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, + 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42, + 0x32,0x32,0x32,0x32,0x32,0x32,0x32,0x32}; + +static const u8 totalZeros_11[16] = { + 0x04,0x14,0x23,0x23,0x33,0x33,0x53,0x53, + 0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41}; + +static const u8 totalZeros_12[16] = { + 0x04,0x14,0x43,0x43,0x22,0x22,0x22,0x22, + 0x31,0x31,0x31,0x31,0x31,0x31,0x31,0x31}; + +static const u8 totalZeros_13[8] = 
{0x03,0x13,0x32,0x32,0x21,0x21,0x21,0x21}; + +static const u8 totalZeros_14[4] = {0x02,0x12,0x21,0x21}; + +/* VLC tables for run_before. Table elements have the following structure: + * [4 bits for info] [4bits for VLC length] + */ + +static const u8 runBefore_6[8] = {0x13,0x23,0x43,0x33,0x63,0x53,0x02,0x02}; + +static const u8 runBefore_5[8] = {0x53,0x43,0x33,0x23,0x12,0x12,0x02,0x02}; + +static const u8 runBefore_4[8] = {0x43,0x33,0x22,0x22,0x12,0x12,0x02,0x02}; + +static const u8 runBefore_3[4] = {0x32,0x22,0x12,0x02}; + +static const u8 runBefore_2[4] = {0x22,0x12,0x01,0x01}; + +static const u8 runBefore_1[2] = {0x11,0x01}; + +/* following four macros are used to handle stream buffer "cache" in the CAVLC + * decoding function */ + +/* macro to initialize stream buffer cache, fills the buffer (32 bits) */ +#define BUFFER_INIT(value, bits) \ +{ \ + bits = 32; \ + value = h264bsdShowBits32(pStrmData); \ +} + +/* macro to read numBits bits from the buffer, bits will be written to + * outVal. Refills the buffer if not enough bits left */ +#define BUFFER_SHOW(value, bits, outVal, numBits) \ +{ \ + if (bits < (numBits)) \ + { \ + if(h264bsdFlushBits(pStrmData,32-bits) == END_OF_STREAM) \ + return(HANTRO_NOK); \ + value = h264bsdShowBits32(pStrmData); \ + bits = 32; \ + } \ + (outVal) = value >> (32 - (numBits)); \ +} + +/* macro to flush numBits bits from the buffer */ +#define BUFFER_FLUSH(value, bits, numBits) \ +{ \ + value <<= (numBits); \ + bits -= (numBits); \ +} + +/* macro to read and flush numBits bits from the buffer, bits will be written + * to outVal. 
Refills the buffer if not enough bits left */ +#define BUFFER_GET(value, bits, outVal, numBits) \ +{ \ + if (bits < (numBits)) \ + { \ + if(h264bsdFlushBits(pStrmData,32-bits) == END_OF_STREAM) \ + return(HANTRO_NOK); \ + value = h264bsdShowBits32(pStrmData); \ + bits = 32; \ + } \ + (outVal) = value >> (32 - (numBits)); \ + value <<= (numBits); \ + bits -= (numBits); \ +} + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +static u32 DecodeCoeffToken(u32 bits, u32 nc); + +static u32 DecodeLevelPrefix(u32 bits); + +static u32 DecodeTotalZeros(u32 bits, u32 totalCoeff, u32 isChromaDC); + +static u32 DecodeRunBefore(u32 bits,u32 zerosLeft); + +/*------------------------------------------------------------------------------ + + Function: DecodeCoeffToken + + Functional description: + Function to decode coeff_token information field from the stream. + + Inputs: + u32 bits next 16 stream bits + u32 nc nC, see standard for details + + Outputs: + u32 information field (11 bits for value, 5 bits for length) + +------------------------------------------------------------------------------*/ + +u32 DecodeCoeffToken(u32 bits, u32 nc) +{ + +/* Variables */ + + u32 value; + +/* Code */ + + /* standard defines that nc for decoding of chroma dc coefficients is -1, + * represented by u32 here -> -1 maps to 2^32 - 1 */ + ASSERT(nc <= 16 || nc == (u32)(-1)); + + if (nc < 2) + { + if (bits >= 0x8000) + { + value = 0x0001; + } + else if (bits >= 0x0C00) + value = coeffToken0_0[bits >> 10]; + else if (bits >= 0x0100) + value = coeffToken0_1[bits >> 6]; + else if (bits >= 0x0020) + value = coeffToken0_2[(bits>>2)-8]; + else + value = coeffToken0_3[bits]; + } + else if (nc < 4) + { + if (bits >= 0x8000) + { + value = bits & 0x4000 ? 
0x0002 : 0x0822; + } + else if (bits >= 0x1000) + value = coeffToken2_0[bits >> 10]; + else if (bits >= 0x0200) + value = coeffToken2_1[bits >> 7]; + else + value = coeffToken2_2[bits>>2]; + } + else if (nc < 8) + { + value = coeffToken4_0[bits >> 10]; + if (!value) + value = coeffToken4_1[bits>>6]; + } + else if (nc <= 16) + { + value = coeffToken8[bits>>10]; + } + else + { + value = coeffTokenMinus1_0[bits >> 13]; + if (!value) + value = coeffTokenMinus1_1[bits>>8]; + } + + return(value); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeLevelPrefix + + Functional description: + Function to decode level_prefix information field from the stream + + Inputs: + u32 bits next 16 stream bits + + Outputs: + u32 level_prefix information field or VLC_NOT_FOUND + +------------------------------------------------------------------------------*/ + +u32 DecodeLevelPrefix(u32 bits) +{ + +/* Variables */ + + u32 numZeros; + +/* Code */ + + if (bits >= 0x8000) + numZeros = 0; + else if (bits >= 0x4000) + numZeros = 1; + else if (bits >= 0x2000) + numZeros = 2; + else if (bits >= 0x1000) + numZeros = 3; + else if (bits >= 0x0800) + numZeros = 4; + else if (bits >= 0x0400) + numZeros = 5; + else if (bits >= 0x0200) + numZeros = 6; + else if (bits >= 0x0100) + numZeros = 7; + else if (bits >= 0x0080) + numZeros = 8; + else if (bits >= 0x0040) + numZeros = 9; + else if (bits >= 0x0020) + numZeros = 10; + else if (bits >= 0x0010) + numZeros = 11; + else if (bits >= 0x0008) + numZeros = 12; + else if (bits >= 0x0004) + numZeros = 13; + else if (bits >= 0x0002) + numZeros = 14; + else if (bits >= 0x0001) + numZeros = 15; + else /* more than 15 zeros encountered which is an error */ + return(VLC_NOT_FOUND); + + return(numZeros); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeTotalZeros + + Functional description: + Function to decode total_zeros information field from 
the stream + + Inputs: + u32 bits next 9 stream bits + u32 totalCoeff total number of coefficients for the block + being decoded + u32 isChromaDC flag to indicate chroma DC block + + Outputs: + u32 information field (4 bits value, 4 bits length) + +------------------------------------------------------------------------------*/ + +u32 DecodeTotalZeros(u32 bits, u32 totalCoeff, u32 isChromaDC) +{ + +/* Variables */ + + u32 value = 0x0; + +/* Code */ + + ASSERT(totalCoeff); + + if (!isChromaDC) + { + ASSERT(totalCoeff < 16); + switch (totalCoeff) + { + case 1: + value = totalZeros_1_0[bits >> 4]; + if (!value) + value = totalZeros_1_1[bits]; + break; + + case 2: + value = totalZeros_2[bits >> 3]; + break; + + case 3: + value = totalZeros_3[bits >> 3]; + break; + + case 4: + value = totalZeros_4[bits >> 4]; + break; + + case 5: + value = totalZeros_5[bits >> 4]; + break; + + case 6: + value = totalZeros_6[bits >> 3]; + break; + + case 7: + value = totalZeros_7[bits >> 3]; + break; + + case 8: + value = totalZeros_8[bits >> 3]; + break; + + case 9: + value = totalZeros_9[bits >> 3]; + break; + + case 10: + value = totalZeros_10[bits >> 4]; + break; + + case 11: + value = totalZeros_11[bits >> 5]; + break; + + case 12: + value = totalZeros_12[bits >> 5]; + break; + + case 13: + value = totalZeros_13[bits >> 6]; + break; + + case 14: + value = totalZeros_14[bits >> 7]; + break; + + default: /* case 15 */ + value = (bits >> 8) ? 
0x11 : 0x01; + break; + } + } + else + { + ASSERT(totalCoeff < 4); + bits >>= 6; + if (bits > 3) + value = 0x01; + else + { + if (totalCoeff == 3) + value = 0x11; + else if (bits > 1) + { + value = 0x12; + } + else if (totalCoeff == 2) + value = 0x22; + else if (bits) + value = 0x23; + else + value = 0x33; + } + } + + return(value); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeRunBefore + + Functional description: + Function to decode run_before information field from the stream + + Inputs: + u32 bits next 11 stream bits + u32 zerosLeft number of zeros left for the current block + + Outputs: + u32 information field (4 bits value, 4 bits length) + +------------------------------------------------------------------------------*/ + +u32 DecodeRunBefore(u32 bits, u32 zerosLeft) +{ + +/* Variables */ + + u32 value = 0x0; + +/* Code */ + + switch (zerosLeft) + { + case 1: + value = runBefore_1[bits>>10]; + break; + + case 2: + value = runBefore_2[bits>>9]; + break; + + case 3: + value = runBefore_3[bits>>9]; + break; + + case 4: + value = runBefore_4[bits>>8]; + break; + + case 5: + value = runBefore_5[bits>>8]; + break; + + case 6: + value = runBefore_6[bits>>8]; + break; + + default: + if (bits >= 0x100) + value = ((7-(bits>>8))<<4)+0x3; + else if (bits >= 0x80) + value = 0x74; + else if (bits >= 0x40) + value = 0x85; + else if (bits >= 0x20) + value = 0x96; + else if (bits >= 0x10) + value = 0xa7; + else if (bits >= 0x8) + value = 0xb8; + else if (bits >= 0x4) + value = 0xc9; + else if (bits >= 0x2) + value = 0xdA; + else if (bits) + value = 0xeB; + if (INFO(value) > zerosLeft) + value = 0; + break; + } + + return(value); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeResidualBlockCavlc + + Functional description: + Function to decode one CAVLC coded block. This corresponds to + syntax elements residual_block_cavlc() in the standard. 
+ + Inputs: + pStrmData pointer to stream data structure + nc nC value + maxNumCoeff maximum number of residual coefficients + + Outputs: + coeffLevel stores decoded coefficient levels + + Returns: + numCoeffs on bits [4,11] if successful + coeffMap on bits [16,31] if successful, this is bit map + where each bit indicates if the corresponding + coefficient was zero (0) or non-zero (1) + HANTRO_NOK end of stream or error in stream + +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodeResidualBlockCavlc( + strmData_t *pStrmData, + i32 *coeffLevel, + i32 nc, + u32 maxNumCoeff) +{ + +/* Variables */ + + u32 i, tmp, totalCoeff, trailingOnes, suffixLength, levelPrefix; + u32 levelSuffix, zerosLeft, bit; + i32 level[16]; + u32 run[16]; + /* stream "cache" */ + u32 bufferValue; + u32 bufferBits; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(coeffLevel); + ASSERT(nc > -2); + ASSERT(maxNumCoeff == 4 || maxNumCoeff == 15 || maxNumCoeff == 16); + ASSERT(VLC_NOT_FOUND != END_OF_STREAM); + + /* assume that coeffLevel array has been "cleaned" by caller */ + + BUFFER_INIT(bufferValue, bufferBits); + + /*lint -e774 disable lint warning on always false comparison */ + BUFFER_SHOW(bufferValue, bufferBits, bit, 16); + /*lint +e774 */ + tmp = DecodeCoeffToken(bit, (u32)nc); + if (!tmp) + return(HANTRO_NOK); + BUFFER_FLUSH(bufferValue, bufferBits, LENGTH_TC(tmp)); + + totalCoeff = TOTAL_COEFF(tmp); + if (totalCoeff > maxNumCoeff) + return(HANTRO_NOK); + trailingOnes = TRAILING_ONES(tmp); + + if (totalCoeff != 0) + { + i = 0; + /* nonzero coefficients: +/- 1 */ + if (trailingOnes) + { + BUFFER_GET(bufferValue, bufferBits, bit, trailingOnes); + tmp = 1 << (trailingOnes - 1); + for (; tmp; i++) + { + level[i] = bit & tmp ? 
-1 : 1; + tmp >>= 1; + } + } + + /* other levels */ + if (totalCoeff > 10 && trailingOnes < 3) + suffixLength = 1; + else + suffixLength = 0; + + for (; i < totalCoeff; i++) + { + BUFFER_SHOW(bufferValue, bufferBits, bit, 16); + levelPrefix = DecodeLevelPrefix(bit); + if (levelPrefix == VLC_NOT_FOUND) + return(HANTRO_NOK); + BUFFER_FLUSH(bufferValue, bufferBits, levelPrefix+1); + + if (levelPrefix < 14) + tmp = suffixLength; + else if (levelPrefix == 14) + { + tmp = suffixLength ? suffixLength : 4; + } + else + { + /* setting suffixLength to 1 here corresponds to adding 15 + * to levelCode value if levelPrefix == 15 and + * suffixLength == 0 */ + if (!suffixLength) + suffixLength = 1; + tmp = 12; + } + + if (suffixLength) + levelPrefix <<= suffixLength; + + if (tmp) + { + BUFFER_GET(bufferValue, bufferBits, levelSuffix, tmp); + levelPrefix += levelSuffix; + } + + tmp = levelPrefix; + + if (i == trailingOnes && trailingOnes < 3) + tmp += 2; + + level[i] = (tmp+2)>>1; + + if (suffixLength == 0) + suffixLength = 1; + + if ((level[i] > (3 << (suffixLength - 1))) && suffixLength < 6) + suffixLength++; + + if (tmp & 0x1) + level[i] = -level[i]; + } + + /* zero runs */ + if (totalCoeff < maxNumCoeff) + { + BUFFER_SHOW(bufferValue, bufferBits, bit,9); + zerosLeft = DecodeTotalZeros(bit, totalCoeff, + (u32)(maxNumCoeff == 4)); + if (!zerosLeft) + return(HANTRO_NOK); + BUFFER_FLUSH(bufferValue, bufferBits, LENGTH(zerosLeft)); + zerosLeft = INFO(zerosLeft); + } + else + zerosLeft = 0; + + for (i = 0; i < totalCoeff - 1; i++) + { + if (zerosLeft > 0) + { + BUFFER_SHOW(bufferValue, bufferBits, bit,11); + tmp = DecodeRunBefore(bit, zerosLeft); + if (!tmp) + return(HANTRO_NOK); + BUFFER_FLUSH(bufferValue, bufferBits, LENGTH(tmp)); + run[i] = INFO(tmp); + zerosLeft -= run[i]++; + } + else + { + run[i] = 1; + } + } + + /* combining level and run, levelSuffix variable used to hold coeffMap, + * i.e. bit map indicating which coefficients had non-zero value. 
*/ + + /*lint -esym(771,level,run) level and run are always initialized */ + tmp = zerosLeft; + coeffLevel[tmp] = level[totalCoeff-1]; + levelSuffix = 1 << tmp; + for (i = totalCoeff-1; i--;) + { + tmp += run[i]; + levelSuffix |= 1 << tmp; + coeffLevel[tmp] = level[i]; + } + + } + else + levelSuffix = 0; + + if (h264bsdFlushBits(pStrmData, 32-bufferBits) != HANTRO_OK) + return(HANTRO_NOK); + + return((totalCoeff << 4) | (levelSuffix << 16)); +} + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.h new file mode 100755 index 0000000000000000000000000000000000000000..80353d392f7fc15620960d88c8073b8e8ccff3e7 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_CAVLC_H +#define H264SWDEC_CAVLC_H + +/*------------------------------------------------------------------------------ + 1. 
Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_stream.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. Function prototypes +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodeResidualBlockCavlc( + strmData_t *pStrmData, + i32 *coeffLevel, + i32 nc, + u32 maxNumCoeff); + +#endif /* #ifdef H264SWDEC_CAVLC_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cfg.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cfg.h new file mode 100755 index 0000000000000000000000000000000000000000..2baba5add14a533874f2b1e2bc25fd7b6d409fd0 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cfg.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. 
Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_CFG_H +#define H264SWDEC_CFG_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +#define MAX_NUM_REF_PICS 16 +#define MAX_NUM_SLICE_GROUPS 8 +#define MAX_NUM_SEQ_PARAM_SETS 32 +#define MAX_NUM_PIC_PARAM_SETS 256 + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + + +/*------------------------------------------------------------------------------ + 4. Function prototypes +------------------------------------------------------------------------------*/ + +#endif /* #ifdef H264SWDEC_CFG_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.c new file mode 100755 index 0000000000000000000000000000000000000000..493fb9e472173b9ac37662c9de0ab43dc2bb60d9 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.c @@ -0,0 +1,626 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + h264bsdConceal + ConcealMb + Transform + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_conceal.h" +#include "h264bsd_util.h" +#include "h264bsd_reconstruct.h" +#include "h264bsd_dpb.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/*lint -e702 disable lint warning on right shift of signed quantity */ + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +static u32 ConcealMb(mbStorage_t *pMb, image_t *currImage, u32 row, u32 col, + u32 sliceType, u8 *data); + +static void Transform(i32 *data); + +/*------------------------------------------------------------------------------ + + Function name: h264bsdConceal + + Functional description: + Perform error concealment for a picture. Two types of concealment + is performed based on sliceType: + 1) copy from previous picture for P-slices. + 2) concealment from neighbour pixels for I-slices + + I-type concealment is based on ideas presented by Jarno Tulkki. 
The concealment algorithm determines frequency domain coefficients + from the neighbour pixels, applies integer transform (the same + transform used in the residual processing) and uses the results as + pixel values for concealed macroblocks. Transform produces 4x4 + array and one pixel value has to be used for 4x4 luma blocks and + 2x2 chroma blocks. + + Similar concealment is performed for whole picture (the choice + of the type is based on last successfully decoded slice header of + the picture but it is handled by the calling function). It is + acknowledged that this may result in wrong type of concealment + when a picture contains both types of slices. However, + determination of slice type macroblock-by-macroblock cannot + be done due to the fact that it is impossible to know to which + slice each corrupted (not successfully decoded) macroblock + belongs. + + The error concealment is started by searching the first properly + decoded macroblock and concealing the row containing the macroblock + in question. After that all macroblocks above the row in question + are concealed. Finally concealment of rows below is performed. + The order of concealment for 4x4 picture where macroblock 9 is the + first properly decoded one is as follows (properly decoded + macroblocks marked with 'x', numbers indicating the order of + concealment): + + 4 6 8 10 + 3 5 7 9 + 1 x x 2 + 11 12 13 14 + + If all macroblocks of the picture are lost, the concealment is + copy of previous picture for P-type and setting the image to + constant gray (pixel value 128) for I-type. + + Concealment sets quantization parameter of the concealed + macroblocks to value 40 and macroblock type to intra to enable + deblocking filter to smooth the edges of the concealed areas.
+ + Inputs: + pStorage pointer to storage structure + currImage pointer to current image structure + sliceType type of the slice + + Outputs: + currImage concealed macroblocks will be written here + + Returns: + HANTRO_OK + +------------------------------------------------------------------------------*/ + +u32 h264bsdConceal(storage_t *pStorage, image_t *currImage, u32 sliceType) +{ + +/* Variables */ + + u32 i, j; + u32 row, col; + u32 width, height; + u8 *refData; + mbStorage_t *mb; + +/* Code */ + + ASSERT(pStorage); + ASSERT(currImage); + + DEBUG(("Concealing %s slice\n", IS_I_SLICE(sliceType) ? + "intra" : "inter")); + + width = currImage->width; + height = currImage->height; + refData = NULL; + /* use reference picture with smallest available index */ + if (IS_P_SLICE(sliceType) || (pStorage->intraConcealmentFlag != 0)) + { + i = 0; + do + { + refData = h264bsdGetRefPicData(pStorage->dpb, i); + i++; + if (i >= 16) + break; + } while (refData == NULL); + } + + i = row = col = 0; + /* find first properly decoded macroblock -> start point for concealment */ + while (i < pStorage->picSizeInMbs && !pStorage->mb[i].decoded) + { + i++; + col++; + if (col == width) + { + row++; + col = 0; + } + } + + /* whole picture lost -> copy previous or set grey */ + if (i == pStorage->picSizeInMbs) + { + if ( (IS_I_SLICE(sliceType) && (pStorage->intraConcealmentFlag == 0)) || + refData == NULL) + H264SwDecMemset(currImage->data, 128, width*height*384); + else + H264SwDecMemcpy(currImage->data, refData, width*height*384); + + pStorage->numConcealedMbs = pStorage->picSizeInMbs; + + /* no filtering if whole picture concealed */ + for (i = 0; i < pStorage->picSizeInMbs; i++) + pStorage->mb[i].disableDeblockingFilterIdc = 1; + + return(HANTRO_OK); + } + + /* start from the row containing the first correct macroblock, conceal the + * row in question, all rows above that row and then continue downwards */ + mb = pStorage->mb + row * width; + for (j = col; j--;) + { + ConcealMb(mb+j, 
currImage, row, j, sliceType, refData); + mb[j].decoded = 1; + pStorage->numConcealedMbs++; + } + for (j = col + 1; j < width; j++) + { + if (!mb[j].decoded) + { + ConcealMb(mb+j, currImage, row, j, sliceType, refData); + mb[j].decoded = 1; + pStorage->numConcealedMbs++; + } + } + /* if previous row(s) could not be concealed -> conceal them now */ + if (row) + { + for (j = 0; j < width; j++) + { + i = row - 1; + mb = pStorage->mb + i*width + j; + do + { + ConcealMb(mb, currImage, i, j, sliceType, refData); + mb->decoded = 1; + pStorage->numConcealedMbs++; + mb -= width; + } while(i--); + } + } + + /* process rows below the one containing the first correct macroblock */ + for (i = row + 1; i < height; i++) + { + mb = pStorage->mb + i * width; + + for (j = 0; j < width; j++) + { + if (!mb[j].decoded) + { + ConcealMb(mb+j, currImage, i, j, sliceType, refData); + mb[j].decoded = 1; + pStorage->numConcealedMbs++; + } + } + } + + return(HANTRO_OK); +} + +/*------------------------------------------------------------------------------ + + Function name: ConcealMb + + Functional description: + Perform error concealment for one macroblock, location of the + macroblock in the picture indicated by row and col + +------------------------------------------------------------------------------*/ + +u32 ConcealMb(mbStorage_t *pMb, image_t *currImage, u32 row, u32 col, + u32 sliceType, u8 *refData) +{ + +/* Variables */ + + u32 i, j, comp; + u32 hor, ver; + u32 mbNum; + u32 width, height; + u8 *mbPos; + u8 data[384]; + u8 *pData; + i32 tmp; + i32 firstPhase[16]; + i32 *pTmp; + /* neighbours above, below, left and right */ + i32 a[4], b[4], l[4], r[4]; + u32 A, B, L, R; +#ifdef H264DEC_OMXDL + u8 fillBuff[32*21 + 15 + 32]; + u8 *pFill; +#endif +/* Code */ + + ASSERT(pMb); + ASSERT(!pMb->decoded); + ASSERT(currImage); + ASSERT(col < currImage->width); + ASSERT(row < currImage->height); + +#ifdef H264DEC_OMXDL + pFill = ALIGN(fillBuff, 16); +#endif + width = currImage->width; + height 
= currImage->height; + mbNum = row * width + col; + + h264bsdSetCurrImageMbPointers(currImage, mbNum); + + mbPos = currImage->data + row * 16 * width * 16 + col * 16; + A = B = L = R = HANTRO_FALSE; + + /* set qpY to 40 to enable some filtering in deblocking (stetson value) */ + pMb->qpY = 40; + pMb->disableDeblockingFilterIdc = 0; + /* mbType set to intra to perform filtering despite the values of other + * boundary strength determination fields */ + pMb->mbType = I_4x4; + pMb->filterOffsetA = 0; + pMb->filterOffsetB = 0; + pMb->chromaQpIndexOffset = 0; + + if (IS_I_SLICE(sliceType)) + H264SwDecMemset(data, 0, sizeof(data)); + else + { + mv_t mv = {0,0}; + image_t refImage; + refImage.width = width; + refImage.height = height; + refImage.data = refData; + if (refImage.data) + { +#ifndef H264DEC_OMXDL + h264bsdPredictSamples(data, &mv, &refImage, col*16, row*16, + 0, 0, 16, 16); +#else + h264bsdPredictSamples(data, &mv, &refImage, + ((row*16) + ((col*16)<<16)), + 0x00001010, pFill); +#endif + h264bsdWriteMacroblock(currImage, data); + + return(HANTRO_OK); + } + else + H264SwDecMemset(data, 0, sizeof(data)); + } + + H264SwDecMemset(firstPhase, 0, sizeof(firstPhase)); + + /* counter for number of neighbours used */ + j = 0; + hor = ver = 0; + if (row && (pMb-width)->decoded) + { + A = HANTRO_TRUE; + pData = mbPos - width*16; + a[0] = *pData++; a[0] += *pData++; a[0] += *pData++; a[0] += *pData++; + a[1] = *pData++; a[1] += *pData++; a[1] += *pData++; a[1] += *pData++; + a[2] = *pData++; a[2] += *pData++; a[2] += *pData++; a[2] += *pData++; + a[3] = *pData++; a[3] += *pData++; a[3] += *pData++; a[3] += *pData++; + j++; + hor++; + firstPhase[0] += a[0] + a[1] + a[2] + a[3]; + firstPhase[1] += a[0] + a[1] - a[2] - a[3]; + } + if ((row != height - 1) && (pMb+width)->decoded) + { + B = HANTRO_TRUE; + pData = mbPos + 16*width*16; + b[0] = *pData++; b[0] += *pData++; b[0] += *pData++; b[0] += *pData++; + b[1] = *pData++; b[1] += *pData++; b[1] += *pData++; b[1] += *pData++; 
+ b[2] = *pData++; b[2] += *pData++; b[2] += *pData++; b[2] += *pData++; + b[3] = *pData++; b[3] += *pData++; b[3] += *pData++; b[3] += *pData++; + j++; + hor++; + firstPhase[0] += b[0] + b[1] + b[2] + b[3]; + firstPhase[1] += b[0] + b[1] - b[2] - b[3]; + } + if (col && (pMb-1)->decoded) + { + L = HANTRO_TRUE; + pData = mbPos - 1; + l[0] = pData[0]; l[0] += pData[16*width]; + l[0] += pData[32*width]; l[0] += pData[48*width]; + pData += 64*width; + l[1] = pData[0]; l[1] += pData[16*width]; + l[1] += pData[32*width]; l[1] += pData[48*width]; + pData += 64*width; + l[2] = pData[0]; l[2] += pData[16*width]; + l[2] += pData[32*width]; l[2] += pData[48*width]; + pData += 64*width; + l[3] = pData[0]; l[3] += pData[16*width]; + l[3] += pData[32*width]; l[3] += pData[48*width]; + j++; + ver++; + firstPhase[0] += l[0] + l[1] + l[2] + l[3]; + firstPhase[4] += l[0] + l[1] - l[2] - l[3]; + } + if ((col != width - 1) && (pMb+1)->decoded) + { + R = HANTRO_TRUE; + pData = mbPos + 16; + r[0] = pData[0]; r[0] += pData[16*width]; + r[0] += pData[32*width]; r[0] += pData[48*width]; + pData += 64*width; + r[1] = pData[0]; r[1] += pData[16*width]; + r[1] += pData[32*width]; r[1] += pData[48*width]; + pData += 64*width; + r[2] = pData[0]; r[2] += pData[16*width]; + r[2] += pData[32*width]; r[2] += pData[48*width]; + pData += 64*width; + r[3] = pData[0]; r[3] += pData[16*width]; + r[3] += pData[32*width]; r[3] += pData[48*width]; + j++; + ver++; + firstPhase[0] += r[0] + r[1] + r[2] + r[3]; + firstPhase[4] += r[0] + r[1] - r[2] - r[3]; + } + + /* at least one properly decoded neighbour available */ + ASSERT(j); + + /*lint -esym(644,l,r,a,b) variable initialized above */ + if (!hor && L && R) + firstPhase[1] = (l[0]+l[1]+l[2]+l[3]-r[0]-r[1]-r[2]-r[3]) >> 5; + else if (hor) + firstPhase[1] >>= (3+hor); + + if (!ver && A && B) + firstPhase[4] = (a[0]+a[1]+a[2]+a[3]-b[0]-b[1]-b[2]-b[3]) >> 5; + else if (ver) + firstPhase[4] >>= (3+ver); + + switch (j) + { + case 1: + firstPhase[0] >>= 4; + 
break; + + case 2: + firstPhase[0] >>= 5; + break; + + case 3: + /* approximate (firstPhase[0]*4/3)>>6 */ + firstPhase[0] = (21 * firstPhase[0]) >> 10; + break; + + default: /* 4 */ + firstPhase[0] >>= 6; + break; + + } + + + Transform(firstPhase); + + for (i = 0, pData = data, pTmp = firstPhase; i < 256;) + { + tmp = pTmp[(i & 0xF)>>2]; + /*lint -e734 CLIP1 macro results in value that fits into 8 bits */ + *pData++ = CLIP1(tmp); + /*lint +e734 */ + + i++; + if (!(i & 0x3F)) + pTmp += 4; + } + + /* chroma components */ + mbPos = currImage->data + width * height * 256 + + row * 8 * width * 8 + col * 8; + for (comp = 0; comp < 2; comp++) + { + + H264SwDecMemset(firstPhase, 0, sizeof(firstPhase)); + + /* counter for number of neighbours used */ + j = 0; + hor = ver = 0; + if (A) + { + pData = mbPos - width*8; + a[0] = *pData++; a[0] += *pData++; + a[1] = *pData++; a[1] += *pData++; + a[2] = *pData++; a[2] += *pData++; + a[3] = *pData++; a[3] += *pData++; + j++; + hor++; + firstPhase[0] += a[0] + a[1] + a[2] + a[3]; + firstPhase[1] += a[0] + a[1] - a[2] - a[3]; + } + if (B) + { + pData = mbPos + 8*width*8; + b[0] = *pData++; b[0] += *pData++; + b[1] = *pData++; b[1] += *pData++; + b[2] = *pData++; b[2] += *pData++; + b[3] = *pData++; b[3] += *pData++; + j++; + hor++; + firstPhase[0] += b[0] + b[1] + b[2] + b[3]; + firstPhase[1] += b[0] + b[1] - b[2] - b[3]; + } + if (L) + { + pData = mbPos - 1; + l[0] = pData[0]; l[0] += pData[8*width]; + pData += 16*width; + l[1] = pData[0]; l[1] += pData[8*width]; + pData += 16*width; + l[2] = pData[0]; l[2] += pData[8*width]; + pData += 16*width; + l[3] = pData[0]; l[3] += pData[8*width]; + j++; + ver++; + firstPhase[0] += l[0] + l[1] + l[2] + l[3]; + firstPhase[4] += l[0] + l[1] - l[2] - l[3]; + } + if (R) + { + pData = mbPos + 8; + r[0] = pData[0]; r[0] += pData[8*width]; + pData += 16*width; + r[1] = pData[0]; r[1] += pData[8*width]; + pData += 16*width; + r[2] = pData[0]; r[2] += pData[8*width]; + pData += 16*width; + r[3] = 
pData[0]; r[3] += pData[8*width]; + j++; + ver++; + firstPhase[0] += r[0] + r[1] + r[2] + r[3]; + firstPhase[4] += r[0] + r[1] - r[2] - r[3]; + } + if (!hor && L && R) + firstPhase[1] = (l[0]+l[1]+l[2]+l[3]-r[0]-r[1]-r[2]-r[3]) >> 4; + else if (hor) + firstPhase[1] >>= (2+hor); + + if (!ver && A && B) + firstPhase[4] = (a[0]+a[1]+a[2]+a[3]-b[0]-b[1]-b[2]-b[3]) >> 4; + else if (ver) + firstPhase[4] >>= (2+ver); + + switch (j) + { + case 1: + firstPhase[0] >>= 3; + break; + + case 2: + firstPhase[0] >>= 4; + break; + + case 3: + /* approximate (firstPhase[0]*4/3)>>5 */ + firstPhase[0] = (21 * firstPhase[0]) >> 9; + break; + + default: /* 4 */ + firstPhase[0] >>= 5; + break; + + } + + Transform(firstPhase); + + pData = data + 256 + comp*64; + for (i = 0, pTmp = firstPhase; i < 64;) + { + tmp = pTmp[(i & 0x7)>>1]; + /*lint -e734 CLIP1 macro results in value that fits into 8 bits */ + *pData++ = CLIP1(tmp); + /*lint +e734 */ + + i++; + if (!(i & 0xF)) + pTmp += 4; + } + + /* increment pointers for cr */ + mbPos += width * height * 64; + } + + h264bsdWriteMacroblock(currImage, data); + + return(HANTRO_OK); + +} + + +/*------------------------------------------------------------------------------ + + Function name: Transform + + Functional description: + Simplified transform, assuming that only dc component and lowest + horizontal and lowest vertical component may be non-zero + +------------------------------------------------------------------------------*/ + +void Transform(i32 *data) +{ + + u32 col; + i32 tmp0, tmp1; + + if (!data[1] && !data[4]) + { + data[1] = data[2] = data[3] = data[4] = data[5] = + data[6] = data[7] = data[8] = data[9] = data[10] = + data[11] = data[12] = data[13] = data[14] = data[15] = data[0]; + return; + } + /* first horizontal transform for rows 0 and 1 */ + tmp0 = data[0]; + tmp1 = data[1]; + data[0] = tmp0 + tmp1; + data[1] = tmp0 + (tmp1>>1); + data[2] = tmp0 - (tmp1>>1); + data[3] = tmp0 - tmp1; + + tmp0 = data[4]; + data[5] = tmp0; + 
data[6] = tmp0; + data[7] = tmp0; + + /* then vertical transform */ + for (col = 4; col--; data++) + { + tmp0 = data[0]; + tmp1 = data[4]; + data[0] = tmp0 + tmp1; + data[4] = tmp0 + (tmp1>>1); + data[8] = tmp0 - (tmp1>>1); + data[12] = tmp0 - tmp1; + } + +} +/*lint +e702 */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.h new file mode 100755 index 0000000000000000000000000000000000000000..3134670c8c628936eb07c263915765b5001bfdc1 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_CONCEAL_H +#define H264SWDEC_CONCEAL_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_slice_header.h" +#include "h264bsd_storage.h" + +/*------------------------------------------------------------------------------ + 2. 
Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. Function prototypes +------------------------------------------------------------------------------*/ + +u32 h264bsdConceal(storage_t *pStorage, image_t *currImage, u32 sliceType); + +#endif /* #ifdef H264SWDEC_CONCEAL_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_container.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_container.h new file mode 100755 index 0000000000000000000000000000000000000000..99b74a0a7d0a4bb94441d81ef9b544733f3a4951 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_container.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. 
Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_CONTAINER_H
+#define H264SWDEC_CONTAINER_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_storage.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/* String length for tracing */
+#define H264DEC_TRACE_STR_LEN 100
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/* Decoder container: a status enum, a picture number and the embedded
+ * storage_t instance. The trace string buffer is compiled in only when
+ * H264DEC_TRACE is defined. */
+typedef struct
+{
+    /* decoder status, one of the three enumerators below */
+    enum {
+        UNINITIALIZED,
+        INITIALIZED,
+        NEW_HEADERS
+    } decStat;
+
+    /* NOTE(review): presumably a running count of decoded pictures --
+     * confirm against the API layer that updates it */
+    u32 picNumber;
+    storage_t storage;
+#ifdef H264DEC_TRACE
+    /* scratch buffer for trace messages, H264DEC_TRACE_STR_LEN bytes */
+    char str[H264DEC_TRACE_STR_LEN];
+#endif
+} decContainer_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+#endif /* #ifndef H264SWDEC_CONTAINER_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.c
new file mode 100755
index 0000000000000000000000000000000000000000..f8c1f76da98ed6ce5f2e04752fec939fe039e013
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.c
@@ -0,0 +1,2417 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + h264bsdFilterPicture + FilterVerLumaEdge + FilterHorLumaEdge + FilterHorLuma + FilterVerChromaEdge + FilterHorChromaEdge + FilterHorChroma + InnerBoundaryStrength + EdgeBoundaryStrength + GetBoundaryStrengths + IsSliceBoundaryOnLeft + IsSliceBoundaryOnTop + GetMbFilteringFlags + GetLumaEdgeThresholds + GetChromaEdgeThresholds + FilterLuma + FilterChroma + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_util.h" +#include "h264bsd_macroblock_layer.h" +#include "h264bsd_deblocking.h" +#include "h264bsd_dpb.h" + +#ifdef H264DEC_OMXDL +#include "omxtypes.h" +#include "omxVC.h" +#include "armVC.h" +#endif /* H264DEC_OMXDL */ + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. 
Module defines
+------------------------------------------------------------------------------*/
+
+/* Switch off the following Lint messages for this file:
+ * Info 701: Shift left of signed quantity (int)
+ * Info 702: Shift right of signed quantity (int)
+ */
+/*lint -e701 -e702 */
+
+/* array of alpha values, from the standard (52 entries) */
+static const u8 alphas[52] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,5,6,7,8,9,10,
+    12,13,15,17,20,22,25,28,32,36,40,45,50,56,63,71,80,90,101,113,127,144,162,
+    182,203,226,255,255};
+
+/* array of beta values, from the standard (52 entries) */
+static const u8 betas[52] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,3,3,3,3,4,4,
+    4,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18};
+
+
+
+#ifndef H264DEC_OMXDL
+/* array of tc0 values, from the standard, each triplet corresponds to a
+ * column in the table. Indexing goes as tc0[indexA][bS-1], i.e. a triplet
+ * holds the values for bS 1, 2 and 3; callers must not pass bS == 0 */
+static const u8 tc0[52][3] = {
+    {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
+    {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
+    {0,0,0},{0,0,1},{0,0,1},{0,0,1},{0,0,1},{0,1,1},{0,1,1},{1,1,1},
+    {1,1,1},{1,1,1},{1,1,1},{1,1,2},{1,1,2},{1,1,2},{1,1,2},{1,2,3},
+    {1,2,3},{2,2,3},{2,2,4},{2,3,4},{2,3,4},{3,3,5},{3,4,6},{3,4,6},
+    {4,5,7},{4,5,8},{4,6,9},{5,7,10},{6,8,11},{6,8,13},{7,10,14},{8,11,16},
+    {9,12,18},{10,13,20},{11,15,23},{13,17,25}
+};
+#else
+/* array of tc0 values, from the standard, widened to five entries per row
+ * so that a row can be indexed directly with bS = 0..4. The middle three
+ * entries are the same values as in the three-column table above; the
+ * bS == 0 and bS == 4 entries are zero. Indexing goes as tc0[indexA][bS] */
+static const u8 tc0[52][5] = {
+    {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0}, {0, 0, 0, 1, 0}, {0, 0, 0, 1, 0}, {0, 0, 0, 1, 0},
+    {0, 0, 0, 1, 0}, {0, 0, 1, 1, 0}, {0, 0, 1, 1, 0}, {0, 1, 1, 1, 0},
+    {0, 1, 1, 1, 0}, {0, 1, 1, 1, 0}, {0, 1, 1, 1, 0}, {0, 1, 1, 2, 0},
+    {0, 1, 1, 2, 0}, {0, 1, 1, 2, 0}, {0, 1, 1, 2, 0}, {0, 1, 2, 3, 0},
+    {0, 1, 2, 3, 0}, {0, 2, 2, 3, 0}, {0, 2, 2, 4, 0}, {0, 2, 3, 4, 0},
+    {0, 2, 3, 4, 0}, {0, 3, 3, 5, 0}, {0, 3, 4, 6, 0}, {0, 3, 4, 6, 0},
+    {0, 4, 5, 7, 0}, {0, 4, 5, 8, 0}, {0, 4, 6, 9, 0}, {0, 5, 7, 10, 0},
+    {0, 6, 8, 11, 0}, {0, 6, 8, 13, 0}, {0, 7, 10, 14, 0},
+    {0, 8, 11, 16, 0}, {0, 9, 12, 18, 0}, {0, 10, 13, 20, 0},
+    {0, 11, 15, 23, 0}, {0, 13, 17, 25, 0}
+};
+#endif
+
+
+#ifndef H264DEC_OMXDL
+/* mapping of raster scan block index to 4x4 block index */
+static const u32 mb4x4Index[16] =
+    {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+/* filtering thresholds for one edge class: alpha and beta limits plus a
+ * pointer to the tc0 row that the edge filters index with bS-1 */
+typedef struct {
+    const u8 *tc0;
+    u32 alpha;
+    u32 beta;
+} edgeThreshold_t;
+
+/* boundary strengths of the top and left edge of one 4x4 block */
+typedef struct {
+    u32 top;
+    u32 left;
+} bS_t;
+
+/* NOTE(review): presumably indices into the edgeThreshold_t[3] array used
+ * by h264bsdFilterPicture -- confirm in GetLumaEdgeThresholds */
+enum { TOP = 0, LEFT = 1, INNER = 2 };
+#endif /* H264DEC_OMXDL */
+
+/* bit flags combined into the return value of GetMbFilteringFlags */
+#define FILTER_LEFT_EDGE    0x04
+#define FILTER_TOP_EDGE     0x02
+#define FILTER_INNER_EDGE   0x01
+
+
+/* clipping table defined in intra_prediction.c; the edge filters in this
+ * file index it at offset 512 with (pixel value +/- delta) */
+extern const u8 h264bsdClip[];
+
+/*------------------------------------------------------------------------------
+    4. 
Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 InnerBoundaryStrength(mbStorage_t *mb1, u32 i1, u32 i2);
+
+#ifndef H264DEC_OMXDL
+static u32 EdgeBoundaryStrength(mbStorage_t *mb1, mbStorage_t *mb2,
+    u32 i1, u32 i2);
+#else
+static u32 InnerBoundaryStrength2(mbStorage_t *mb1, u32 i1, u32 i2);
+static u32 EdgeBoundaryStrengthLeft(mbStorage_t *mb1, mbStorage_t *mb2);
+static u32 EdgeBoundaryStrengthTop(mbStorage_t *mb1, mbStorage_t *mb2);
+#endif
+
+static u32 IsSliceBoundaryOnLeft(mbStorage_t *mb);
+
+static u32 IsSliceBoundaryOnTop(mbStorage_t *mb);
+
+static u32 GetMbFilteringFlags(mbStorage_t *mb);
+
+#ifndef H264DEC_OMXDL
+
+static u32 GetBoundaryStrengths(mbStorage_t *mb, bS_t *bs, u32 flags);
+
+static void FilterLuma(u8 *data, bS_t *bS, edgeThreshold_t *thresholds,
+        u32 imageWidth);
+
+static void FilterChroma(u8 *cb, u8 *cr, bS_t *bS, edgeThreshold_t *thresholds,
+        u32 imageWidth);
+
+static void FilterVerLumaEdge( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+        u32 imageWidth);
+static void FilterHorLumaEdge( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+        i32 imageWidth);
+static void FilterHorLuma( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+        i32 imageWidth);
+
+static void FilterVerChromaEdge( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+  u32 imageWidth);
+static void FilterHorChromaEdge( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+  i32 imageWidth);
+static void FilterHorChroma( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+  i32 imageWidth);
+
+static void GetLumaEdgeThresholds(
+  edgeThreshold_t *thresholds,
+  mbStorage_t *mb,
+  u32 filteringFlags);
+
+static void GetChromaEdgeThresholds(
+  edgeThreshold_t *thresholds,
+  mbStorage_t *mb,
+  u32 filteringFlags,
+  i32 chromaQpIndexOffset);
+
+#else /* H264DEC_OMXDL */
+
+static u32 GetBoundaryStrengths(mbStorage_t *mb, u8 (*bs)[16], u32 flags);
+
+static void GetLumaEdgeThresholds(
+    mbStorage_t *mb,
+    u8 (*alpha)[2],
+    u8 (*beta)[2],
+    u8 (*threshold)[16],
+    u8 (*bs)[16],
+    u32 filteringFlags );
+
+static void GetChromaEdgeThresholds(
+    mbStorage_t *mb,
+    u8 (*alpha)[2],
+    u8 (*beta)[2],
+    u8 (*threshold)[8],
+    u8 (*bs)[16],
+    u32 filteringFlags,
+    i32 chromaQpIndexOffset);
+
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+    Function: IsSliceBoundaryOnLeft
+
+        Functional description:
+            Tell whether the macroblock and its left neighbour (mbA)
+            belong to different slices. The caller must make sure that
+            the left neighbour exists.
+
+        Returns:
+            HANTRO_TRUE     slice ids of mb and mb->mbA differ
+            HANTRO_FALSE    both macroblocks carry the same slice id
+
+------------------------------------------------------------------------------*/
+u32 IsSliceBoundaryOnLeft(mbStorage_t *mb)
+{
+
+    ASSERT(mb && mb->mbA);
+
+    return((mb->sliceId != mb->mbA->sliceId) ? HANTRO_TRUE : HANTRO_FALSE);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: IsSliceBoundaryOnTop
+
+        Functional description:
+            Tell whether the macroblock and its upper neighbour (mbB)
+            belong to different slices. The caller must make sure that
+            the upper neighbour exists.
+
+        Returns:
+            HANTRO_TRUE     slice ids of mb and mb->mbB differ
+            HANTRO_FALSE    both macroblocks carry the same slice id
+
+------------------------------------------------------------------------------*/
+u32 IsSliceBoundaryOnTop(mbStorage_t *mb)
+{
+
+    ASSERT(mb && mb->mbB);
+
+    return((mb->sliceId != mb->mbB->sliceId) ? HANTRO_TRUE : HANTRO_FALSE);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: GetMbFilteringFlags
+
+        Functional description:
+            Function to determine which edges of a macroblock have to be
+            filtered. Output is a bit-wise OR of FILTER_LEFT_EDGE,
+            FILTER_TOP_EDGE and FILTER_INNER_EDGE, depending on which edges
+            shall be filtered.
+
+------------------------------------------------------------------------------*/
+u32 GetMbFilteringFlags(mbStorage_t *mb)
+{
+
+/* Variables */
+
+    u32 edgeMask;
+    u32 crossSliceAllowed;
+
+/* Code */
+
+    ASSERT(mb);
+
+    /* disableDeblockingFilterIdc == 1 switches the filter off completely */
+    if (mb->disableDeblockingFilterIdc == 1)
+        return(0);
+
+    /* with idc == 2 an outer edge is filtered only when the neighbour
+     * lies in the same slice; any other value filters across slices too */
+    crossSliceAllowed = (mb->disableDeblockingFilterIdc != 2);
+
+    /* inner edges are always filtered once the filter is enabled */
+    edgeMask = FILTER_INNER_EDGE;
+
+    /* left macroblock edge, neighbour is mbA */
+    if (mb->mbA && (crossSliceAllowed || !IsSliceBoundaryOnLeft(mb)))
+        edgeMask |= FILTER_LEFT_EDGE;
+
+    /* top macroblock edge, neighbour is mbB */
+    if (mb->mbB && (crossSliceAllowed || !IsSliceBoundaryOnTop(mb)))
+        edgeMask |= FILTER_TOP_EDGE;
+
+    return(edgeMask);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: InnerBoundaryStrength
+
+        Functional description:
+            Compute the boundary strength between two 4x4 blocks (ind1,
+            ind2) of the same macroblock. The macroblock type is checked
+            by the caller, so no intra condition is handled here.
+
+        Returns:
+            2   either block has non-zero totalCoeff
+            1   motion vectors differ by at least 4 in either component,
+                or the blocks use different reference addresses
+            0   otherwise
+
+------------------------------------------------------------------------------*/
+u32 InnerBoundaryStrength(mbStorage_t *mb1, u32 ind1, u32 ind2)
+{
+    i32 horA, horB, verA, verB;
+
+    if (mb1->totalCoeff[ind1] || mb1->totalCoeff[ind2])
+        return 2;
+
+    horA = mb1->mv[ind1].hor;
+    horB = mb1->mv[ind2].hor;
+    if (ABS(horA - horB) >= 4)
+        return 1;
+
+    verA = mb1->mv[ind1].ver;
+    verB = mb1->mv[ind2].ver;
+    if (ABS(verA - verB) >= 4)
+        return 1;
+
+    /* refAddr holds one entry per group of four blocks, hence the >> 2 */
+    if (mb1->refAddr[ind1 >> 2] != mb1->refAddr[ind2 >> 2])
+        return 1;
+
+    return 0;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: InnerBoundaryStrength2
+
+        Functional description:
+            Function to calculate boundary strength value bs for an inner
+            edge of a macroblock. The function is the same as
+            InnerBoundaryStrength but without checking totalCoeff.
+
+------------------------------------------------------------------------------*/
+u32 InnerBoundaryStrength2(mbStorage_t *mb1, u32 ind1, u32 ind2)
+{
+    i32 horA, horB, verA, verB;
+
+    horA = mb1->mv[ind1].hor;
+    horB = mb1->mv[ind2].hor;
+    if (ABS(horA - horB) >= 4)
+        return 1;
+
+    verA = mb1->mv[ind1].ver;
+    verB = mb1->mv[ind2].ver;
+    if (ABS(verA - verB) >= 4)
+        return 1;
+
+    /* refAddr holds one entry per group of four blocks, hence the >> 2 */
+    if (mb1->refAddr[ind1 >> 2] != mb1->refAddr[ind2 >> 2])
+        return 1;
+
+    return 0;
+}
+#ifndef H264DEC_OMXDL
+/*------------------------------------------------------------------------------
+
+    Function: EdgeBoundaryStrength
+
+        Functional description:
+            Compute the boundary strength between block ind1 of mb1 and
+            block ind2 of mb2 across a left or top macroblock edge.
+            Macroblock types are checked by the caller, so no intra
+            conditions are handled here.
+
+        Returns:
+            2   either block has non-zero totalCoeff
+            1   reference addresses differ, or motion vectors differ by
+                at least 4 in either component
+            0   otherwise
+
+------------------------------------------------------------------------------*/
+u32 EdgeBoundaryStrength(mbStorage_t *mb1, mbStorage_t *mb2,
+    u32 ind1, u32 ind2)
+{
+    u32 strength = 0;
+
+    if (mb1->totalCoeff[ind1] || mb2->totalCoeff[ind2])
+        strength = 2;
+    else if ((mb1->refAddr[ind1 >> 2] != mb2->refAddr[ind2 >> 2]) ||
+             (ABS(mb1->mv[ind1].hor - mb2->mv[ind2].hor) >= 4) ||
+             (ABS(mb1->mv[ind1].ver - mb2->mv[ind2].ver) >= 4))
+        strength = 1;
+
+    return strength;
+}
+
+#else /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+    Function: EdgeBoundaryStrengthTop
+
+        Functional description:
+            Function to calculate boundary strength value bs for
+            top-most edge of a macroblock. Macroblock types are checked
+            before this is called -> no intra mb conditions here.
+
+------------------------------------------------------------------------------*/
+u32 EdgeBoundaryStrengthTop(mbStorage_t *mb1, mbStorage_t *mb2)
+{
+    /* block pairs facing each other across the edge: blocks 0, 1, 4, 5 of
+     * mb1 against blocks 10, 11, 14, 15 of mb2 */
+    static const u32 curIdx[4] = {0, 1, 4, 5};
+    static const u32 nbrIdx[4] = {10, 11, 14, 15};
+    u32 packedBs = 0;
+    u32 n, cur, nbr;
+
+    /* the strength of pair n is stored in byte n of the return value */
+    for (n = 0; n < 4; n++)
+    {
+        cur = curIdx[n];
+        nbr = nbrIdx[n];
+
+        if (mb1->totalCoeff[cur] || mb2->totalCoeff[nbr])
+        {
+            packedBs += (u32)2 << (8 * n);
+        }
+        else if ((ABS(mb1->mv[cur].hor - mb2->mv[nbr].hor) >= 4) ||
+                 (ABS(mb1->mv[cur].ver - mb2->mv[nbr].ver) >= 4) ||
+                 (mb1->refAddr[cur >> 2] != mb2->refAddr[nbr >> 2]))
+        {
+            packedBs += (u32)1 << (8 * n);
+        }
+    }
+
+    return packedBs;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: EdgeBoundaryStrengthLeft
+
+        Functional description:
+            Function to calculate boundary strength value bs for left-
+            edge of a macroblock. Macroblock types are checked
+            before this is called -> no intra mb conditions here.
+
+------------------------------------------------------------------------------*/
+u32 EdgeBoundaryStrengthLeft(mbStorage_t *mb1, mbStorage_t *mb2)
+{
+    /* block pairs facing each other across the edge: blocks 0, 2, 8, 10 of
+     * mb1 against blocks 5, 7, 13, 15 of mb2 */
+    static const u32 curIdx[4] = {0, 2, 8, 10};
+    static const u32 nbrIdx[4] = {5, 7, 13, 15};
+    u32 packedBs = 0;
+    u32 n, cur, nbr;
+
+    /* the strength of pair n is stored in byte n of the return value */
+    for (n = 0; n < 4; n++)
+    {
+        cur = curIdx[n];
+        nbr = nbrIdx[n];
+
+        if (mb1->totalCoeff[cur] || mb2->totalCoeff[nbr])
+        {
+            packedBs += (u32)2 << (8 * n);
+        }
+        else if ((ABS(mb1->mv[cur].hor - mb2->mv[nbr].hor) >= 4) ||
+                 (ABS(mb1->mv[cur].ver - mb2->mv[nbr].ver) >= 4) ||
+                 (mb1->refAddr[cur >> 2] != mb2->refAddr[nbr >> 2]))
+        {
+            packedBs += (u32)1 << (8 * n);
+        }
+    }
+
+    return packedBs;
+}
+#endif /* H264DEC_OMXDL */
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFilterPicture
+
+        Functional description:
+          Perform deblocking filtering for a picture. Filter does not copy
+          the original picture anywhere but filtering is performed directly
+          on the original image. Parameters controlling the filtering process
+          are computed based on information in macroblock structures of the
+          filtered macroblock, macroblock above and macroblock on the left of
+          the filtered one.
+
+        Inputs:
+          image         pointer to image to be filtered
+          mb            pointer to macroblock data structure of the top-left
+                        macroblock of the picture
+
+        Outputs:
+          image         filtered image stored here
+
+        Returns:
+          none
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_OMXDL
+void h264bsdFilterPicture(
+  image_t *image,
+  mbStorage_t *mb)
+{
+
+/* Variables */
+
+    u32 edgeFlags;
+    u32 mbRow, mbCol;
+    u32 widthInMbs, totalMbs;
+    u8 *lumaMb;
+    u8 *cbMb;
+    mbStorage_t *pMb;
+    bS_t bS[16];
+    edgeThreshold_t thresholds[3];
+
+/* Code */
+
+    ASSERT(image);
+    ASSERT(mb);
+    ASSERT(image->data);
+    ASSERT(image->width);
+    ASSERT(image->height);
+
+    /* image dimensions are given in macroblocks */
+    widthInMbs = image->width;
+    totalMbs = widthInMbs * image->height;
+
+    /* walk the macroblock array in raster order, tracking row and column */
+    mbRow = 0;
+    mbCol = 0;
+    for (pMb = mb; mbRow < image->height; pMb++)
+    {
+        edgeFlags = GetMbFilteringFlags(pMb);
+
+        /* GetBoundaryStrengths returns non-zero if any of the bS values
+         * of this macroblock is non-zero; nothing to filter otherwise */
+        if (edgeFlags && GetBoundaryStrengths(pMb, bS, edgeFlags))
+        {
+            /* luma: 16x16 samples per macroblock */
+            GetLumaEdgeThresholds(thresholds, pMb, edgeFlags);
+            lumaMb = image->data + mbRow * widthInMbs * 256 + mbCol * 16;
+
+            FilterLuma(lumaMb, bS, thresholds, widthInMbs*16);
+
+            /* chroma: the cb plane follows the luma plane and the cr
+             * plane follows cb, 8x8 samples per macroblock each */
+            GetChromaEdgeThresholds(thresholds, pMb, edgeFlags,
+                pMb->chromaQpIndexOffset);
+            cbMb = image->data + totalMbs * 256 +
+                mbRow * widthInMbs * 64 + mbCol * 8;
+
+            FilterChroma(cbMb, cbMb + 64*totalMbs, bS,
+                    thresholds, widthInMbs*8);
+        }
+
+        mbCol++;
+        if (mbCol == widthInMbs)
+        {
+            mbCol = 0;
+            mbRow++;
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterVerLumaEdge
+
+        Functional description:
+            Filter one vertical 4-pixel luma edge.
+
+------------------------------------------------------------------------------*/
+void FilterVerLumaEdge(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  u32 imageWidth)
+{
+
+/* Variables */
+
+    i32 delta, tc, tmp;
+    u32 i;
+    u8 p0, q0, p1, q1, p2, q2;
+    u32 tmpFlag;
+    const u8 *clp = h264bsdClip + 512;
+
+/* Code */
+
+    /* data points at q0, the first sample right of the edge; p samples are
+     * read at negative offsets. bS must be 1..4 because tc0 is indexed
+     * with bS-1 */
+    ASSERT(data);
+    ASSERT(bS && bS <= 4);
+    ASSERT(thresholds);
+
+    if (bS < 4)
+    {
+        tc = thresholds->tc0[bS-1];
+        tmp = tc;
+        for (i = 4; i; i--, data += imageWidth)
+        {
+            p1 = data[-2]; p0 = data[-1];
+            q0 = data[0]; q1 = data[1];
+            if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+                 ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+                 ((unsigned)ABS(q1-q0) < thresholds->beta) )
+            {
+                p2 = data[-3];
+                q2 = data[2];
+
+                /* tmp is tc plus one for each side whose second sample
+                 * gets filtered; it bounds delta below */
+                if ((unsigned)ABS(p2-p0) < thresholds->beta)
+                {
+                    data[-2] = (u8)(p1 + CLIP3(-tc,tc,
+                        (p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1));
+                    tmp++;
+                }
+
+                if ((unsigned)ABS(q2-q0) < thresholds->beta)
+                {
+                    data[1] = (u8)(q1 + CLIP3(-tc,tc,
+                        (q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1));
+                    tmp++;
+                }
+
+                delta = CLIP3(-tmp, tmp, ((((q0 - p0) << 2) +
+                          (p1 - q1) + 4) >> 3));
+
+                p0 = clp[p0 + delta];
+                q0 = clp[q0 - delta];
+                tmp = tc;
+                data[-1] = p0;
+                data[ 0] = q0;
+            }
+        }
+    }
+    else
+    {
+        for (i = 4; i; i--, data += imageWidth)
+        {
+            p1 = data[-2]; p0 = data[-1];
+            q0 = data[0]; q1 = data[1];
+            if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+                 ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+                 ((unsigned)ABS(q1-q0) < thresholds->beta) )
+            {
+                tmpFlag =
+                    ((unsigned)ABS(p0-q0) < ((thresholds->alpha >> 2) +2)) ?
+                        HANTRO_TRUE : HANTRO_FALSE;
+
+                p2 = data[-3];
+                q2 = data[2];
+
+                if (tmpFlag && (unsigned)ABS(p2-p0) < thresholds->beta)
+                {
+                    tmp = p1 + p0 + q0;
+                    data[-1] = (u8)((p2 + 2 * tmp + q1 + 4) >> 3);
+                    data[-2] = (u8)((p2 + tmp + 2) >> 2);
+                    data[-3] = (u8)((2 * data[-4] + 3 * p2 + tmp + 4) >> 3);
+                }
+                else
+                    data[-1] = (u8)((2 * p1 + p0 + q1 + 2) >> 2);
+
+                if (tmpFlag && (unsigned)ABS(q2-q0) < thresholds->beta)
+                {
+                    tmp = p0 + q0 + q1;
+                    data[0] = (u8)((p1 + 2 * tmp + q2 + 4) >> 3);
+                    data[1] = (u8)((tmp + q2 + 2) >> 2);
+                    data[2] = (u8)((2 * data[3] + 3 * q2 + tmp + 4) >> 3);
+                }
+                else
+                    data[0] = (u8)((2 * q1 + q0 + p1 + 2) >> 2);
+            }
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterHorLumaEdge
+
+        Functional description:
+            Filter one horizontal 4-pixel luma edge. Only boundary
+            strengths 1..3 are handled here; the tc0 table is indexed
+            with bS-1, so bS must not be zero.
+
+        Inputs:
+            data            pointer to q0 of the first column, i.e. the first
+                            sample below the edge
+            bS              boundary strength, 1..3
+            thresholds      alpha, beta and tc0 values for the edge
+            imageWidth      distance between rows in samples; signed because
+                            the rows above are addressed with -imageWidth
+
+------------------------------------------------------------------------------*/
+void FilterHorLumaEdge(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  i32 imageWidth)
+{
+
+/* Variables */
+
+    i32 delta, tc, tmp;
+    u32 i;
+    u8 p0, q0, p1, q1, p2, q2;
+    const u8 *clp = h264bsdClip + 512;
+
+/* Code */
+
+    ASSERT(data);
+    /* bS == 0 would index tc0[bS-1] out of bounds */
+    ASSERT(bS && bS < 4);
+    ASSERT(thresholds);
+
+    tc = thresholds->tc0[bS-1];
+    tmp = tc;
+    for (i = 4; i; i--, data++)
+    {
+        p1 = data[-imageWidth*2]; p0 = data[-imageWidth];
+        q0 = data[0]; q1 = data[imageWidth];
+        if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+             ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+             ((unsigned)ABS(q1-q0) < thresholds->beta) )
+        {
+            p2 = data[-imageWidth*3];
+
+            if ((unsigned)ABS(p2-p0) < thresholds->beta)
+            {
+                data[-imageWidth*2] = (u8)(p1 + CLIP3(-tc,tc,
+                    (p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1));
+                tmp++;
+            }
+
+            q2 = data[imageWidth*2];
+
+            if ((unsigned)ABS(q2-q0) < thresholds->beta)
+            {
+                data[imageWidth] = (u8)(q1 + CLIP3(-tc,tc,
+                    (q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1));
+                tmp++;
+            }
+
+            delta = CLIP3(-tmp, tmp, ((((q0 - p0) << 2) +
+                      (p1 - q1) + 4) >> 3));
+
+            p0 = clp[p0 + delta];
+            q0 = clp[q0 - delta];
+            tmp = tc;
+            data[-imageWidth] = p0;
+            data[  0] = q0;
+        }
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterHorLuma
+
+        Functional description:
+            Filter all four successive horizontal 4-pixel luma edges. This can
+            be done when bS is equal for all four edges. Boundary strengths
+            1..3 use the tc0-clipped filter, bS 4 uses the strong filter;
+            bS must not be zero.
+
+        Inputs:
+            data            pointer to q0 of the first column, i.e. the first
+                            sample below the edge
+            bS              boundary strength shared by the edges, 1..4
+            thresholds      alpha, beta and tc0 values for the edge
+            imageWidth      distance between rows in samples; signed because
+                            the rows above are addressed with -imageWidth
+
+------------------------------------------------------------------------------*/
+void FilterHorLuma(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  i32 imageWidth)
+{
+
+/* Variables */
+
+    i32 delta, tc, tmp;
+    u32 i;
+    u8 p0, q0, p1, q1, p2, q2;
+    u32 tmpFlag;
+    const u8 *clp = h264bsdClip + 512;
+
+/* Code */
+
+    ASSERT(data);
+    /* bS == 0 would index tc0[bS-1] out of bounds */
+    ASSERT(bS && bS <= 4);
+    ASSERT(thresholds);
+
+    if (bS < 4)
+    {
+        tc = thresholds->tc0[bS-1];
+        tmp = tc;
+        for (i = 16; i; i--, data++)
+        {
+            p1 = data[-imageWidth*2]; p0 = data[-imageWidth];
+            q0 = data[0]; q1 = data[imageWidth];
+            if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+                 ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+                 ((unsigned)ABS(q1-q0) < thresholds->beta) )
+            {
+                p2 = data[-imageWidth*3];
+
+                if ((unsigned)ABS(p2-p0) < thresholds->beta)
+                {
+                    data[-imageWidth*2] = (u8)(p1 + CLIP3(-tc,tc,
+                        (p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1));
+                    tmp++;
+                }
+
+                q2 = data[imageWidth*2];
+
+                if ((unsigned)ABS(q2-q0) < thresholds->beta)
+                {
+                    data[imageWidth] = (u8)(q1 + CLIP3(-tc,tc,
+                        (q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1));
+                    tmp++;
+                }
+
+                delta = CLIP3(-tmp, tmp, ((((q0 - p0) << 2) +
+                          (p1 - q1) + 4) >> 3));
+
+                p0 = clp[p0 + delta];
+                q0 = clp[q0 - delta];
+                tmp = tc;
+                data[-imageWidth] = p0;
+                data[  0] = q0;
+            }
+        }
+    }
+    else
+    {
+        for (i = 16; i; i--, data++)
+        {
+            p1 = data[-imageWidth*2]; p0 = data[-imageWidth];
+            q0 = data[0]; q1 = data[imageWidth];
+            if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+                 ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+                 ((unsigned)ABS(q1-q0) < thresholds->beta) )
+            {
+                tmpFlag = ((unsigned)ABS(p0-q0) < ((thresholds->alpha >> 2) +2))
+                            ? HANTRO_TRUE : HANTRO_FALSE;
+
+                p2 = data[-imageWidth*3];
+                q2 = data[imageWidth*2];
+
+                if (tmpFlag && (unsigned)ABS(p2-p0) < thresholds->beta)
+                {
+                    tmp = p1 + p0 + q0;
+                    data[-imageWidth] = (u8)((p2 + 2 * tmp + q1 + 4) >> 3);
+                    data[-imageWidth*2] = (u8)((p2 + tmp + 2) >> 2);
+                    data[-imageWidth*3] = (u8)((2 * data[-imageWidth*4] +
+                                           3 * p2 + tmp + 4) >> 3);
+                }
+                else
+                    data[-imageWidth] = (u8)((2 * p1 + p0 + q1 + 2) >> 2);
+
+                if (tmpFlag && (unsigned)ABS(q2-q0) < thresholds->beta)
+                {
+                    tmp = p0 + q0 + q1;
+                    data[ 0] = (u8)((p1 + 2 * tmp + q2 + 4) >> 3);
+                    data[imageWidth] = (u8)((tmp + q2 + 2) >> 2);
+                    data[imageWidth*2] = (u8)((2 * data[imageWidth*3] +
+                                          3 * q2 + tmp + 4) >> 3);
+                }
+                else
+                    data[0] = (u8)((2 * q1 + q0 + p1 + 2) >> 2);
+            }
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterVerChromaEdge
+
+        Functional description:
+            Filter one vertical 2-pixel chroma edge. The two rows are
+            processed one after the other, width samples apart. Boundary
+            strengths 1..3 clip the correction to tc0[bS-1] + 1, bS 4 uses
+            the averaging filter; bS must not be zero.
+
+        Inputs:
+            data            pointer to q0 of the first row, i.e. the first
+                            sample right of the edge
+            bS              boundary strength, 1..4
+            thresholds      alpha, beta and tc0 values for the edge
+            width           distance between rows in samples
+
+------------------------------------------------------------------------------*/
+void FilterVerChromaEdge(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  u32 width)
+{
+
+/* Variables */
+
+    i32 delta, tc;
+    u8 p0, q0, p1, q1;
+    const u8 *clp = h264bsdClip + 512;
+
+/* Code */
+
+    ASSERT(data);
+    /* bS == 0 would index tc0[bS-1] out of bounds */
+    ASSERT(bS && bS <= 4);
+    ASSERT(thresholds);
+
+    /* first row */
+    p1 = data[-2]; p0 = data[-1];
+    q0 = data[0]; q1 = data[1];
+    if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+         ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+         ((unsigned)ABS(q1-q0) < thresholds->beta) )
+    {
+        if (bS < 4)
+        {
+            tc = thresholds->tc0[bS-1] + 1;
+            delta = CLIP3(-tc, tc, ((((q0 - p0) << 2) +
+                      (p1 - q1) + 4) >> 3));
+            p0 = clp[p0 + delta];
+            q0 = clp[q0 - delta];
+            data[-1] = p0;
+            data[ 0] = q0;
+        }
+        else
+        {
+            data[-1] = (2 * p1 + p0 + q1 + 2) >> 2;
+            data[ 0] = (2 * q1 + q0 + p1 + 2) >> 2;
+        }
+    }
+    /* second row */
+    data += width;
+    p1 = data[-2]; p0 = data[-1];
+    q0 = data[0]; q1 = data[1];
+    if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+         ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+         ((unsigned)ABS(q1-q0) < thresholds->beta) )
+    {
+        if (bS < 4)
+        {
+            tc = thresholds->tc0[bS-1] + 1;
+            delta = CLIP3(-tc, tc, ((((q0 - p0) << 2) +
+                      (p1 - q1) + 4) >> 3));
+            p0 = clp[p0 + delta];
+            q0 = clp[q0 - delta];
+            data[-1] = p0;
+            data[ 0] = q0;
+        }
+        else
+        {
+            data[-1] = (2 * p1 + p0 + q1 + 2) >> 2;
+            data[ 0] = (2 * q1 + q0 + p1 + 2) >> 2;
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterHorChromaEdge
+
+        Functional description:
+            Filter one horizontal 2-pixel chroma edge. Only boundary
+            strengths 1..3 are handled here; the tc0 table is indexed
+            with bS-1, so bS must not be zero.
+
+        Inputs:
+            data            pointer to q0 of the first column, i.e. the first
+                            sample below the edge
+            bS              boundary strength, 1..3
+            thresholds      alpha, beta and tc0 values for the edge
+            width           distance between rows in samples; signed because
+                            the rows above are addressed with -width
+
+------------------------------------------------------------------------------*/
+void FilterHorChromaEdge(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  i32 width)
+{
+
+/* Variables */
+
+    i32 delta, tc;
+    u32 i;
+    u8 p0, q0, p1, q1;
+    const u8 *clp = h264bsdClip + 512;
+
+/* Code */
+
+    ASSERT(data);
+    /* bS == 0 would index tc0[bS-1] out of bounds */
+    ASSERT(bS && bS < 4);
+    ASSERT(thresholds);
+
+    tc = thresholds->tc0[bS-1] + 1;
+    for (i = 2; i; i--, data++)
+    {
+        p1 = data[-width*2]; p0 = data[-width];
+        q0 = data[0]; q1 = data[width];
+        if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+             ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+             ((unsigned)ABS(q1-q0) < thresholds->beta) )
+        {
+            delta = CLIP3(-tc, tc, ((((q0 - p0) << 2) +
+                      (p1 - q1) + 4) >> 3));
+            p0 = clp[p0 + delta];
+            q0 = clp[q0 - delta];
+            data[-width] = p0;
+            data[  0] = q0;
+        }
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterHorChroma
+
+        Functional description:
+            Filter all four successive horizontal 2-pixel chroma edges. This
+            can be done if bS is equal for all four edges.
+ +------------------------------------------------------------------------------*/ +void FilterHorChroma( + u8 *data, + u32 bS, + edgeThreshold_t *thresholds, + i32 width) +{ + +/* Variables */ + + i32 delta, tc; + u32 i; + u8 p0, q0, p1, q1; + const u8 *clp = h264bsdClip + 512; + +/* Code */ + + ASSERT(data); + ASSERT(bS <= 4); + ASSERT(thresholds); + + if (bS < 4) + { + tc = thresholds->tc0[bS-1] + 1; + for (i = 8; i; i--, data++) + { + p1 = data[-width*2]; p0 = data[-width]; + q0 = data[0]; q1 = data[width]; + if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) && + ((unsigned)ABS(p1-p0) < thresholds->beta) && + ((unsigned)ABS(q1-q0) < thresholds->beta) ) + { + delta = CLIP3(-tc, tc, ((((q0 - p0) << 2) + + (p1 - q1) + 4) >> 3)); + p0 = clp[p0 + delta]; + q0 = clp[q0 - delta]; + data[-width] = p0; + data[ 0] = q0; + } + } + } + else + { + for (i = 8; i; i--, data++) + { + p1 = data[-width*2]; p0 = data[-width]; + q0 = data[0]; q1 = data[width]; + if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) && + ((unsigned)ABS(p1-p0) < thresholds->beta) && + ((unsigned)ABS(q1-q0) < thresholds->beta) ) + { + data[-width] = (2 * p1 + p0 + q1 + 2) >> 2; + data[ 0] = (2 * q1 + q0 + p1 + 2) >> 2; + } + } + } + +} + + +/*------------------------------------------------------------------------------ + + Function: GetBoundaryStrengths + + Functional description: + Function to calculate boundary strengths for all edges of a + macroblock. Function returns HANTRO_TRUE if any of the bS values for + the macroblock had non-zero value, HANTRO_FALSE otherwise. 
+ +------------------------------------------------------------------------------*/ +u32 GetBoundaryStrengths(mbStorage_t *mb, bS_t *bS, u32 flags) +{ + +/* Variables */ + + /* this flag is set HANTRO_TRUE as soon as any boundary strength value is + * non-zero */ + u32 nonZeroBs = HANTRO_FALSE; + +/* Code */ + + ASSERT(mb); + ASSERT(bS); + ASSERT(flags); + + /* top edges */ + if (flags & FILTER_TOP_EDGE) + { + if (IS_INTRA_MB(*mb) || IS_INTRA_MB(*mb->mbB)) + { + bS[0].top = bS[1].top = bS[2].top = bS[3].top = 4; + nonZeroBs = HANTRO_TRUE; + } + else + { + bS[0].top = EdgeBoundaryStrength(mb, mb->mbB, 0, 10); + bS[1].top = EdgeBoundaryStrength(mb, mb->mbB, 1, 11); + bS[2].top = EdgeBoundaryStrength(mb, mb->mbB, 4, 14); + bS[3].top = EdgeBoundaryStrength(mb, mb->mbB, 5, 15); + if (bS[0].top || bS[1].top || bS[2].top || bS[3].top) + nonZeroBs = HANTRO_TRUE; + } + } + else + { + bS[0].top = bS[1].top = bS[2].top = bS[3].top = 0; + } + + /* left edges */ + if (flags & FILTER_LEFT_EDGE) + { + if (IS_INTRA_MB(*mb) || IS_INTRA_MB(*mb->mbA)) + { + bS[0].left = bS[4].left = bS[8].left = bS[12].left = 4; + nonZeroBs = HANTRO_TRUE; + } + else + { + bS[0].left = EdgeBoundaryStrength(mb, mb->mbA, 0, 5); + bS[4].left = EdgeBoundaryStrength(mb, mb->mbA, 2, 7); + bS[8].left = EdgeBoundaryStrength(mb, mb->mbA, 8, 13); + bS[12].left = EdgeBoundaryStrength(mb, mb->mbA, 10, 15); + if (!nonZeroBs && + (bS[0].left || bS[4].left || bS[8].left || bS[12].left)) + nonZeroBs = HANTRO_TRUE; + } + } + else + { + bS[0].left = bS[4].left = bS[8].left = bS[12].left = 0; + } + + /* inner edges */ + if (IS_INTRA_MB(*mb)) + { + bS[4].top = bS[5].top = bS[6].top = bS[7].top = + bS[8].top = bS[9].top = bS[10].top = bS[11].top = + bS[12].top = bS[13].top = bS[14].top = bS[15].top = 3; + + bS[1].left = bS[2].left = bS[3].left = + bS[5].left = bS[6].left = bS[7].left = + bS[9].left = bS[10].left = bS[11].left = + bS[13].left = bS[14].left = bS[15].left = 3; + nonZeroBs = HANTRO_TRUE; + } + else + { + /* 
16x16 inter mb -> ref addresses or motion vectors cannot differ, + * only check if either of the blocks contain coefficients */ + if (h264bsdNumMbPart(mb->mbType) == 1) + { + bS[4].top = mb->totalCoeff[2] || mb->totalCoeff[0] ? 2 : 0; + bS[5].top = mb->totalCoeff[3] || mb->totalCoeff[1] ? 2 : 0; + bS[6].top = mb->totalCoeff[6] || mb->totalCoeff[4] ? 2 : 0; + bS[7].top = mb->totalCoeff[7] || mb->totalCoeff[5] ? 2 : 0; + bS[8].top = mb->totalCoeff[8] || mb->totalCoeff[2] ? 2 : 0; + bS[9].top = mb->totalCoeff[9] || mb->totalCoeff[3] ? 2 : 0; + bS[10].top = mb->totalCoeff[12] || mb->totalCoeff[6] ? 2 : 0; + bS[11].top = mb->totalCoeff[13] || mb->totalCoeff[7] ? 2 : 0; + bS[12].top = mb->totalCoeff[10] || mb->totalCoeff[8] ? 2 : 0; + bS[13].top = mb->totalCoeff[11] || mb->totalCoeff[9] ? 2 : 0; + bS[14].top = mb->totalCoeff[14] || mb->totalCoeff[12] ? 2 : 0; + bS[15].top = mb->totalCoeff[15] || mb->totalCoeff[13] ? 2 : 0; + + bS[1].left = mb->totalCoeff[1] || mb->totalCoeff[0] ? 2 : 0; + bS[2].left = mb->totalCoeff[4] || mb->totalCoeff[1] ? 2 : 0; + bS[3].left = mb->totalCoeff[5] || mb->totalCoeff[4] ? 2 : 0; + bS[5].left = mb->totalCoeff[3] || mb->totalCoeff[2] ? 2 : 0; + bS[6].left = mb->totalCoeff[6] || mb->totalCoeff[3] ? 2 : 0; + bS[7].left = mb->totalCoeff[7] || mb->totalCoeff[6] ? 2 : 0; + bS[9].left = mb->totalCoeff[9] || mb->totalCoeff[8] ? 2 : 0; + bS[10].left = mb->totalCoeff[12] || mb->totalCoeff[9] ? 2 : 0; + bS[11].left = mb->totalCoeff[13] || mb->totalCoeff[12] ? 2 : 0; + bS[13].left = mb->totalCoeff[11] || mb->totalCoeff[10] ? 2 : 0; + bS[14].left = mb->totalCoeff[14] || mb->totalCoeff[11] ? 2 : 0; + bS[15].left = mb->totalCoeff[15] || mb->totalCoeff[14] ? 2 : 0; + } + /* 16x8 inter mb -> ref addresses and motion vectors can be different + * only for the middle horizontal edge, for the other top edges it is + * enough to check whether the blocks contain coefficients or not. The + * same applies to all internal left edges. 
*/ + else if (mb->mbType == P_L0_L0_16x8) + { + bS[4].top = mb->totalCoeff[2] || mb->totalCoeff[0] ? 2 : 0; + bS[5].top = mb->totalCoeff[3] || mb->totalCoeff[1] ? 2 : 0; + bS[6].top = mb->totalCoeff[6] || mb->totalCoeff[4] ? 2 : 0; + bS[7].top = mb->totalCoeff[7] || mb->totalCoeff[5] ? 2 : 0; + bS[12].top = mb->totalCoeff[10] || mb->totalCoeff[8] ? 2 : 0; + bS[13].top = mb->totalCoeff[11] || mb->totalCoeff[9] ? 2 : 0; + bS[14].top = mb->totalCoeff[14] || mb->totalCoeff[12] ? 2 : 0; + bS[15].top = mb->totalCoeff[15] || mb->totalCoeff[13] ? 2 : 0; + bS[8].top = InnerBoundaryStrength(mb, 8, 2); + bS[9].top = InnerBoundaryStrength(mb, 9, 3); + bS[10].top = InnerBoundaryStrength(mb, 12, 6); + bS[11].top = InnerBoundaryStrength(mb, 13, 7); + + bS[1].left = mb->totalCoeff[1] || mb->totalCoeff[0] ? 2 : 0; + bS[2].left = mb->totalCoeff[4] || mb->totalCoeff[1] ? 2 : 0; + bS[3].left = mb->totalCoeff[5] || mb->totalCoeff[4] ? 2 : 0; + bS[5].left = mb->totalCoeff[3] || mb->totalCoeff[2] ? 2 : 0; + bS[6].left = mb->totalCoeff[6] || mb->totalCoeff[3] ? 2 : 0; + bS[7].left = mb->totalCoeff[7] || mb->totalCoeff[6] ? 2 : 0; + bS[9].left = mb->totalCoeff[9] || mb->totalCoeff[8] ? 2 : 0; + bS[10].left = mb->totalCoeff[12] || mb->totalCoeff[9] ? 2 : 0; + bS[11].left = mb->totalCoeff[13] || mb->totalCoeff[12] ? 2 : 0; + bS[13].left = mb->totalCoeff[11] || mb->totalCoeff[10] ? 2 : 0; + bS[14].left = mb->totalCoeff[14] || mb->totalCoeff[11] ? 2 : 0; + bS[15].left = mb->totalCoeff[15] || mb->totalCoeff[14] ? 2 : 0; + } + /* 8x16 inter mb -> ref addresses and motion vectors can be different + * only for the middle vertical edge, for the other left edges it is + * enough to check whether the blocks contain coefficients or not. The + * same applies to all internal top edges. */ + else if (mb->mbType == P_L0_L0_8x16) + { + bS[4].top = mb->totalCoeff[2] || mb->totalCoeff[0] ? 2 : 0; + bS[5].top = mb->totalCoeff[3] || mb->totalCoeff[1] ? 
2 : 0; + bS[6].top = mb->totalCoeff[6] || mb->totalCoeff[4] ? 2 : 0; + bS[7].top = mb->totalCoeff[7] || mb->totalCoeff[5] ? 2 : 0; + bS[8].top = mb->totalCoeff[8] || mb->totalCoeff[2] ? 2 : 0; + bS[9].top = mb->totalCoeff[9] || mb->totalCoeff[3] ? 2 : 0; + bS[10].top = mb->totalCoeff[12] || mb->totalCoeff[6] ? 2 : 0; + bS[11].top = mb->totalCoeff[13] || mb->totalCoeff[7] ? 2 : 0; + bS[12].top = mb->totalCoeff[10] || mb->totalCoeff[8] ? 2 : 0; + bS[13].top = mb->totalCoeff[11] || mb->totalCoeff[9] ? 2 : 0; + bS[14].top = mb->totalCoeff[14] || mb->totalCoeff[12] ? 2 : 0; + bS[15].top = mb->totalCoeff[15] || mb->totalCoeff[13] ? 2 : 0; + + bS[1].left = mb->totalCoeff[1] || mb->totalCoeff[0] ? 2 : 0; + bS[3].left = mb->totalCoeff[5] || mb->totalCoeff[4] ? 2 : 0; + bS[5].left = mb->totalCoeff[3] || mb->totalCoeff[2] ? 2 : 0; + bS[7].left = mb->totalCoeff[7] || mb->totalCoeff[6] ? 2 : 0; + bS[9].left = mb->totalCoeff[9] || mb->totalCoeff[8] ? 2 : 0; + bS[11].left = mb->totalCoeff[13] || mb->totalCoeff[12] ? 2 : 0; + bS[13].left = mb->totalCoeff[11] || mb->totalCoeff[10] ? 2 : 0; + bS[15].left = mb->totalCoeff[15] || mb->totalCoeff[14] ? 
2 : 0; + bS[2].left = InnerBoundaryStrength(mb, 4, 1); + bS[6].left = InnerBoundaryStrength(mb, 6, 3); + bS[10].left = InnerBoundaryStrength(mb, 12, 9); + bS[14].left = InnerBoundaryStrength(mb, 14, 11); + } + else + { + bS[4].top = + InnerBoundaryStrength(mb, mb4x4Index[4], mb4x4Index[0]); + bS[5].top = + InnerBoundaryStrength(mb, mb4x4Index[5], mb4x4Index[1]); + bS[6].top = + InnerBoundaryStrength(mb, mb4x4Index[6], mb4x4Index[2]); + bS[7].top = + InnerBoundaryStrength(mb, mb4x4Index[7], mb4x4Index[3]); + bS[8].top = + InnerBoundaryStrength(mb, mb4x4Index[8], mb4x4Index[4]); + bS[9].top = + InnerBoundaryStrength(mb, mb4x4Index[9], mb4x4Index[5]); + bS[10].top = + InnerBoundaryStrength(mb, mb4x4Index[10], mb4x4Index[6]); + bS[11].top = + InnerBoundaryStrength(mb, mb4x4Index[11], mb4x4Index[7]); + bS[12].top = + InnerBoundaryStrength(mb, mb4x4Index[12], mb4x4Index[8]); + bS[13].top = + InnerBoundaryStrength(mb, mb4x4Index[13], mb4x4Index[9]); + bS[14].top = + InnerBoundaryStrength(mb, mb4x4Index[14], mb4x4Index[10]); + bS[15].top = + InnerBoundaryStrength(mb, mb4x4Index[15], mb4x4Index[11]); + + bS[1].left = + InnerBoundaryStrength(mb, mb4x4Index[1], mb4x4Index[0]); + bS[2].left = + InnerBoundaryStrength(mb, mb4x4Index[2], mb4x4Index[1]); + bS[3].left = + InnerBoundaryStrength(mb, mb4x4Index[3], mb4x4Index[2]); + bS[5].left = + InnerBoundaryStrength(mb, mb4x4Index[5], mb4x4Index[4]); + bS[6].left = + InnerBoundaryStrength(mb, mb4x4Index[6], mb4x4Index[5]); + bS[7].left = + InnerBoundaryStrength(mb, mb4x4Index[7], mb4x4Index[6]); + bS[9].left = + InnerBoundaryStrength(mb, mb4x4Index[9], mb4x4Index[8]); + bS[10].left = + InnerBoundaryStrength(mb, mb4x4Index[10], mb4x4Index[9]); + bS[11].left = + InnerBoundaryStrength(mb, mb4x4Index[11], mb4x4Index[10]); + bS[13].left = + InnerBoundaryStrength(mb, mb4x4Index[13], mb4x4Index[12]); + bS[14].left = + InnerBoundaryStrength(mb, mb4x4Index[14], mb4x4Index[13]); + bS[15].left = + InnerBoundaryStrength(mb, mb4x4Index[15], 
mb4x4Index[14]); + } + if (!nonZeroBs && + (bS[4].top || bS[5].top || bS[6].top || bS[7].top || + bS[8].top || bS[9].top || bS[10].top || bS[11].top || + bS[12].top || bS[13].top || bS[14].top || bS[15].top || + bS[1].left || bS[2].left || bS[3].left || + bS[5].left || bS[6].left || bS[7].left || + bS[9].left || bS[10].left || bS[11].left || + bS[13].left || bS[14].left || bS[15].left)) + nonZeroBs = HANTRO_TRUE; + } + + return(nonZeroBs); + +} + +/*------------------------------------------------------------------------------ + + Function: GetLumaEdgeThresholds + + Functional description: + Compute alpha, beta and tc0 thresholds for inner, left and top + luma edges of a macroblock. + +------------------------------------------------------------------------------*/ +void GetLumaEdgeThresholds( + edgeThreshold_t *thresholds, + mbStorage_t *mb, + u32 filteringFlags) +{ + +/* Variables */ + + u32 indexA, indexB; + u32 qpAv, qp, qpTmp; + +/* Code */ + + ASSERT(thresholds); + ASSERT(mb); + + qp = mb->qpY; + + indexA = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetA); + indexB = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetB); + + thresholds[INNER].alpha = alphas[indexA]; + thresholds[INNER].beta = betas[indexB]; + thresholds[INNER].tc0 = tc0[indexA]; + + if (filteringFlags & FILTER_TOP_EDGE) + { + qpTmp = mb->mbB->qpY; + if (qpTmp != qp) + { + qpAv = (qp + qpTmp + 1) >> 1; + + indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA); + indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB); + + thresholds[TOP].alpha = alphas[indexA]; + thresholds[TOP].beta = betas[indexB]; + thresholds[TOP].tc0 = tc0[indexA]; + } + else + { + thresholds[TOP].alpha = thresholds[INNER].alpha; + thresholds[TOP].beta = thresholds[INNER].beta; + thresholds[TOP].tc0 = thresholds[INNER].tc0; + } + } + if (filteringFlags & FILTER_LEFT_EDGE) + { + qpTmp = mb->mbA->qpY; + if (qpTmp != qp) + { + qpAv = (qp + qpTmp + 1) >> 1; + + indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA); + indexB 
= (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB); + + thresholds[LEFT].alpha = alphas[indexA]; + thresholds[LEFT].beta = betas[indexB]; + thresholds[LEFT].tc0 = tc0[indexA]; + } + else + { + thresholds[LEFT].alpha = thresholds[INNER].alpha; + thresholds[LEFT].beta = thresholds[INNER].beta; + thresholds[LEFT].tc0 = thresholds[INNER].tc0; + } + } + +} + +/*------------------------------------------------------------------------------ + + Function: GetChromaEdgeThresholds + + Functional description: + Compute alpha, beta and tc0 thresholds for inner, left and top + chroma edges of a macroblock. + +------------------------------------------------------------------------------*/ +void GetChromaEdgeThresholds( + edgeThreshold_t *thresholds, + mbStorage_t *mb, + u32 filteringFlags, + i32 chromaQpIndexOffset) +{ + +/* Variables */ + + u32 indexA, indexB; + u32 qpAv, qp, qpTmp; + +/* Code */ + + ASSERT(thresholds); + ASSERT(mb); + + qp = mb->qpY; + qp = h264bsdQpC[CLIP3(0, 51, (i32)qp + chromaQpIndexOffset)]; + + indexA = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetA); + indexB = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetB); + + thresholds[INNER].alpha = alphas[indexA]; + thresholds[INNER].beta = betas[indexB]; + thresholds[INNER].tc0 = tc0[indexA]; + + if (filteringFlags & FILTER_TOP_EDGE) + { + qpTmp = mb->mbB->qpY; + if (qpTmp != mb->qpY) + { + qpTmp = h264bsdQpC[CLIP3(0, 51, (i32)qpTmp + chromaQpIndexOffset)]; + qpAv = (qp + qpTmp + 1) >> 1; + + indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA); + indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB); + + thresholds[TOP].alpha = alphas[indexA]; + thresholds[TOP].beta = betas[indexB]; + thresholds[TOP].tc0 = tc0[indexA]; + } + else + { + thresholds[TOP].alpha = thresholds[INNER].alpha; + thresholds[TOP].beta = thresholds[INNER].beta; + thresholds[TOP].tc0 = thresholds[INNER].tc0; + } + } + if (filteringFlags & FILTER_LEFT_EDGE) + { + qpTmp = mb->mbA->qpY; + if (qpTmp != mb->qpY) + { + qpTmp = 
h264bsdQpC[CLIP3(0, 51, (i32)qpTmp + chromaQpIndexOffset)]; + qpAv = (qp + qpTmp + 1) >> 1; + + indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA); + indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB); + + thresholds[LEFT].alpha = alphas[indexA]; + thresholds[LEFT].beta = betas[indexB]; + thresholds[LEFT].tc0 = tc0[indexA]; + } + else + { + thresholds[LEFT].alpha = thresholds[INNER].alpha; + thresholds[LEFT].beta = thresholds[INNER].beta; + thresholds[LEFT].tc0 = thresholds[INNER].tc0; + } + } + +} + +/*------------------------------------------------------------------------------ + + Function: FilterLuma + + Functional description: + Function to filter all luma edges of a macroblock + +------------------------------------------------------------------------------*/ +void FilterLuma( + u8 *data, + bS_t *bS, + edgeThreshold_t *thresholds, + u32 width) +{ + +/* Variables */ + + u32 vblock; + bS_t *tmp; + u8 *ptr; + u32 offset; + +/* Code */ + + ASSERT(data); + ASSERT(bS); + ASSERT(thresholds); + + ptr = data; + tmp = bS; + + offset = TOP; + + /* loop block rows, perform filtering for all vertical edges of the block + * row first, then filter each horizontal edge of the row */ + for (vblock = 4; vblock--;) + { + /* only perform filtering if bS is non-zero, first of the four + * FilterVerLumaEdge handles the left edge of the macroblock, others + * filter inner edges */ + if (tmp[0].left) + FilterVerLumaEdge(ptr, tmp[0].left, thresholds + LEFT, width); + if (tmp[1].left) + FilterVerLumaEdge(ptr+4, tmp[1].left, thresholds + INNER, width); + if (tmp[2].left) + FilterVerLumaEdge(ptr+8, tmp[2].left, thresholds + INNER, width); + if (tmp[3].left) + FilterVerLumaEdge(ptr+12, tmp[3].left, thresholds + INNER, width); + + /* if bS is equal for all horizontal edges of the row -> perform + * filtering with FilterHorLuma, otherwise use FilterHorLumaEdge for + * each edge separately. 
offset variable indicates top macroblock edge + * on the first loop round, inner edge for the other rounds */ + if (tmp[0].top == tmp[1].top && tmp[1].top == tmp[2].top && + tmp[2].top == tmp[3].top) + { + if(tmp[0].top) + FilterHorLuma(ptr, tmp[0].top, thresholds + offset, (i32)width); + } + else + { + if(tmp[0].top) + FilterHorLumaEdge(ptr, tmp[0].top, thresholds+offset, + (i32)width); + if(tmp[1].top) + FilterHorLumaEdge(ptr+4, tmp[1].top, thresholds+offset, + (i32)width); + if(tmp[2].top) + FilterHorLumaEdge(ptr+8, tmp[2].top, thresholds+offset, + (i32)width); + if(tmp[3].top) + FilterHorLumaEdge(ptr+12, tmp[3].top, thresholds+offset, + (i32)width); + } + + /* four pixel rows ahead, i.e. next row of 4x4-blocks */ + ptr += width*4; + tmp += 4; + offset = INNER; + } +} + +/*------------------------------------------------------------------------------ + + Function: FilterChroma + + Functional description: + Function to filter all chroma edges of a macroblock + +------------------------------------------------------------------------------*/ +void FilterChroma( + u8 *dataCb, + u8 *dataCr, + bS_t *bS, + edgeThreshold_t *thresholds, + u32 width) +{ + +/* Variables */ + + u32 vblock; + bS_t *tmp; + u32 offset; + +/* Code */ + + ASSERT(dataCb); + ASSERT(dataCr); + ASSERT(bS); + ASSERT(thresholds); + + tmp = bS; + offset = TOP; + + /* loop block rows, perform filtering for all vertical edges of the block + * row first, then filter each horizontal edge of the row */ + for (vblock = 0; vblock < 2; vblock++) + { + /* only perform filtering if bS is non-zero, first two of the four + * FilterVerChromaEdge calls handle the left edge of the macroblock, + * others filter the inner edge. 
Note that as chroma uses bS values + * determined for luma edges, each bS is used only for 2 pixels of + * a 4-pixel edge */ + if (tmp[0].left) + { + FilterVerChromaEdge(dataCb, tmp[0].left, thresholds + LEFT, width); + FilterVerChromaEdge(dataCr, tmp[0].left, thresholds + LEFT, width); + } + if (tmp[4].left) + { + FilterVerChromaEdge(dataCb+2*width, tmp[4].left, thresholds + LEFT, + width); + FilterVerChromaEdge(dataCr+2*width, tmp[4].left, thresholds + LEFT, + width); + } + if (tmp[2].left) + { + FilterVerChromaEdge(dataCb+4, tmp[2].left, thresholds + INNER, + width); + FilterVerChromaEdge(dataCr+4, tmp[2].left, thresholds + INNER, + width); + } + if (tmp[6].left) + { + FilterVerChromaEdge(dataCb+2*width+4, tmp[6].left, + thresholds + INNER, width); + FilterVerChromaEdge(dataCr+2*width+4, tmp[6].left, + thresholds + INNER, width); + } + + /* if bS is equal for all horizontal edges of the row -> perform + * filtering with FilterHorChroma, otherwise use FilterHorChromaEdge + * for each edge separately. 
offset variable indicates top macroblock + * edge on the first loop round, inner edge for the second */ + if (tmp[0].top == tmp[1].top && tmp[1].top == tmp[2].top && + tmp[2].top == tmp[3].top) + { + if(tmp[0].top) + { + FilterHorChroma(dataCb, tmp[0].top, thresholds+offset, + (i32)width); + FilterHorChroma(dataCr, tmp[0].top, thresholds+offset, + (i32)width); + } + } + else + { + if (tmp[0].top) + { + FilterHorChromaEdge(dataCb, tmp[0].top, thresholds+offset, + (i32)width); + FilterHorChromaEdge(dataCr, tmp[0].top, thresholds+offset, + (i32)width); + } + if (tmp[1].top) + { + FilterHorChromaEdge(dataCb+2, tmp[1].top, thresholds+offset, + (i32)width); + FilterHorChromaEdge(dataCr+2, tmp[1].top, thresholds+offset, + (i32)width); + } + if (tmp[2].top) + { + FilterHorChromaEdge(dataCb+4, tmp[2].top, thresholds+offset, + (i32)width); + FilterHorChromaEdge(dataCr+4, tmp[2].top, thresholds+offset, + (i32)width); + } + if (tmp[3].top) + { + FilterHorChromaEdge(dataCb+6, tmp[3].top, thresholds+offset, + (i32)width); + FilterHorChromaEdge(dataCr+6, tmp[3].top, thresholds+offset, + (i32)width); + } + } + + tmp += 8; + dataCb += width*4; + dataCr += width*4; + offset = INNER; + } +} + +#else /* H264DEC_OMXDL */ + +/*------------------------------------------------------------------------------ + + Function: h264bsdFilterPicture + + Functional description: + Perform deblocking filtering for a picture. Filter does not copy + the original picture anywhere but filtering is performed directly + on the original image. Parameters controlling the filtering process + are computed based on information in macroblock structures of the + filtered macroblock, macroblock above and macroblock on the left of + the filtered one. 
+ + Inputs: + image pointer to image to be filtered + mb pointer to macroblock data structure of the top-left + macroblock of the picture + + Outputs: + image filtered image stored here + + Returns: + none + +------------------------------------------------------------------------------*/ + +/*lint --e{550} Symbol not accessed */ +void h264bsdFilterPicture( + image_t *image, + mbStorage_t *mb) +{ + +/* Variables */ + + u32 flags; + u32 picSizeInMbs, mbRow, mbCol; + u32 picWidthInMbs; + u8 *data; + mbStorage_t *pMb; + u8 bS[2][16]; + u8 thresholdLuma[2][16]; + u8 thresholdChroma[2][8]; + u8 alpha[2][2]; + u8 beta[2][2]; + OMXResult res; + +/* Code */ + + ASSERT(image); + ASSERT(mb); + ASSERT(image->data); + ASSERT(image->width); + ASSERT(image->height); + + picWidthInMbs = image->width; + data = image->data; + picSizeInMbs = picWidthInMbs * image->height; + + pMb = mb; + + for (mbRow = 0, mbCol = 0; mbRow < image->height; pMb++) + { + flags = GetMbFilteringFlags(pMb); + + if (flags) + { + /* GetBoundaryStrengths function returns non-zero value if any of + * the bS values for the macroblock being processed was non-zero */ + if (GetBoundaryStrengths(pMb, bS, flags)) + { + + /* Luma */ + GetLumaEdgeThresholds(pMb,alpha,beta,thresholdLuma,bS,flags); + data = image->data + mbRow * picWidthInMbs * 256 + mbCol * 16; + + res = omxVCM4P10_FilterDeblockingLuma_VerEdge_I( data, + (OMX_S32)(picWidthInMbs*16), + (const OMX_U8*)alpha, + (const OMX_U8*)beta, + (const OMX_U8*)thresholdLuma, + (const OMX_U8*)bS ); + + res = omxVCM4P10_FilterDeblockingLuma_HorEdge_I( data, + (OMX_S32)(picWidthInMbs*16), + (const OMX_U8*)alpha+2, + (const OMX_U8*)beta+2, + (const OMX_U8*)thresholdLuma+16, + (const OMX_U8*)bS+16 ); + /* Cb */ + GetChromaEdgeThresholds(pMb, alpha, beta, thresholdChroma, + bS, flags, pMb->chromaQpIndexOffset); + data = image->data + picSizeInMbs * 256 + + mbRow * picWidthInMbs * 64 + mbCol * 8; + + res = omxVCM4P10_FilterDeblockingChroma_VerEdge_I( data, + 
(OMX_S32)(picWidthInMbs*8), + (const OMX_U8*)alpha, + (const OMX_U8*)beta, + (const OMX_U8*)thresholdChroma, + (const OMX_U8*)bS ); + res = omxVCM4P10_FilterDeblockingChroma_HorEdge_I( data, + (OMX_S32)(picWidthInMbs*8), + (const OMX_U8*)alpha+2, + (const OMX_U8*)beta+2, + (const OMX_U8*)thresholdChroma+8, + (const OMX_U8*)bS+16 ); + /* Cr */ + data += (picSizeInMbs * 64); + res = omxVCM4P10_FilterDeblockingChroma_VerEdge_I( data, + (OMX_S32)(picWidthInMbs*8), + (const OMX_U8*)alpha, + (const OMX_U8*)beta, + (const OMX_U8*)thresholdChroma, + (const OMX_U8*)bS ); + res = omxVCM4P10_FilterDeblockingChroma_HorEdge_I( data, + (OMX_S32)(picWidthInMbs*8), + (const OMX_U8*)alpha+2, + (const OMX_U8*)beta+2, + (const OMX_U8*)thresholdChroma+8, + (const OMX_U8*)bS+16 ); + } + } + + mbCol++; + if (mbCol == picWidthInMbs) + { + mbCol = 0; + mbRow++; + } + } + +} + +/*------------------------------------------------------------------------------ + + Function: GetBoundaryStrengths + + Functional description: + Function to calculate boundary strengths for all edges of a + macroblock. Function returns HANTRO_TRUE if any of the bS values for + the macroblock had non-zero value, HANTRO_FALSE otherwise. 
+ +------------------------------------------------------------------------------*/ +u32 GetBoundaryStrengths(mbStorage_t *mb, u8 (*bS)[16], u32 flags) +{ + +/* Variables */ + + /* this flag is set HANTRO_TRUE as soon as any boundary strength value is + * non-zero */ + u32 nonZeroBs = HANTRO_FALSE; + u32 *pTmp; + u32 tmp1, tmp2, isIntraMb; + +/* Code */ + + ASSERT(mb); + ASSERT(bS); + ASSERT(flags); + + isIntraMb = IS_INTRA_MB(*mb); + + /* top edges */ + pTmp = (u32*)&bS[1][0]; + if (flags & FILTER_TOP_EDGE) + { + if (isIntraMb || IS_INTRA_MB(*mb->mbB)) + { + *pTmp = 0x04040404; + nonZeroBs = HANTRO_TRUE; + } + else + { + *pTmp = EdgeBoundaryStrengthTop(mb, mb->mbB); + if (*pTmp) + nonZeroBs = HANTRO_TRUE; + } + } + else + { + *pTmp = 0; + } + + /* left edges */ + pTmp = (u32*)&bS[0][0]; + if (flags & FILTER_LEFT_EDGE) + { + if (isIntraMb || IS_INTRA_MB(*mb->mbA)) + { + /*bS[0][0] = bS[0][1] = bS[0][2] = bS[0][3] = 4;*/ + *pTmp = 0x04040404; + nonZeroBs = HANTRO_TRUE; + } + else + { + *pTmp = EdgeBoundaryStrengthLeft(mb, mb->mbA); + if (!nonZeroBs && *pTmp) + nonZeroBs = HANTRO_TRUE; + } + } + else + { + *pTmp = 0; + } + + /* inner edges */ + if (isIntraMb) + { + pTmp++; + *pTmp++ = 0x03030303; + *pTmp++ = 0x03030303; + *pTmp++ = 0x03030303; + pTmp++; + *pTmp++ = 0x03030303; + *pTmp++ = 0x03030303; + *pTmp = 0x03030303; + + nonZeroBs = HANTRO_TRUE; + } + else + { + pTmp = (u32*)mb->totalCoeff; + + /* 16x16 inter mb -> ref addresses or motion vectors cannot differ, + * only check if either of the blocks contain coefficients */ + if (h264bsdNumMbPart(mb->mbType) == 1) + { + tmp1 = *pTmp++; + tmp2 = *pTmp++; + bS[1][4] = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [2] || [0] */ + bS[1][5] = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [3] || [1] */ + bS[0][4] = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [1] || [0] */ + bS[0][5] = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [3] || [2] */ + + tmp1 = *pTmp++; + bS[1][6] = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [6] || [4] */ + bS[1][7] = (tmp2 & 0xFF00FF00) ? 
2 : 0; /* [7] || [5] */ + bS[0][12] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [5] || [4] */ + bS[0][13] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [7] || [6] */ + tmp2 = *pTmp; + bS[1][12] = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [10] || [8] */ + bS[1][13] = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [11] || [9] */ + bS[0][6] = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [9] || [8] */ + bS[0][7] = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [11] || [10] */ + + bS[1][14] = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [14] || [12] */ + bS[1][15] = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [15] || [13] */ + bS[0][14] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [13] || [12] */ + bS[0][15] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [15] || [14] */ + + { + u32 tmp3, tmp4; + + tmp1 = mb->totalCoeff[8]; + tmp2 = mb->totalCoeff[2]; + tmp3 = mb->totalCoeff[9]; + tmp4 = mb->totalCoeff[3]; + + bS[1][8] = tmp1 || tmp2 ? 2 : 0; + tmp1 = mb->totalCoeff[12]; + tmp2 = mb->totalCoeff[6]; + bS[1][9] = tmp3 || tmp4 ? 2 : 0; + tmp3 = mb->totalCoeff[13]; + tmp4 = mb->totalCoeff[7]; + bS[1][10] = tmp1 || tmp2 ? 2 : 0; + tmp1 = mb->totalCoeff[4]; + tmp2 = mb->totalCoeff[1]; + bS[1][11] = tmp3 || tmp4 ? 2 : 0; + tmp3 = mb->totalCoeff[6]; + tmp4 = mb->totalCoeff[3]; + bS[0][8] = tmp1 || tmp2 ? 2 : 0; + tmp1 = mb->totalCoeff[12]; + tmp2 = mb->totalCoeff[9]; + bS[0][9] = tmp3 || tmp4 ? 2 : 0; + tmp3 = mb->totalCoeff[14]; + tmp4 = mb->totalCoeff[11]; + bS[0][10] = tmp1 || tmp2 ? 2 : 0; + bS[0][11] = tmp3 || tmp4 ? 2 : 0; + } + } + + /* 16x8 inter mb -> ref addresses and motion vectors can be different + * only for the middle horizontal edge, for the other top edges it is + * enough to check whether the blocks contain coefficients or not. The + * same applies to all internal left edges. */ + else if (mb->mbType == P_L0_L0_16x8) + { + tmp1 = *pTmp++; + tmp2 = *pTmp++; + bS[1][4] = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [2] || [0] */ + bS[1][5] = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [3] || [1] */ + bS[0][4] = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [1] || [0] */ + bS[0][5] = (tmp1 & 0xFFFF0000) ? 
2 : 0; /* [3] || [2] */ + tmp1 = *pTmp++; + bS[1][6] = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [6] || [4] */ + bS[1][7] = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [7] || [5] */ + bS[0][12] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [5] || [4] */ + bS[0][13] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [7] || [6] */ + tmp2 = *pTmp; + bS[1][12] = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [10] || [8] */ + bS[1][13] = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [11] || [9] */ + bS[0][6] = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [9] || [8] */ + bS[0][7] = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [11] || [10] */ + + bS[1][14] = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [14] || [12] */ + bS[1][15] = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [15] || [13] */ + bS[0][14] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [13] || [12] */ + bS[0][15] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [15] || [14] */ + + bS[1][8] = (u8)InnerBoundaryStrength(mb, 8, 2); + bS[1][9] = (u8)InnerBoundaryStrength(mb, 9, 3); + bS[1][10] = (u8)InnerBoundaryStrength(mb, 12, 6); + bS[1][11] = (u8)InnerBoundaryStrength(mb, 13, 7); + + { + u32 tmp3, tmp4; + + tmp1 = mb->totalCoeff[4]; + tmp2 = mb->totalCoeff[1]; + tmp3 = mb->totalCoeff[6]; + tmp4 = mb->totalCoeff[3]; + bS[0][8] = tmp1 || tmp2 ? 2 : 0; + tmp1 = mb->totalCoeff[12]; + tmp2 = mb->totalCoeff[9]; + bS[0][9] = tmp3 || tmp4 ? 2 : 0; + tmp3 = mb->totalCoeff[14]; + tmp4 = mb->totalCoeff[11]; + bS[0][10] = tmp1 || tmp2 ? 2 : 0; + bS[0][11] = tmp3 || tmp4 ? 2 : 0; + } + } + /* 8x16 inter mb -> ref addresses and motion vectors can be different + * only for the middle vertical edge, for the other left edges it is + * enough to check whether the blocks contain coefficients or not. The + * same applies to all internal top edges. */ + else if (mb->mbType == P_L0_L0_8x16) + { + tmp1 = *pTmp++; + tmp2 = *pTmp++; + bS[1][4] = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [2] || [0] */ + bS[1][5] = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [3] || [1] */ + bS[0][4] = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [1] || [0] */ + bS[0][5] = (tmp1 & 0xFFFF0000) ? 
2 : 0; /* [3] || [2] */ + tmp1 = *pTmp++; + bS[1][6] = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [6] || [4] */ + bS[1][7] = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [7] || [5] */ + bS[0][12] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [5] || [4] */ + bS[0][13] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [7] || [6] */ + tmp2 = *pTmp; + bS[1][12] = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [10] || [8] */ + bS[1][13] = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [11] || [9] */ + bS[0][6] = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [9] || [8] */ + bS[0][7] = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [11] || [10] */ + + bS[1][14] = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [14] || [12] */ + bS[1][15] = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [15] || [13] */ + bS[0][14] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [13] || [12] */ + bS[0][15] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [15] || [14] */ + + bS[0][8] = (u8)InnerBoundaryStrength(mb, 4, 1); + bS[0][9] = (u8)InnerBoundaryStrength(mb, 6, 3); + bS[0][10] = (u8)InnerBoundaryStrength(mb, 12, 9); + bS[0][11] = (u8)InnerBoundaryStrength(mb, 14, 11); + + { + u32 tmp3, tmp4; + + tmp1 = mb->totalCoeff[8]; + tmp2 = mb->totalCoeff[2]; + tmp3 = mb->totalCoeff[9]; + tmp4 = mb->totalCoeff[3]; + bS[1][8] = tmp1 || tmp2 ? 2 : 0; + tmp1 = mb->totalCoeff[12]; + tmp2 = mb->totalCoeff[6]; + bS[1][9] = tmp3 || tmp4 ? 2 : 0; + tmp3 = mb->totalCoeff[13]; + tmp4 = mb->totalCoeff[7]; + bS[1][10] = tmp1 || tmp2 ? 2 : 0; + bS[1][11] = tmp3 || tmp4 ? 2 : 0; + } + } + else + { + tmp1 = *pTmp++; + bS[1][4] = (tmp1 & 0x00FF00FF) ? 2 : (u8)InnerBoundaryStrength2(mb, 2, 0); + bS[1][5] = (tmp1 & 0xFF00FF00) ? 2 : (u8)InnerBoundaryStrength2(mb, 3, 1); + bS[0][4] = (tmp1 & 0x0000FFFF) ? 2 : (u8)InnerBoundaryStrength2(mb, 1, 0); + bS[0][5] = (tmp1 & 0xFFFF0000) ? 2 : (u8)InnerBoundaryStrength2(mb, 3, 2); + tmp1 = *pTmp++; + bS[1][6] = (tmp1 & 0x00FF00FF) ? 2 : (u8)InnerBoundaryStrength2(mb, 6, 4); + bS[1][7] = (tmp1 & 0xFF00FF00) ? 2 : (u8)InnerBoundaryStrength2(mb, 7, 5); + bS[0][12] = (tmp1 & 0x0000FFFF) ? 
2 : (u8)InnerBoundaryStrength2(mb, 5, 4); + bS[0][13] = (tmp1 & 0xFFFF0000) ? 2 : (u8)InnerBoundaryStrength2(mb, 7, 6); + tmp1 = *pTmp++; + bS[1][12] = (tmp1 & 0x00FF00FF) ? 2 : (u8)InnerBoundaryStrength2(mb, 10, 8); + bS[1][13] = (tmp1 & 0xFF00FF00) ? 2 : (u8)InnerBoundaryStrength2(mb, 11, 9); + bS[0][6] = (tmp1 & 0x0000FFFF) ? 2 : (u8)InnerBoundaryStrength2(mb, 9, 8); + bS[0][7] = (tmp1 & 0xFFFF0000) ? 2 : (u8)InnerBoundaryStrength2(mb, 11, 10); + tmp1 = *pTmp; + bS[1][14] = (tmp1 & 0x00FF00FF) ? 2 : (u8)InnerBoundaryStrength2(mb, 14, 12); + bS[1][15] = (tmp1 & 0xFF00FF00) ? 2 : (u8)InnerBoundaryStrength2(mb, 15, 13); + bS[0][14] = (tmp1 & 0x0000FFFF) ? 2 : (u8)InnerBoundaryStrength2(mb, 13, 12); + bS[0][15] = (tmp1 & 0xFFFF0000) ? 2 : (u8)InnerBoundaryStrength2(mb, 15, 14); + + bS[1][8] = (u8)InnerBoundaryStrength(mb, 8, 2); + bS[1][9] = (u8)InnerBoundaryStrength(mb, 9, 3); + bS[1][10] = (u8)InnerBoundaryStrength(mb, 12, 6); + bS[1][11] = (u8)InnerBoundaryStrength(mb, 13, 7); + + bS[0][8] = (u8)InnerBoundaryStrength(mb, 4, 1); + bS[0][9] = (u8)InnerBoundaryStrength(mb, 6, 3); + bS[0][10] = (u8)InnerBoundaryStrength(mb, 12, 9); + bS[0][11] = (u8)InnerBoundaryStrength(mb, 14, 11); + } + pTmp = (u32*)&bS[0][0]; + if (!nonZeroBs && (pTmp[1] || pTmp[2] || pTmp[3] || + pTmp[5] || pTmp[6] || pTmp[7]) ) + { + nonZeroBs = HANTRO_TRUE; + } + } + + return(nonZeroBs); + +} + +/*------------------------------------------------------------------------------ + + Function: GetLumaEdgeThresholds + + Functional description: + Compute alpha, beta and tc0 thresholds for inner, left and top + luma edges of a macroblock. 
+ +------------------------------------------------------------------------------*/ +void GetLumaEdgeThresholds( + mbStorage_t *mb, + u8 (*alpha)[2], + u8 (*beta)[2], + u8 (*threshold)[16], + u8 (*bs)[16], + u32 filteringFlags ) +{ + /* alpha, beta and threshold are outputs; bs supplies the boundary strengths; filteringFlags (FILTER_TOP_EDGE, FILTER_LEFT_EDGE) selects which neighbour QPs are taken into account at the end */ +/* Variables */ + + u32 indexA, indexB; + u32 qpAv, qp, qpTmp; + u32 i; + +/* Code */ + + ASSERT(threshold); + ASSERT(bs); + ASSERT(beta); + ASSERT(alpha); + ASSERT(mb); + + qp = mb->qpY; + /* indices into alphas, betas and tc0: QP and the MB filter offsets, passed through CLIP3(0, 51, ...) */ + indexA = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetA); + indexB = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetB); + + /* Internal edge values; entries [1][0] and [0][0] are overwritten further down when the top or left neighbour has a different QP */ + alpha[0][1] = alphas[indexA]; + alpha[1][1] = alphas[indexA]; + alpha[1][0] = alphas[indexA]; + alpha[0][0] = alphas[indexA]; + beta[0][1] = betas[indexB]; + beta[1][1] = betas[indexB]; + beta[1][0] = betas[indexB]; + beta[0][0] = betas[indexB]; + + /* vertical scan order; a zero bS gives a zero threshold, any other bS indexes tc0[indexA] */ + for (i = 0; i < 2; i++) + { + u32 t1, t2; + + t1 = bs[i][0]; + t2 = bs[i][1]; + threshold[i][0] = (t1) ? tc0[indexA][t1] : 0; + t1 = bs[i][2]; + threshold[i][1] = (t2) ? tc0[indexA][t2] : 0; + t2 = bs[i][3]; + threshold[i][2] = (t1) ? tc0[indexA][t1] : 0; + t1 = bs[i][4]; + threshold[i][3] = (t2) ? tc0[indexA][t2] : 0; + t2 = bs[i][5]; + threshold[i][4] = (t1) ? tc0[indexA][t1] : 0; + t1 = bs[i][6]; + threshold[i][5] = (t2) ? tc0[indexA][t2] : 0; + t2 = bs[i][7]; + threshold[i][6] = (t1) ? tc0[indexA][t1] : 0; + t1 = bs[i][8]; + threshold[i][7] = (t2) ? tc0[indexA][t2] : 0; + t2 = bs[i][9]; + threshold[i][8] = (t1) ? tc0[indexA][t1] : 0; + t1 = bs[i][10]; + threshold[i][9] = (t2) ? tc0[indexA][t2] : 0; + t2 = bs[i][11]; + threshold[i][10] = (t1) ? tc0[indexA][t1] : 0; + t1 = bs[i][12]; + threshold[i][11] = (t2) ? tc0[indexA][t2] : 0; + t2 = bs[i][13]; + threshold[i][12] = (t1) ? tc0[indexA][t1] : 0; + t1 = bs[i][14]; + threshold[i][13] = (t2) ? tc0[indexA][t2] : 0; + t2 = bs[i][15]; + threshold[i][14] = (t1) ? tc0[indexA][t1] : 0; + threshold[i][15] = (t2) ? 
tc0[indexA][t2] : 0; + } + /* top edge: when the neighbour referenced by mbB has a different QP, alpha[1][0], beta[1][0] and threshold[1][0..3] are recomputed from the averaged QP */ + if (filteringFlags & FILTER_TOP_EDGE) + { + qpTmp = mb->mbB->qpY; + if (qpTmp != qp) + { + u32 t1, t2, t3, t4; + qpAv = (qp + qpTmp + 1) >> 1; /* mean of the two QPs, rounded up */ + + indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA); + indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB); + + alpha[1][0] = alphas[indexA]; + beta[1][0] = betas[indexB]; + t1 = bs[1][0]; + t2 = bs[1][1]; + t3 = bs[1][2]; + t4 = bs[1][3]; + threshold[1][0] = (t1 && (t1 < 4)) ? tc0[indexA][t1] : 0; /* a bS of 0, or of 4 and above, gives threshold 0 */ + threshold[1][1] = (t2 && (t2 < 4)) ? tc0[indexA][t2] : 0; + threshold[1][2] = (t3 && (t3 < 4)) ? tc0[indexA][t3] : 0; + threshold[1][3] = (t4 && (t4 < 4)) ? tc0[indexA][t4] : 0; + } + } + if (filteringFlags & FILTER_LEFT_EDGE) /* left edge: same procedure with the neighbour referenced by mbA, writing alpha[0][0], beta[0][0] and threshold[0][0..3] */ + { + qpTmp = mb->mbA->qpY; + if (qpTmp != qp) + { + qpAv = (qp + qpTmp + 1) >> 1; + + indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA); + indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB); + + alpha[0][0] = alphas[indexA]; + beta[0][0] = betas[indexB]; + threshold[0][0] = (bs[0][0] && (bs[0][0] < 4)) ? tc0[indexA][bs[0][0]] : 0; + threshold[0][1] = (bs[0][1] && (bs[0][1] < 4)) ? tc0[indexA][bs[0][1]] : 0; + threshold[0][2] = (bs[0][2] && (bs[0][2] < 4)) ? tc0[indexA][bs[0][2]] : 0; + threshold[0][3] = (bs[0][3] && (bs[0][3] < 4)) ? tc0[indexA][bs[0][3]] : 0; + } + } + +} + +/*------------------------------------------------------------------------------ + + Function: GetChromaEdgeThresholds + + Functional description: + Compute alpha, beta and tc0 thresholds for inner, left and top + chroma edges of a macroblock. 
+ +------------------------------------------------------------------------------*/ +void GetChromaEdgeThresholds( + mbStorage_t *mb, + u8 (*alpha)[2], + u8 (*beta)[2], + u8 (*threshold)[8], + u8 (*bs)[16], + u32 filteringFlags, + i32 chromaQpIndexOffset) +{ + /* alpha, beta and threshold are outputs; every QP used here is first looked up in h264bsdQpC after chromaQpIndexOffset is added, and only eight thresholds per row are produced */ +/* Variables */ + + u32 indexA, indexB; + u32 qpAv, qp, qpTmp; + u32 i; + +/* Code */ + + ASSERT(threshold); + ASSERT(bs); + ASSERT(beta); + ASSERT(alpha); + ASSERT(mb); + ASSERT(mb); /* NOTE(review): repeats the assert on the previous line */ + + qp = mb->qpY; + qp = h264bsdQpC[CLIP3(0, 51, (i32)qp + chromaQpIndexOffset)]; /* luma QP mapped through the h264bsdQpC table */ + + indexA = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetA); + indexB = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetB); + /* defaults for all four alpha/beta entries; [1][0] and [0][0] may be overwritten below */ + alpha[0][1] = alphas[indexA]; + alpha[1][1] = alphas[indexA]; + alpha[1][0] = alphas[indexA]; + alpha[0][0] = alphas[indexA]; + beta[0][1] = betas[indexB]; + beta[1][1] = betas[indexB]; + beta[1][0] = betas[indexB]; + beta[0][0] = betas[indexB]; + + for (i = 0; i < 2; i++) + { + u32 t1, t2; + /* only bs entries 0..3 and 8..11 of each row are read */ + t1 = bs[i][0]; + t2 = bs[i][1]; + threshold[i][0] = (t1) ? tc0[indexA][t1] : 0; + t1 = bs[i][2]; + threshold[i][1] = (t2) ? tc0[indexA][t2] : 0; + t2 = bs[i][3]; + threshold[i][2] = (t1) ? tc0[indexA][t1] : 0; + t1 = bs[i][8]; + threshold[i][3] = (t2) ? tc0[indexA][t2] : 0; + t2 = bs[i][9]; + threshold[i][4] = (t1) ? tc0[indexA][t1] : 0; + t1 = bs[i][10]; + threshold[i][5] = (t2) ? tc0[indexA][t2] : 0; + t2 = bs[i][11]; + threshold[i][6] = (t1) ? tc0[indexA][t1] : 0; + threshold[i][7] = (t2) ? tc0[indexA][t2] : 0; + } + + if (filteringFlags & FILTER_TOP_EDGE) + { + qpTmp = mb->mbB->qpY; + if (qpTmp != mb->qpY) /* the QPs are compared before the h264bsdQpC mapping */ + { + u32 t1, t2, t3, t4; + qpTmp = h264bsdQpC[CLIP3(0, 51, (i32)qpTmp + chromaQpIndexOffset)]; + qpAv = (qp + qpTmp + 1) >> 1; /* mean of the two mapped QPs, rounded up */ + + indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA); + indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB); + + alpha[1][0] = alphas[indexA]; + beta[1][0] = betas[indexB]; + /* NOTE(review): GetLumaEdgeThresholds also tests (bS < 4) at this point, this routine does not */ + t1 = bs[1][0]; + t2 = bs[1][1]; + t3 = bs[1][2]; + t4 = bs[1][3]; + threshold[1][0] = (t1) ? 
tc0[indexA][t1] : 0; + threshold[1][1] = (t2) ? tc0[indexA][t2] : 0; + threshold[1][2] = (t3) ? tc0[indexA][t3] : 0; + threshold[1][3] = (t4) ? tc0[indexA][t4] : 0; + } + } + if (filteringFlags & FILTER_LEFT_EDGE) + { + qpTmp = mb->mbA->qpY; + if (qpTmp != mb->qpY) + { + /* map the neighbour QP through h264bsdQpC before averaging, as for the top edge */ + qpTmp = h264bsdQpC[CLIP3(0, 51, (i32)qpTmp + chromaQpIndexOffset)]; + qpAv = (qp + qpTmp + 1) >> 1; + + indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA); + indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB); + + alpha[0][0] = alphas[indexA]; + beta[0][0] = betas[indexB]; + threshold[0][0] = (bs[0][0]) ? tc0[indexA][bs[0][0]] : 0; + threshold[0][1] = (bs[0][1]) ? tc0[indexA][bs[0][1]] : 0; + threshold[0][2] = (bs[0][2]) ? tc0[indexA][bs[0][2]] : 0; + threshold[0][3] = (bs[0][3]) ? tc0[indexA][bs[0][3]] : 0; + } + } + +} + +#endif /* H264DEC_OMXDL */ + +/*lint +e701 +e702 */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.h new file mode 100755 index 0000000000000000000000000000000000000000..2571ddaf9c9db3b90fce334221d2fe47cf07b413 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. 
Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_DEBLOCKING_H +#define H264SWDEC_DEBLOCKING_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_image.h" +#include "h264bsd_macroblock_layer.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. Function prototypes +------------------------------------------------------------------------------*/ + +void h264bsdFilterPicture( + image_t *image, + mbStorage_t *mb); + +#endif /* #ifdef H264SWDEC_DEBLOCKING_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.c new file mode 100644 index 0000000000000000000000000000000000000000..a816871b0f23ffbd354fd23b08d0c1addaa6795a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.c @@ -0,0 +1,961 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + h264bsdInit + h264bsdDecode + h264bsdShutdown + h264bsdCurrentImage + h264bsdNextOutputPicture + h264bsdPicWidth + h264bsdPicHeight + h264bsdFlushBuffer + h264bsdCheckValidParamSets + h264bsdVideoRange + h264bsdMatrixCoefficients + h264bsdCroppingParams + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_decoder.h" +#include "h264bsd_nal_unit.h" +#include "h264bsd_byte_stream.h" +#include "h264bsd_seq_param_set.h" +#include "h264bsd_pic_param_set.h" +#include "h264bsd_slice_header.h" +#include "h264bsd_slice_data.h" +#include "h264bsd_neighbour.h" +#include "h264bsd_util.h" +#include "h264bsd_dpb.h" +#include "h264bsd_deblocking.h" +#include "h264bsd_conceal.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. 
Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + + Function name: h264bsdInit + + Functional description: + Initialize the decoder. + + Inputs: + noOutputReordering flag to indicate the decoder that it does not + have to perform reordering of display images. + + Outputs: + pStorage pointer to initialized storage structure + + Returns: + HANTRO_OK on success, HANTRO_NOK if the mbLayer buffer cannot be allocated + +------------------------------------------------------------------------------*/ + +u32 h264bsdInit(storage_t *pStorage, u32 noOutputReordering) +{ + +/* Variables */ + u32 size; +/* Code */ + + ASSERT(pStorage); + + h264bsdInitStorage(pStorage); + + /* allocate mbLayer to be next multiple of 64 to enable use of + * specific NEON optimized "memset" for clearing the structure */ + size = (sizeof(macroblockLayer_t) + 63) & ~0x3F; + + pStorage->mbLayer = (macroblockLayer_t*)H264SwDecMalloc(size); + if (!pStorage->mbLayer) + return HANTRO_NOK; + + if (noOutputReordering) + pStorage->noReordering = HANTRO_TRUE; + /* noReordering is only written when the flag is set; otherwise it keeps whatever h264bsdInitStorage left there */ + return HANTRO_OK; +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdDecode + + Functional description: + Decode a NAL unit. This function calls other modules to perform + tasks like + * extract and decode NAL unit from the byte stream + * decode parameter sets + * decode slice header and slice data + * conceal errors in the picture + * perform deblocking filtering + + This function contains top level control logic of the decoder. 
+ + Inputs: + pStorage pointer to storage data structure + byteStrm pointer to stream buffer given by application + len length of the buffer in bytes + picId identifier for a picture, assigned by the + application + + Outputs: + readBytes number of bytes read from the stream is stored + here; 0 when the current NAL unit is to be decoded again on the next call + + Returns: + H264BSD_RDY decoding finished, nothing special + H264BSD_PIC_RDY decoding of a picture finished + H264BSD_HDRS_RDY param sets activated, information like + picture dimensions etc can be read + H264BSD_ERROR error in decoding + H264BSD_PARAM_SET_ERROR serious error in decoding, failed to + activate param sets (H264BSD_MEMALLOC_ERROR instead when activation reports MEMORY_ALLOCATION_ERROR) + +------------------------------------------------------------------------------*/ + +u32 h264bsdDecode(storage_t *pStorage, u8 *byteStrm, u32 len, u32 picId, + u32 *readBytes) +{ + +/* Variables */ + + u32 tmp, ppsId, spsId; + i32 picOrderCnt; + nalUnit_t nalUnit; + seqParamSet_t seqParamSet; + picParamSet_t picParamSet; + strmData_t strm; + u32 accessUnitBoundaryFlag = HANTRO_FALSE; + u32 picReady = HANTRO_FALSE; + +/* Code */ + + ASSERT(pStorage); + ASSERT(byteStrm); + ASSERT(len); + ASSERT(readBytes); + + /* if previous buffer was not finished and same pointer given -> skip NAL + * unit extraction */ + if (pStorage->prevBufNotFinished && byteStrm == pStorage->prevBufPointer) + { + strm = pStorage->strm[0]; /* reuse the stream state stored on the previous call, rewound to its start */ + strm.pStrmCurrPos = strm.pStrmBuffStart; + strm.strmBuffReadBits = strm.bitPosInWord = 0; + *readBytes = pStorage->prevBytesConsumed; + } + else + { + tmp = h264bsdExtractNalUnit(byteStrm, len, &strm, readBytes); + if (tmp != HANTRO_OK) + { + EPRINT("BYTE_STREAM"); + return(H264BSD_ERROR); + } + /* store stream */ + pStorage->strm[0] = strm; + pStorage->prevBytesConsumed = *readBytes; + pStorage->prevBufPointer = byteStrm; + } + pStorage->prevBufNotFinished = HANTRO_FALSE; + + tmp = h264bsdDecodeNalUnit(&strm, &nalUnit); + if (tmp != HANTRO_OK) + { + EPRINT("NAL_UNIT"); + return(H264BSD_ERROR); + } + + /* Discard unspecified, reserved, SPS extension 
and auxiliary picture slices */ + if(nalUnit.nalUnitType == 0 || nalUnit.nalUnitType >= 13) + { + DEBUG(("DISCARDED NAL (UNSPECIFIED, REGISTERED, SPS ext or AUX slice)\n")); + return(H264BSD_RDY); + } + + tmp = h264bsdCheckAccessUnitBoundary( + &strm, + &nalUnit, + pStorage, + &accessUnitBoundaryFlag); + if (tmp != HANTRO_OK) + { + EPRINT("ACCESS UNIT BOUNDARY CHECK"); + if (tmp == PARAM_SET_ERROR) + return(H264BSD_PARAM_SET_ERROR); + else + return(H264BSD_ERROR); + } + + if ( accessUnitBoundaryFlag ) + { + DEBUG(("Access unit boundary\n")); + /* conceal if picture started and param sets activated */ + if (pStorage->picStarted && pStorage->activeSps != NULL) + { + DEBUG(("CONCEALING...")); + + /* return error if second phase of + * initialization is not completed */ + if (pStorage->pendingActivation) + { + EPRINT("Pending activation not completed"); + return (H264BSD_ERROR); + } + + if (!pStorage->validSliceInAccessUnit) + { + pStorage->currImage->data = + h264bsdAllocateDpbImage(pStorage->dpb); + h264bsdInitRefPicList(pStorage->dpb); + tmp = h264bsdConceal(pStorage, pStorage->currImage, P_SLICE); + } + else + tmp = h264bsdConceal(pStorage, pStorage->currImage, + pStorage->sliceHeader->sliceType); + /* NOTE(review): tmp from either h264bsdConceal call is never read before it is overwritten */ + picReady = HANTRO_TRUE; + + /* current NAL unit should be decoded on next activation -> set + * readBytes to 0 */ + *readBytes = 0; + pStorage->prevBufNotFinished = HANTRO_TRUE; + DEBUG(("...DONE\n")); + } + else + { + pStorage->validSliceInAccessUnit = HANTRO_FALSE; + } + pStorage->skipRedundantSlices = HANTRO_FALSE; + } + + if (!picReady) + { + switch (nalUnit.nalUnitType) + { + case NAL_SEQ_PARAM_SET: + DEBUG(("SEQ PARAM SET\n")); + tmp = h264bsdDecodeSeqParamSet(&strm, &seqParamSet); + if (tmp != HANTRO_OK) + { + EPRINT("SEQ_PARAM_SET"); + FREE(seqParamSet.offsetForRefFrame); + FREE(seqParamSet.vuiParameters); + return(H264BSD_ERROR); + } + tmp = h264bsdStoreSeqParamSet(pStorage, &seqParamSet); /* NOTE(review): this result is not examined; the function returns H264BSD_RDY after the switch */ + break; + + case NAL_PIC_PARAM_SET: + DEBUG(("PIC PARAM SET\n")); + 
tmp = h264bsdDecodePicParamSet(&strm, &picParamSet); + if (tmp != HANTRO_OK) + { + EPRINT("PIC_PARAM_SET"); + FREE(picParamSet.runLength); + FREE(picParamSet.topLeft); + FREE(picParamSet.bottomRight); + FREE(picParamSet.sliceGroupId); + return(H264BSD_ERROR); + } + tmp = h264bsdStorePicParamSet(pStorage, &picParamSet); /* NOTE(review): result not examined, as for the SPS case */ + break; + + case NAL_CODED_SLICE_IDR: + DEBUG(("IDR ")); + /* fall through */ + case NAL_CODED_SLICE: + DEBUG(("SLICE HEADER\n")); + + /* picture successfully finished and still decoding same old + * access unit -> no need to decode redundant slices */ + if (pStorage->skipRedundantSlices) + return(H264BSD_RDY); + + pStorage->picStarted = HANTRO_TRUE; + + if (h264bsdIsStartOfPicture(pStorage)) + { + pStorage->numConcealedMbs = 0; + pStorage->currentPicId = picId; + + tmp = h264bsdCheckPpsId(&strm, &ppsId); + ASSERT(tmp == HANTRO_OK); /* NOTE(review): only an ASSERT guards this call; if ASSERT expands to nothing, a failure here would go unnoticed and ppsId may be unset - confirm */ + /* store old activeSpsId and return headers ready + * indication if activeSps changes */ + spsId = pStorage->activeSpsId; + tmp = h264bsdActivateParamSets(pStorage, ppsId, + IS_IDR_NAL_UNIT(&nalUnit) ? 
+ HANTRO_TRUE : HANTRO_FALSE); + if (tmp != HANTRO_OK) + { + EPRINT("Param set activation"); + pStorage->activePpsId = MAX_NUM_PIC_PARAM_SETS; + pStorage->activePps = NULL; + pStorage->activeSpsId = MAX_NUM_SEQ_PARAM_SETS; + pStorage->activeSps = NULL; + pStorage->pendingActivation = HANTRO_FALSE; + /* activation failed: the active parameter-set state has been cleared above */ + if(tmp == MEMORY_ALLOCATION_ERROR) + { + return H264BSD_MEMALLOC_ERROR; + } + else + return(H264BSD_PARAM_SET_ERROR); + } + + if (spsId != pStorage->activeSpsId) + { + seqParamSet_t *oldSPS = NULL; + seqParamSet_t *newSPS = pStorage->activeSps; + u32 noOutputOfPriorPicsFlag = 1; + + if(pStorage->oldSpsId < MAX_NUM_SEQ_PARAM_SETS) + { + oldSPS = pStorage->sps[pStorage->oldSpsId]; + } + /* active SPS changed: report 0 bytes consumed so that this NAL unit is decoded again on the next call */ + *readBytes = 0; + pStorage->prevBufNotFinished = HANTRO_TRUE; + + + if(nalUnit.nalUnitType == NAL_CODED_SLICE_IDR) + { + tmp = + h264bsdCheckPriorPicsFlag(&noOutputOfPriorPicsFlag, + &strm, newSPS, + pStorage->activePps, + nalUnit.nalUnitType); + } + else + { + tmp = HANTRO_NOK; + } + /* h264bsdFlushDpb runs only for an IDR slice whose prior-pics flag was read as 0, with reordering enabled and unchanged picture size and DPB size; in every other case only dpb->flushed is cleared */ + if((tmp != HANTRO_OK) || + (noOutputOfPriorPicsFlag != 0) || + (pStorage->dpb->noReordering) || + (oldSPS == NULL) || + (oldSPS->picWidthInMbs != newSPS->picWidthInMbs) || + (oldSPS->picHeightInMbs != newSPS->picHeightInMbs) || + (oldSPS->maxDpbSize != newSPS->maxDpbSize)) + { + pStorage->dpb->flushed = 0; + } + else + { + h264bsdFlushDpb(pStorage->dpb); + } + + pStorage->oldSpsId = pStorage->activeSpsId; + + return(H264BSD_HDRS_RDY); + } + } + + /* return error if second phase of + * initialization is not completed */ + if (pStorage->pendingActivation) + { + EPRINT("Pending activation not completed"); + return (H264BSD_ERROR); + } + tmp = h264bsdDecodeSliceHeader(&strm, pStorage->sliceHeader + 1, + pStorage->activeSps, pStorage->activePps, &nalUnit); /* parsed into slot [1]; copied to slot [0] further down once accepted */ + if (tmp != HANTRO_OK) + { + EPRINT("SLICE_HEADER"); + return(H264BSD_ERROR); + } + if (h264bsdIsStartOfPicture(pStorage)) + { + if (!IS_IDR_NAL_UNIT(&nalUnit)) + { + tmp = h264bsdCheckGapsInFrameNum(pStorage->dpb, + pStorage->sliceHeader[1].frameNum, + 
nalUnit.nalRefIdc != 0 ? + HANTRO_TRUE : HANTRO_FALSE, + pStorage->activeSps-> + gapsInFrameNumValueAllowedFlag); + if (tmp != HANTRO_OK) + { + EPRINT("Gaps in frame num"); + return(H264BSD_ERROR); + } + } + pStorage->currImage->data = + h264bsdAllocateDpbImage(pStorage->dpb); + } + + /* store slice header to storage if successfully decoded */ + pStorage->sliceHeader[0] = pStorage->sliceHeader[1]; + pStorage->validSliceInAccessUnit = HANTRO_TRUE; + pStorage->prevNalUnit[0] = nalUnit; + + h264bsdComputeSliceGroupMap(pStorage, + pStorage->sliceHeader->sliceGroupChangeCycle); + + h264bsdInitRefPicList(pStorage->dpb); + tmp = h264bsdReorderRefPicList(pStorage->dpb, + &pStorage->sliceHeader->refPicListReordering, + pStorage->sliceHeader->frameNum, + pStorage->sliceHeader->numRefIdxL0Active); + if (tmp != HANTRO_OK) + { + EPRINT("Reordering"); + return(H264BSD_ERROR); + } + + DEBUG(("SLICE DATA, FIRST %d\n", + pStorage->sliceHeader->firstMbInSlice)); + tmp = h264bsdDecodeSliceData(&strm, pStorage, + pStorage->currImage, pStorage->sliceHeader); + if (tmp != HANTRO_OK) + { + EPRINT("SLICE_DATA"); + h264bsdMarkSliceCorrupted(pStorage, + pStorage->sliceHeader->firstMbInSlice); + return(H264BSD_ERROR); + } + + if (h264bsdIsEndOfPicture(pStorage)) + { + picReady = HANTRO_TRUE; + pStorage->skipRedundantSlices = HANTRO_TRUE; + } + break; + + case NAL_SEI: + DEBUG(("SEI MESSAGE, NOT DECODED")); + break; + + default: + DEBUG(("NOT IMPLEMENTED YET %d\n",nalUnit.nalUnitType)); + } + } + + if (picReady) + { + h264bsdFilterPicture(pStorage->currImage, pStorage->mb); /* picture finished (by decoding or by concealment): filter it before it is handed to the DPB */ + + h264bsdResetStorage(pStorage); + + picOrderCnt = h264bsdDecodePicOrderCnt(pStorage->poc, + pStorage->activeSps, pStorage->sliceHeader, pStorage->prevNalUnit); + + if (pStorage->validSliceInAccessUnit) + { + if (pStorage->prevNalUnit->nalRefIdc) + { + tmp = h264bsdMarkDecRefPic(pStorage->dpb, + &pStorage->sliceHeader->decRefPicMarking, + pStorage->currImage, pStorage->sliceHeader->frameNum, + picOrderCnt, + 
IS_IDR_NAL_UNIT(pStorage->prevNalUnit) ? + HANTRO_TRUE : HANTRO_FALSE, + pStorage->currentPicId, pStorage->numConcealedMbs); + } + /* non-reference picture, just store for possible display + * reordering */ + else + { + tmp = h264bsdMarkDecRefPic(pStorage->dpb, NULL, + pStorage->currImage, pStorage->sliceHeader->frameNum, + picOrderCnt, + IS_IDR_NAL_UNIT(pStorage->prevNalUnit) ? + HANTRO_TRUE : HANTRO_FALSE, + pStorage->currentPicId, pStorage->numConcealedMbs); + } + } + /* NOTE(review): tmp from h264bsdMarkDecRefPic is not inspected before returning H264BSD_PIC_RDY */ + pStorage->picStarted = HANTRO_FALSE; + pStorage->validSliceInAccessUnit = HANTRO_FALSE; + + return(H264BSD_PIC_RDY); + } + else + return(H264BSD_RDY); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdShutdown + + Functional description: + Shut down a decoder instance: frees the stored SPS and PPS entries with their + sub-allocations, mbLayer, mb and sliceGroupMap, and calls h264bsdFreeDpb. + + Inputs: + pStorage pointer to storage data structure + + Returns: + none + + +------------------------------------------------------------------------------*/ + +void h264bsdShutdown(storage_t *pStorage) +{ + +/* Variables */ + + u32 i; + +/* Code */ + + ASSERT(pStorage); + + for (i = 0; i < MAX_NUM_SEQ_PARAM_SETS; i++) + { + if (pStorage->sps[i]) + { + FREE(pStorage->sps[i]->offsetForRefFrame); + FREE(pStorage->sps[i]->vuiParameters); + FREE(pStorage->sps[i]); + } + } + + for (i = 0; i < MAX_NUM_PIC_PARAM_SETS; i++) + { + if (pStorage->pps[i]) + { + FREE(pStorage->pps[i]->runLength); + FREE(pStorage->pps[i]->topLeft); + FREE(pStorage->pps[i]->bottomRight); + FREE(pStorage->pps[i]->sliceGroupId); + FREE(pStorage->pps[i]); + } + } + + FREE(pStorage->mbLayer); + FREE(pStorage->mb); + FREE(pStorage->sliceGroupMap); + + h264bsdFreeDpb(pStorage->dpb); + /* pStorage itself is not released here */ +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdNextOutputPicture + + Functional description: + Get next output picture in display order. 
+ + Inputs: + pStorage pointer to storage data structure + + Outputs: + picId identifier of the picture will be stored here + isIdrPic IDR flag of the picture will be stored here + numErrMbs number of concealed macroblocks in the picture + will be stored here + + Returns: + pointer to the picture data + NULL if no pictures available for display + +------------------------------------------------------------------------------*/ + +u8* h264bsdNextOutputPicture(storage_t *pStorage, u32 *picId, u32 *isIdrPic, + u32 *numErrMbs) +{ + +/* Variables */ + + dpbOutPicture_t *pOut; + +/* Code */ + + ASSERT(pStorage); + + pOut = h264bsdDpbOutputPicture(pStorage->dpb); + /* the three output pointers are written only when a picture is returned, and are dereferenced without a NULL check */ + if (pOut != NULL) + { + *picId = pOut->picId; + *isIdrPic = pOut->isIdr; + *numErrMbs = pOut->numErrMbs; + return (pOut->data); + } + else + return(NULL); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdPicWidth + + Functional description: + Get width of the picture in macroblocks + + Inputs: + pStorage pointer to storage data structure + + Outputs: + none + + Returns: + picture width in macroblocks (picWidthInMbs of the active SPS) + 0 if parameter sets not yet activated + +------------------------------------------------------------------------------*/ + +u32 h264bsdPicWidth(storage_t *pStorage) +{ + +/* Variables */ + +/* Code */ + + ASSERT(pStorage); + + if (pStorage->activeSps) + return(pStorage->activeSps->picWidthInMbs); + else + return(0); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdPicHeight + + Functional description: + Get height of the picture in macroblocks + + Inputs: + pStorage pointer to storage data structure + + Outputs: + none + + Returns: + picture height in macroblocks (picHeightInMbs of the active SPS) + 0 if parameter sets not yet activated + +------------------------------------------------------------------------------*/ + +u32 h264bsdPicHeight(storage_t *pStorage) +{ + +/* Variables */ + +/* Code */ + + ASSERT(pStorage); + + if (pStorage->activeSps) + 
return(pStorage->activeSps->picHeightInMbs); + else + return(0); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdFlushBuffer + + Functional description: + Flush the decoded picture buffer by calling h264bsdFlushDpb on pStorage->dpb, see dpb.c for details + + Inputs: + pStorage pointer to storage data structure + +------------------------------------------------------------------------------*/ + +void h264bsdFlushBuffer(storage_t *pStorage) +{ + +/* Variables */ + +/* Code */ + + ASSERT(pStorage); + + h264bsdFlushDpb(pStorage->dpb); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdCheckValidParamSets + + Functional description: + Check if any valid parameter set combination (SPS/PPS) exists; maps the result of h264bsdValidParamSets to 1 or 0. + + Inputs: + pStorage pointer to storage structure + + Returns: + 1 at least one valid SPS/PPS combination found + 0 no valid param set combinations found + + +------------------------------------------------------------------------------*/ + +u32 h264bsdCheckValidParamSets(storage_t *pStorage) +{ + +/* Variables */ + +/* Code */ + + ASSERT(pStorage); + + return(h264bsdValidParamSets(pStorage) == HANTRO_OK ? 1 : 0); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdVideoRange + + Functional description: + Get value of video_full_range_flag received in the VUI data. 
+ + Inputs: + pStorage pointer to storage structure + + Returns: + 1 video_full_range_flag received and value is 1 + 0 otherwise + +------------------------------------------------------------------------------*/ + +u32 h264bsdVideoRange(storage_t *pStorage) +{ + +/* Variables */ + +/* Code */ + + ASSERT(pStorage); + + if (pStorage->activeSps && pStorage->activeSps->vuiParametersPresentFlag && + pStorage->activeSps->vuiParameters && + pStorage->activeSps->vuiParameters->videoSignalTypePresentFlag && + pStorage->activeSps->vuiParameters->videoFullRangeFlag) + return(1); + else /* default value of video_full_range_flag is 0 */ + return(0); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdMatrixCoefficients + + Functional description: + Get value of matrix_coefficients received in the VUI data + + Inputs: + pStorage pointer to storage structure + + Outputs: + value of matrix_coefficients if received + 2 otherwise (this is the default value) + +------------------------------------------------------------------------------*/ + +u32 h264bsdMatrixCoefficients(storage_t *pStorage) +{ + +/* Variables */ + +/* Code */ + + ASSERT(pStorage); + + if (pStorage->activeSps && pStorage->activeSps->vuiParametersPresentFlag && + pStorage->activeSps->vuiParameters && + pStorage->activeSps->vuiParameters->videoSignalTypePresentFlag && + pStorage->activeSps->vuiParameters->colourDescriptionPresentFlag) + return(pStorage->activeSps->vuiParameters->matrixCoefficients); + else /* default unspecified */ + return(2); + +} + +/*------------------------------------------------------------------------------ + + Function: hh264bsdCroppingParams + + Functional description: + Get cropping parameters of the active SPS + + Inputs: + pStorage pointer to storage structure + + Outputs: + croppingFlag flag indicating if cropping params present is + stored here + leftOffset cropping left offset in pixels is stored here + width width of the 
image after cropping is stored here
+            topOffset       cropping top offset in pixels is stored here
+            height          height of the image after cropping is stored here
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdCroppingParams(storage_t *pStorage, u32 *croppingFlag,
+    u32 *leftOffset, u32 *width, u32 *topOffset, u32 *height)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(pStorage);
+    /* every output is written unconditionally below */
+    ASSERT(croppingFlag);
+    ASSERT(leftOffset);
+    ASSERT(width);
+    ASSERT(topOffset);
+    ASSERT(height);
+
+    if (pStorage->activeSps && pStorage->activeSps->frameCroppingFlag)
+    {
+        /* the SPS crop offsets are doubled and the picture dimensions are
+         * taken as 16 times the size in macroblocks */
+        *croppingFlag = 1;
+        *leftOffset = 2 * pStorage->activeSps->frameCropLeftOffset;
+        *width = 16 * pStorage->activeSps->picWidthInMbs -
+                 2 * (pStorage->activeSps->frameCropLeftOffset +
+                      pStorage->activeSps->frameCropRightOffset);
+        *topOffset = 2 * pStorage->activeSps->frameCropTopOffset;
+        *height = 16 * pStorage->activeSps->picHeightInMbs -
+                  2 * (pStorage->activeSps->frameCropTopOffset +
+                       pStorage->activeSps->frameCropBottomOffset);
+    }
+    else
+    {
+        /* no active SPS or no cropping: everything is reported as 0 */
+        *croppingFlag = 0;
+        *leftOffset = 0;
+        *width = 0;
+        *topOffset = 0;
+        *height = 0;
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdSampleAspectRatio
+
+        Functional description:
+            Get aspect ratio received in the VUI data
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Outputs:
+            sarWidth    sample aspect ratio width
+            sarHeight   sample aspect ratio height
+
+            Both are 1 when no aspect ratio information is available (no
+            active SPS, no VUI data or aspect ratio not present) and both
+            are 0 when the ratio is unspecified, has an unknown
+            aspectRatioIdc or is an extended SAR with a zero component.
+
+------------------------------------------------------------------------------*/
+
+void h264bsdSampleAspectRatio(storage_t *pStorage, u32 *sarWidth, u32 *sarHeight)
+{
+
+/* Variables */
+    u32 w = 1;
+    u32 h = 1;
+/* Code */
+
+    ASSERT(pStorage);
+    ASSERT(sarWidth);
+    ASSERT(sarHeight);
+
+    if (pStorage->activeSps &&
+        pStorage->activeSps->vuiParametersPresentFlag &&
+        pStorage->activeSps->vuiParameters &&
+        pStorage->activeSps->vuiParameters->aspectRatioPresentFlag )
+    {
+        switch (pStorage->activeSps->vuiParameters->aspectRatioIdc)
+        {
+            case ASPECT_RATIO_UNSPECIFIED:  w =   0; h =  0; break;
+            case ASPECT_RATIO_1_1:          w =   1; h =  1; break;
+            case ASPECT_RATIO_12_11:        w =  12; h = 11; break;
+            case ASPECT_RATIO_10_11:        w =  10; h = 11; break;
+            case ASPECT_RATIO_16_11:        w =  16; h = 11; break;
+            case ASPECT_RATIO_40_33:        w =  40; h = 33; break;
+            case ASPECT_RATIO_24_11:        w =  24; h = 11; break;
+            case ASPECT_RATIO_20_11:        w =  20; h = 11; break;
+            case ASPECT_RATIO_32_11:        w =  32; h = 11; break;
+            case ASPECT_RATIO_80_33:        w =  80; h = 33; break;
+            case ASPECT_RATIO_18_11:        w =  18; h = 11; break;
+            case ASPECT_RATIO_15_11:        w =  15; h = 11; break;
+            case ASPECT_RATIO_64_33:        w =  64; h = 33; break;
+            case ASPECT_RATIO_160_99:       w = 160; h = 99; break;
+            case ASPECT_RATIO_EXTENDED_SAR:
+                w = pStorage->activeSps->vuiParameters->sarWidth;
+                h = pStorage->activeSps->vuiParameters->sarHeight;
+                /* a zero component makes the whole ratio unspecified */
+                if ((w == 0) || (h == 0))
+                    w = h = 0;
+                break;
+            default:
+                w = 0;
+                h = 0;
+                break;
+        }
+    }
+
+    /* set aspect ratio*/
+    *sarWidth = w;
+    *sarHeight = h;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdProfile
+
+        Functional description:
+            Get profile information from active SPS
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Returns:
+            profileIdc of the active SPS, 0 if no SPS is active
+
+------------------------------------------------------------------------------*/
+u32 h264bsdProfile(storage_t *pStorage)
+{
+    /* same precondition as the other accessors of this file */
+    ASSERT(pStorage);
+
+    if (pStorage->activeSps)
+        return pStorage->activeSps->profileIdc;
+    else
+        return 0;
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.h
new file mode 100644
index 0000000000000000000000000000000000000000..83365231c00c05ab735464479a09f4e38d573fd4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_DECODER_H
+#define H264SWDEC_DECODER_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_storage.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/* enumerated return values of the functions; the enumerators have no
+ * explicit values, so they count up from H264BSD_RDY == 0.
+ * NOTE(review): which function returns which code is not visible in this
+ * header - presumably these are the results of h264bsdDecode, confirm
+ * against its definition */
+enum {
+    H264BSD_RDY,
+    H264BSD_PIC_RDY,
+    H264BSD_HDRS_RDY,
+    H264BSD_ERROR,
+    H264BSD_PARAM_SET_ERROR,
+    H264BSD_MEMALLOC_ERROR
+};
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+
+    Every function takes the decoder state as a storage_t pointer in its
+    first parameter. Note that h264bsdCroppingParams names its offset
+    outputs 'left' and 'top' here while its definition calls them
+    'leftOffset' and 'topOffset'; order and types are the same.
+------------------------------------------------------------------------------*/
+
+u32 h264bsdInit(storage_t *pStorage, u32 noOutputReordering);
+u32 h264bsdDecode(storage_t *pStorage, u8 *byteStrm, u32 len, u32 picId,
+    u32 *readBytes);
+void h264bsdShutdown(storage_t *pStorage);
+
+u8* h264bsdNextOutputPicture(storage_t *pStorage, u32 *picId, u32 *isIdrPic,
+    u32 *numErrMbs);
+
+u32 h264bsdPicWidth(storage_t *pStorage);
+u32 h264bsdPicHeight(storage_t *pStorage);
+u32 h264bsdVideoRange(storage_t *pStorage);
+u32 h264bsdMatrixCoefficients(storage_t *pStorage);
+void h264bsdCroppingParams(storage_t *pStorage, u32 *croppingFlag,
+    u32 *left, u32 *width, u32 *top, u32 *height);
+void h264bsdSampleAspectRatio(storage_t *pStorage,
+    u32 *sarWidth, u32 *sarHeight);
+u32 h264bsdCheckValidParamSets(storage_t *pStorage);
+
+void h264bsdFlushBuffer(storage_t *pStorage);
+
+u32 h264bsdProfile(storage_t *pStorage);
+
+#endif /* #ifndef H264SWDEC_DECODER_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.c
new file mode 100755
index 0000000000000000000000000000000000000000..9517d0ae3eb075257a1182d63aedd44cce3428d6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.c
@@ -0,0 +1,1584 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + ComparePictures + h264bsdReorderRefPicList + Mmcop1 + Mmcop2 + Mmcop3 + Mmcop4 + Mmcop5 + Mmcop6 + h264bsdMarkDecRefPic + h264bsdGetRefPicData + h264bsdAllocateDpbImage + SlidingWindowRefPicMarking + h264bsdInitDpb + h264bsdResetDpb + h264bsdInitRefPicList + FindDpbPic + SetPicNums + h264bsdCheckGapsInFrameNum + FindSmallestPicOrderCnt + OutputPicture + h264bsdDpbOutputPicture + h264bsdFlushDpb + h264bsdFreeDpb + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_cfg.h" +#include "h264bsd_dpb.h" +#include "h264bsd_slice_header.h" +#include "h264bsd_image.h" +#include "h264bsd_util.h" +#include "basetype.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/* macros to determine picture status. 
Note that IS_SHORT_TERM macro returns
+ * true also for non-existing pictures because non-existing pictures are
+ * regarded short term pictures according to H.264 standard */
+#define IS_REFERENCE(a) ((a).status) /* raw status value, tested for non-zero by the callers */
+#define IS_EXISTING(a) ((a).status > NON_EXISTING) /* status strictly above NON_EXISTING */
+#define IS_SHORT_TERM(a) \
+    ((a).status == NON_EXISTING || (a).status == SHORT_TERM)
+#define IS_LONG_TERM(a) ((a).status == LONG_TERM)
+
+/* macro to set a picture unused for reference.
+ * NOTE(review): the expansion already ends with ';', so the usual
+ * 'SET_UNUSED(x);' produces an extra empty statement. That is harmless
+ * inside a block but would break an unbraced 'if (...) SET_UNUSED(x);
+ * else ...' - keep every use inside braces or as a plain statement. */
+#define SET_UNUSED(a) (a).status = UNUSED;
+
+#define MAX_NUM_REF_IDX_L0_ACTIVE 16 /* h264bsdInitDpb sizes dpb->buffer and dpb->list as this value + 1 */
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+
+    Mmcop1..Mmcop6 implement memory_management_control_operation 1..6,
+    one helper per operation; the remaining helpers are documented at
+    their definitions.
+------------------------------------------------------------------------------*/
+
+static i32 ComparePictures(const void *ptr1, const void *ptr2);
+
+static u32 Mmcop1(dpbStorage_t *dpb, u32 currPicNum, u32 differenceOfPicNums);
+
+static u32 Mmcop2(dpbStorage_t *dpb, u32 longTermPicNum);
+
+static u32 Mmcop3(dpbStorage_t *dpb, u32 currPicNum, u32 differenceOfPicNums,
+    u32 longTermFrameIdx);
+
+static u32 Mmcop4(dpbStorage_t *dpb, u32 maxLongTermFrameIdx);
+
+static u32 Mmcop5(dpbStorage_t *dpb);
+
+static u32 Mmcop6(dpbStorage_t *dpb, u32 frameNum, i32 picOrderCnt,
+    u32 longTermFrameIdx);
+
+static u32 SlidingWindowRefPicMarking(dpbStorage_t *dpb);
+
+static i32 FindDpbPic(dpbStorage_t *dpb, i32 picNum, u32 isShortTerm);
+
+static void SetPicNums(dpbStorage_t *dpb, u32 currFrameNum);
+
+static dpbPicture_t* FindSmallestPicOrderCnt(dpbStorage_t *dpb);
+
+static u32 OutputPicture(dpbStorage_t *dpb);
+
+static void ShellSort(dpbPicture_t *pPic, u32 num);
+
+/*------------------------------------------------------------------------------
+
+    Function: ComparePictures
+
+        Functional description:
+            Function to compare dpb pictures, used by the ShellSort() function.
+            Order of the pictures after sorting shall be as follows:
+                1) short term reference pictures starting with the largest
+                   picNum
+                2) long term reference pictures starting with the smallest
+                   longTermPicNum
+                3) pictures unused for reference but needed for display
+                4) other pictures
+
+        Returns:
+            -1      pic 1 sorts before pic 2 ("pic 1 is greater")
+             0      equal from comparison point of view
+             1      pic 2 sorts before pic 1 ("pic 2 is greater")
+
+------------------------------------------------------------------------------*/
+
+static i32 ComparePictures(const void *ptr1, const void *ptr2)
+{
+    const dpbPicture_t *first;
+    const dpbPicture_t *second;
+
+    ASSERT(ptr1);
+    ASSERT(ptr2);
+
+    first = (const dpbPicture_t*)ptr1;
+    second = (const dpbPicture_t*)ptr2;
+
+    /* neither picture is a reference -> the one still waiting for display
+     * comes first */
+    if (!IS_REFERENCE(*first) && !IS_REFERENCE(*second))
+    {
+        if (first->toBeDisplayed && !second->toBeDisplayed)
+            return(-1);
+        if (!first->toBeDisplayed && second->toBeDisplayed)
+            return(1);
+        return(0);
+    }
+
+    /* exactly one of them is a reference -> that one comes first */
+    if (!IS_REFERENCE(*second))
+        return(-1);
+    if (!IS_REFERENCE(*first))
+        return(1);
+
+    /* two short term references -> larger picNum comes first */
+    if (IS_SHORT_TERM(*first) && IS_SHORT_TERM(*second))
+    {
+        if (first->picNum > second->picNum)
+            return(-1);
+        if (first->picNum < second->picNum)
+            return(1);
+        return(0);
+    }
+
+    /* short term beats long term */
+    if (IS_SHORT_TERM(*first))
+        return(-1);
+    if (IS_SHORT_TERM(*second))
+        return(1);
+
+    /* two long term references -> smaller picNum (which holds the
+     * longTermPicNum) comes first */
+    if (first->picNum > second->picNum)
+        return(1);
+    if (first->picNum < second->picNum)
+        return(-1);
+    return(0);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdReorderRefPicList
+
+        Functional
description:
+            Function to perform reference picture list reordering based on
+            reordering commands received in the slice header. See details
+            of the process in the H.264 standard.
+
+        Inputs:
+            dpb             pointer to dpb storage structure
+            order           pointer to reordering commands
+            currFrameNum    current frame number
+            numRefIdxActive number of active reference indices for current
+                            picture
+
+        Outputs:
+            dpb             'list' field of the structure reordered
+
+        Returns:
+            HANTRO_OK      success
+            HANTRO_NOK     if non-existing pictures referred to in the
+                           reordering commands
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdReorderRefPicList(
+  dpbStorage_t *dpb,
+  refPicListReordering_t *order,
+  u32 currFrameNum,
+  u32 numRefIdxActive)
+{
+
+/* Variables */
+
+    u32 i, j, k, picNumPred, refIdx;
+    i32 picNum, picNumNoWrap, index;
+    u32 isShortTerm;
+
+/* Code */
+
+    ASSERT(order);
+    ASSERT(currFrameNum <= dpb->maxFrameNum);
+    ASSERT(numRefIdxActive <= MAX_NUM_REF_IDX_L0_ACTIVE);
+
+    /* set dpb picture numbers for sorting; this is done even when no
+     * reordering was requested */
+    SetPicNums(dpb, currFrameNum);
+
+    if (!order->refPicListReorderingFlagL0)
+        return(HANTRO_OK);
+
+    refIdx = 0;
+    picNumPred = currFrameNum; /* prediction starts from the current frame number */
+
+    i = 0;
+    while (order->command[i].reorderingOfPicNumsIdc < 3) /* an idc of 3 or more ends the command list */
+    {
+        /* short term: idc 0 subtracts absDiffPicNum from the prediction,
+         * idc 1 adds it; both wrap around maxFrameNum */
+        if (order->command[i].reorderingOfPicNumsIdc < 2)
+        {
+            if (order->command[i].reorderingOfPicNumsIdc == 0)
+            {
+                picNumNoWrap =
+                    (i32)picNumPred - (i32)order->command[i].absDiffPicNum;
+                if (picNumNoWrap < 0)
+                    picNumNoWrap += (i32)dpb->maxFrameNum;
+            }
+            else
+            {
+                picNumNoWrap =
+                    (i32)(picNumPred + order->command[i].absDiffPicNum);
+                if (picNumNoWrap >= (i32)dpb->maxFrameNum)
+                    picNumNoWrap -= (i32)dpb->maxFrameNum;
+            }
+            picNumPred = (u32)picNumNoWrap; /* next command predicts from this value */
+            picNum = picNumNoWrap;
+            if ((u32)picNumNoWrap > currFrameNum) /* above the current frame number -> use the negative, wrapped picNum */
+                picNum -= (i32)dpb->maxFrameNum;
+            isShortTerm = HANTRO_TRUE;
+        }
+        /* long term: the command carries the longTermPicNum directly */
+        else
+        {
+            picNum = (i32)order->command[i].longTermPicNum;
+            isShortTerm = HANTRO_FALSE;
+
+        }
+        /* find corresponding picture from dpb; a missing picture or one
+         * that is marked non-existing makes the whole reordering fail */
+        index = FindDpbPic(dpb, picNum, isShortTerm);
+        if (index < 0 || !IS_EXISTING(dpb->buffer[index]))
+            return(HANTRO_NOK);
+
+        /* shift pictures: entries refIdx..numRefIdxActive-1 move one slot
+         * towards the end, so list[numRefIdxActive] is written as a
+         * temporary extra entry */
+        for (j = numRefIdxActive; j > refIdx; j--)
+            dpb->list[j] = dpb->list[j-1];
+        /* put picture into the list */
+        dpb->list[refIdx++] = &dpb->buffer[index];
+        /* remove later references to the same picture: compact the
+         * entries after the inserted one, skipping those that point to
+         * the same buffer element */
+        for (j = k = refIdx; j <= numRefIdxActive; j++)
+            if(dpb->list[j] != &dpb->buffer[index])
+                dpb->list[k++] = dpb->list[j];
+
+        i++;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Mmcop1
+
+        Functional description:
+            Function to mark a short-term reference picture unused for
+            reference, memory_management_control_operation equal to 1.
+            The picture is identified by picNum equal to
+            currPicNum - differenceOfPicNums. numRefFrames is decremented,
+            and fullness too unless the picture still waits for display.
+
+        Returns:
+            HANTRO_OK      success
+            HANTRO_NOK     failure, picture does not exist in the buffer
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop1(dpbStorage_t *dpb, u32 currPicNum, u32 differenceOfPicNums)
+{
+
+/* Variables */
+
+    i32 index, picNum;
+
+/* Code */
+
+    ASSERT(currPicNum < dpb->maxFrameNum);
+
+    picNum = (i32)currPicNum - (i32)differenceOfPicNums; /* signed on purpose: FindDpbPic takes an i32 picNum */
+
+    index = FindDpbPic(dpb, picNum, HANTRO_TRUE);
+    if (index < 0)
+        return(HANTRO_NOK);
+
+    SET_UNUSED(dpb->buffer[index]);
+    dpb->numRefFrames--;
+    if (!dpb->buffer[index].toBeDisplayed) /* slot is only released when the picture is not pending for display */
+        dpb->fullness--;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Mmcop2
+
+        Functional description:
+            Function to mark a long-term reference picture unused for
+            reference, memory_management_control_operation equal to 2.
+            The picture is looked up by longTermPicNum; the counters are
+            updated the same way as in Mmcop1.
+
+        Returns:
+            HANTRO_OK      success
+            HANTRO_NOK     failure, picture does not exist in the buffer
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop2(dpbStorage_t *dpb, u32 longTermPicNum)
+{
+
+/* Variables */
+
+    i32 index;
+
+/* Code */
+
+    index = FindDpbPic(dpb, (i32)longTermPicNum, HANTRO_FALSE);
+    if (index < 0)
+        return(HANTRO_NOK);
+
+    SET_UNUSED(dpb->buffer[index]);
+    dpb->numRefFrames--;
+    if (!dpb->buffer[index].toBeDisplayed)
+        dpb->fullness--;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Mmcop3
+
+        Functional description:
+            Function to assign a longTermFrameIdx to a short-term reference
+            frame (i.e. to change it to a long-term reference picture),
+            memory_management_control_operation equal to 3. A long-term
+            picture that already uses the same longTermFrameIdx is marked
+            unused first; note that this happens before the short-term
+            picture is looked up, so it is not undone if the lookup fails.
+
+        Returns:
+            HANTRO_OK      success
+            HANTRO_NOK     failure, short-term picture does not exist in the
+                           buffer or is a non-existing picture, or invalid
+                           longTermFrameIdx given
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop3(dpbStorage_t *dpb, u32 currPicNum, u32 differenceOfPicNums,
+    u32 longTermFrameIdx)
+{
+
+/* Variables */
+
+    i32 index, picNum;
+    u32 i;
+
+/* Code */
+
+    ASSERT(dpb);
+    ASSERT(currPicNum < dpb->maxFrameNum);
+
+    if ( (dpb->maxLongTermFrameIdx == NO_LONG_TERM_FRAME_INDICES) ||
+         (longTermFrameIdx > dpb->maxLongTermFrameIdx) )
+        return(HANTRO_NOK);
+
+    /* check if a long term picture with the same longTermFrameIdx already
+     * exist and remove it if necessary (for long term pictures picNum
+     * holds the long term index, see the assignment at the end) */
+    for (i = 0; i < dpb->maxRefFrames; i++)
+        if (IS_LONG_TERM(dpb->buffer[i]) &&
+          (u32)dpb->buffer[i].picNum == longTermFrameIdx)
+        {
+            SET_UNUSED(dpb->buffer[i]);
+            dpb->numRefFrames--;
+            if (!dpb->buffer[i].toBeDisplayed)
+                dpb->fullness--;
+            break;
+        }
+
+    picNum = (i32)currPicNum - (i32)differenceOfPicNums;
+
+    index = FindDpbPic(dpb, picNum, HANTRO_TRUE);
+    if (index < 0)
+        return(HANTRO_NOK);
+    if (!IS_EXISTING(dpb->buffer[index])) /* FindDpbPic also matches non-existing pictures, reject those here */
+        return(HANTRO_NOK);
+
+    dpb->buffer[index].status = LONG_TERM;
+    dpb->buffer[index].picNum = (i32)longTermFrameIdx; /* numRefFrames unchanged: the picture stays a reference */
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+
Function: Mmcop4
+
+        Functional description:
+            Function to set maxLongTermFrameIdx,
+            memory_management_control_operation equal to 4. Long-term
+            pictures whose index lies above the new maximum (or all of
+            them when the new maximum is NO_LONG_TERM_FRAME_INDICES) are
+            marked unused for reference.
+
+        Returns:
+            HANTRO_OK      success
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop4(dpbStorage_t *dpb, u32 maxLongTermFrameIdx)
+{
+    u32 idx;
+
+    dpb->maxLongTermFrameIdx = maxLongTermFrameIdx;
+
+    for (idx = 0; idx < dpb->maxRefFrames; idx++)
+    {
+        /* only long term pictures are affected */
+        if (!IS_LONG_TERM(dpb->buffer[idx]))
+            continue;
+
+        /* keep the picture when its long term index (stored in picNum)
+         * still fits and long term indices are allowed at all */
+        if ( ((u32)dpb->buffer[idx].picNum <= maxLongTermFrameIdx) &&
+             (dpb->maxLongTermFrameIdx != NO_LONG_TERM_FRAME_INDICES) )
+            continue;
+
+        SET_UNUSED(dpb->buffer[idx]);
+        dpb->numRefFrames--;
+        if (!dpb->buffer[idx].toBeDisplayed)
+            dpb->fullness--;
+    }
+
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Mmcop5
+
+        Functional description:
+            Function to mark all reference pictures unused for reference and
+            set maxLongTermFrameIdx to NO_LONG_TERM_FRAME_INDICES,
+            memory_management_control_operation equal to 5. Function flushes
+            the buffer and places all pictures that are needed for display into
+            the output buffer.
+
+        Returns:
+            HANTRO_OK      success
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop5(dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    u32 i;
+
+/* Code */
+
+    for (i = 0; i < 16; i++) /* fixed count of 16 entries, independent of dpbSize / maxRefFrames */
+    {
+        if (IS_REFERENCE(dpb->buffer[i]))
+        {
+            SET_UNUSED(dpb->buffer[i]);
+            if (!dpb->buffer[i].toBeDisplayed)
+                dpb->fullness--;
+        }
+    }
+
+    /* output all pictures: keep calling OutputPicture until it stops
+     * returning HANTRO_OK */
+    while (OutputPicture(dpb) == HANTRO_OK)
+        ;
+    dpb->numRefFrames = 0;
+    dpb->maxLongTermFrameIdx = NO_LONG_TERM_FRAME_INDICES;
+    dpb->prevRefFrameNum = 0;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Mmcop6
+
+        Functional description:
+            Function to assign longTermFrameIdx to the current picture,
+            memory_management_control_operation equal to 6. A long-term
+            picture already using the same index is marked unused first.
+            On success the current picture (dpb->currentOut) becomes a
+            long-term reference and both numRefFrames and fullness are
+            incremented.
+
+        Returns:
+            HANTRO_OK      success
+            HANTRO_NOK     invalid longTermFrameIdx or no room for current
+                           picture in the buffer
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop6(dpbStorage_t *dpb, u32 frameNum, i32 picOrderCnt,
+    u32 longTermFrameIdx)
+{
+
+/* Variables */
+
+    u32 i;
+
+/* Code */
+
+    ASSERT(frameNum < dpb->maxFrameNum);
+
+    if ( (dpb->maxLongTermFrameIdx == NO_LONG_TERM_FRAME_INDICES) ||
+         (longTermFrameIdx > dpb->maxLongTermFrameIdx) )
+        return(HANTRO_NOK);
+
+    /* check if a long term picture with the same longTermFrameIdx already
+     * exist and remove it if necessary */
+    for (i = 0; i < dpb->maxRefFrames; i++)
+        if (IS_LONG_TERM(dpb->buffer[i]) &&
+          (u32)dpb->buffer[i].picNum == longTermFrameIdx)
+        {
+            SET_UNUSED(dpb->buffer[i]);
+            dpb->numRefFrames--;
+            if (!dpb->buffer[i].toBeDisplayed)
+                dpb->fullness--;
+            break;
+        }
+
+    if (dpb->numRefFrames < dpb->maxRefFrames)
+    {
+        dpb->currentOut->frameNum = frameNum;
+        dpb->currentOut->picNum = (i32)longTermFrameIdx; /* long term pictures keep their index in picNum */
+        dpb->currentOut->picOrderCnt = picOrderCnt;
+        dpb->currentOut->status = LONG_TERM;
+        if (dpb->noReordering)
+            dpb->currentOut->toBeDisplayed = HANTRO_FALSE;
+        else
+            dpb->currentOut->toBeDisplayed = HANTRO_TRUE;
+        dpb->numRefFrames++;
+        dpb->fullness++;
+        return(HANTRO_OK);
+    }
+    /* if there is no room, return an error */
+    else
+        return(HANTRO_NOK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdMarkDecRefPic
+
+        Functional description:
+            Function to perform reference picture marking process. This
+            function should be called both for reference and non-reference
+            pictures. Non-reference pictures shall have mark pointer set to
+            NULL.
+
+            Three cases are handled: mark == NULL (non-reference picture),
+            isIdr (buffer is flushed with Mmcop5 and the current picture
+            becomes the only reference) and other reference pictures
+            (memory management control operations or sliding window
+            marking, then insertion of the current picture). Afterwards
+            the picture is either output immediately (noReordering) or
+            pictures are output while the buffer is over-full, and the
+            buffer is sorted with ShellSort.
+
+        Inputs:
+            dpb         pointer to the DPB data structure
+            mark        pointer to reference picture marking commands
+            image       pointer to current picture to be placed in the buffer
+            frameNum    frame number of the current picture
+            picOrderCnt picture order count for the current picture
+            isIdr       flag to indicate if the current picture is an
+                        IDR picture
+            currentPicId    identifier for the current picture, from the
+                            application, stored along with the picture
+            numErrMbs       number of concealed macroblocks in the current
+                            picture, stored along with the picture
+
+        Outputs:
+            dpb         'buffer' modified, possible output frames placed into
+                        'outBuf'
+
+        Returns:
+            HANTRO_OK   success
+            HANTRO_NOK  failure: image is not the picture reserved in
+                        dpb->currentOut, a marking operation failed or
+                        there was no room for the current picture
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdMarkDecRefPic(
+  dpbStorage_t *dpb,
+  decRefPicMarking_t *mark,
+  image_t *image,
+  u32 frameNum,
+  i32 picOrderCnt,
+  u32 isIdr,
+  u32 currentPicId,
+  u32 numErrMbs)
+{
+
+/* Variables */
+
+    u32 i, status;
+    u32 markedAsLongTerm;
+    u32 toBeDisplayed;
+
+/* Code */
+
+    ASSERT(dpb);
+    ASSERT(mark || !isIdr);
+    ASSERT(!isIdr || (frameNum == 0 && picOrderCnt == 0));
+    ASSERT(frameNum < dpb->maxFrameNum);
+
+    if (image->data != dpb->currentOut->data) /* only the picture reserved as currentOut may be marked */
+    {
+        EPRINT("TRYING TO MARK NON-ALLOCATED IMAGE");
+        return(HANTRO_NOK);
+    }
+
+    dpb->lastContainsMmco5 = HANTRO_FALSE;
+    status = HANTRO_OK;
+
+    toBeDisplayed = dpb->noReordering ? HANTRO_FALSE : HANTRO_TRUE;
+
+    /* non-reference picture, stored for display reordering purposes;
+     * it only occupies a slot when reordering is enabled */
+    if (mark == NULL)
+    {
+        dpb->currentOut->status = UNUSED;
+        dpb->currentOut->frameNum = frameNum;
+        dpb->currentOut->picNum = (i32)frameNum;
+        dpb->currentOut->picOrderCnt = picOrderCnt;
+        dpb->currentOut->toBeDisplayed = toBeDisplayed;
+        if (!dpb->noReordering)
+            dpb->fullness++;
+    }
+    /* IDR picture */
+    else if (isIdr)
+    {
+
+        /* h264bsdCheckGapsInFrameNum not called for IDR pictures -> have to
+         * reset numOut and outIndex here */
+        dpb->numOut = dpb->outIndex = 0;
+
+        /* flush the buffer */
+        Mmcop5(dpb);
+        /* if noOutputOfPriorPicsFlag was set -> the pictures preceding the
+         * IDR picture shall not be output -> set output buffer empty */
+        if (mark->noOutputOfPriorPicsFlag || dpb->noReordering)
+        {
+            dpb->numOut = 0;
+            dpb->outIndex = 0;
+        }
+
+        if (mark->longTermReferenceFlag)
+        {
+            dpb->currentOut->status = LONG_TERM;
+            dpb->maxLongTermFrameIdx = 0;
+        }
+        else
+        {
+            dpb->currentOut->status = SHORT_TERM;
+            dpb->maxLongTermFrameIdx = NO_LONG_TERM_FRAME_INDICES;
+        }
+        dpb->currentOut->frameNum = 0;
+        dpb->currentOut->picNum = 0;
+        dpb->currentOut->picOrderCnt = 0;
+        dpb->currentOut->toBeDisplayed = toBeDisplayed;
+        dpb->fullness = 1; /* the IDR picture is the only picture and the only reference left */
+        dpb->numRefFrames = 1;
+    }
+    /* reference picture */
+    else
+    {
+        markedAsLongTerm = HANTRO_FALSE;
+        if (mark->adaptiveRefPicMarkingModeFlag)
+        {
+            i = 0;
+            while (mark->operation[i].memoryManagementControlOperation) /* operation 0 ends the list */
+            {
+                switch (mark->operation[i].memoryManagementControlOperation)
+                {
+                    case 1:
+                        status = Mmcop1(
+                          dpb,
+                          frameNum,
+                          mark->operation[i].differenceOfPicNums);
+                        break;
+
+                    case 2:
+                        status = Mmcop2(dpb, mark->operation[i].longTermPicNum);
+                        break;
+
+                    case 3:
+                        status =  Mmcop3(
+                          dpb,
+                          frameNum,
+                          mark->operation[i].differenceOfPicNums,
+                          mark->operation[i].longTermFrameIdx);
+                        break;
+
+                    case 4:
+                        status = Mmcop4(
+                          dpb,
+                          mark->operation[i].maxLongTermFrameIdx);
+                        break;
+
+                    case 5:
+                        status = Mmcop5(dpb);
+                        dpb->lastContainsMmco5 = HANTRO_TRUE;
+                        frameNum = 0; /* later operations and the insertion below see frame number 0 */
+                        break;
+
+                    case 6:
+                        status = Mmcop6(
+                          dpb,
+                          frameNum,
+                          picOrderCnt,
+                          mark->operation[i].longTermFrameIdx);
+                        if (status == HANTRO_OK)
+                            markedAsLongTerm = HANTRO_TRUE;
+                        break;
+
+                    default: /* invalid memory management control operation */
+                        status = HANTRO_NOK;
+                        break;
+                }
+                if (status != HANTRO_OK) /* first failing operation aborts the rest of the list */
+                {
+                    break;
+                }
+                i++;
+            }
+        }
+        else
+        {
+            status = SlidingWindowRefPicMarking(dpb);
+        }
+        /* if current picture was not marked as long-term reference by
+         * memory management control operation 6 -> mark current as short
+         * term and insert it into dpb (if there is room); this is done
+         * even when one of the operations above failed */
+        if (!markedAsLongTerm)
+        {
+            if (dpb->numRefFrames < dpb->maxRefFrames)
+            {
+                dpb->currentOut->frameNum = frameNum;
+                dpb->currentOut->picNum = (i32)frameNum;
+                dpb->currentOut->picOrderCnt = picOrderCnt;
+                dpb->currentOut->status = SHORT_TERM;
+                dpb->currentOut->toBeDisplayed = toBeDisplayed;
+                dpb->fullness++;
+                dpb->numRefFrames++;
+            }
+            /* no room */
+            else
+            {
+                status = HANTRO_NOK;
+            }
+        }
+    }
+
+    dpb->currentOut->isIdr = isIdr;
+    dpb->currentOut->picId = currentPicId;
+    dpb->currentOut->numErrMbs = numErrMbs;
+
+    /* dpb was initialized to not to reorder the pictures -> output current
+     * picture immediately */
+    if (dpb->noReordering)
+    {
+        ASSERT(dpb->numOut == 0);
+        ASSERT(dpb->outIndex == 0);
+        dpb->outBuf[dpb->numOut].data  = dpb->currentOut->data;
+        dpb->outBuf[dpb->numOut].isIdr = dpb->currentOut->isIdr;
+        dpb->outBuf[dpb->numOut].picId = dpb->currentOut->picId;
+        dpb->outBuf[dpb->numOut].numErrMbs = dpb->currentOut->numErrMbs;
+        dpb->numOut++;
+    }
+    else
+    {
+        /* output pictures if buffer full.
+         * NOTE(review): the result of OutputPicture is only checked by
+         * ASSERT; if ASSERT compiles to nothing and OutputPicture could
+         * fail without lowering fullness this loop would not end -
+         * confirm against OutputPicture */
+        while (dpb->fullness > dpb->dpbSize)
+        {
+            i = OutputPicture(dpb);
+            ASSERT(i == HANTRO_OK);
+        }
+    }
+
+    /* sort dpb; all dpbSize + 1 entries take part, i.e. including the
+     * slot that h264bsdAllocateDpbImage hands out as currentOut */
+    ShellSort(dpb->buffer, dpb->dpbSize+1);
+
+    return(status);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdGetRefPicData
+
+        Functional description:
+            Function to get reference picture data from the reference picture
+            list
+
+        Returns:
+            pointer to desired reference picture data
+            NULL if index is above 16, the list entry is empty or the
+            entry refers to a non-existing picture
+
+------------------------------------------------------------------------------*/
+
+u8* h264bsdGetRefPicData(dpbStorage_t *dpb, u32 index)
+{
+    /* the index has to be validated before the list is touched */
+    if (index > 16)
+        return(NULL);
+
+    if (dpb->list[index] == NULL)
+        return(NULL);
+
+    if (!IS_EXISTING(*dpb->list[index]))
+        return(NULL);
+
+    return(dpb->list[index]->data);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdAllocateDpbImage
+
+        Functional description:
+            function to allocate memory for a image. This function does not
+            really allocate any memory but reserves one of the buffer
+            positions for decoding of current picture: the entry at index
+            dpbSize becomes dpb->currentOut
+
+        Returns:
+            pointer to memory area for the image
+
+
+------------------------------------------------------------------------------*/
+
+u8* h264bsdAllocateDpbImage(dpbStorage_t *dpb)
+{
+    /* the slot handed out must be neither pending for display nor in use
+     * as a reference */
+    ASSERT( !dpb->buffer[dpb->dpbSize].toBeDisplayed &&
+            !IS_REFERENCE(dpb->buffer[dpb->dpbSize]) );
+    ASSERT(dpb->fullness <=  dpb->dpbSize);
+
+    dpb->currentOut = &dpb->buffer[dpb->dpbSize];
+
+    return(dpb->currentOut->data);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: SlidingWindowRefPicMarking
+
+        Functional description:
+            Function to perform sliding window reference picture marking process.
+
+        Outputs:
+            HANTRO_OK      success (also when there was still room and
+                           nothing had to be marked)
+            HANTRO_NOK     failure, no short-term reference frame found that
+                           could be marked unused
+
+
+------------------------------------------------------------------------------*/
+
+static u32 SlidingWindowRefPicMarking(dpbStorage_t *dpb)
+{
+    i32 oldest = -1;        /* buffer index of the candidate, -1 = none yet */
+    i32 smallestPicNum = 0;
+    u32 idx;
+
+    /* room left for another reference -> nothing to drop */
+    if (dpb->numRefFrames < dpb->maxRefFrames)
+        return(HANTRO_OK);
+
+    /* find the oldest short term picture, i.e. the one with the smallest
+     * picNum among the first numRefFrames entries */
+    for (idx = 0; idx < dpb->numRefFrames; idx++)
+    {
+        if (!IS_SHORT_TERM(dpb->buffer[idx]))
+            continue;
+
+        if (oldest == -1 || dpb->buffer[idx].picNum < smallestPicNum)
+        {
+            oldest = (i32)idx;
+            smallestPicNum = dpb->buffer[idx].picNum;
+        }
+    }
+
+    if (oldest < 0)
+        return(HANTRO_NOK);
+
+    SET_UNUSED(dpb->buffer[oldest]);
+    dpb->numRefFrames--;
+    if (!dpb->buffer[oldest].toBeDisplayed)
+        dpb->fullness--;
+
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInitDpb
+
+        Functional description:
+            Function to initialize DPB. Reserves memories for the buffer,
+            reference picture list and output buffer. dpbSize indicates
+            the maximum DPB size indicated by the levelIdc in the stream.
+            If noReordering flag is FALSE the DPB stores dpbSize pictures
+            for display reordering purposes. On the other hand, if the
+            flag is TRUE the DPB only stores maxRefFrames reference pictures
+            and outputs all the pictures immediately.
+
+        Inputs:
+            picSizeInMbs    picture size in macroblocks
+            dpbSize         size of the DPB (number of pictures)
+            maxRefFrames    max number of reference frames
+            maxFrameNum     max frame number
+            noReordering    flag to indicate that DPB does not have to
+                            prepare to reorder frames for display
+
+        Outputs:
+            dpb             pointer to dpb data storage
+
+        Returns:
+            HANTRO_OK       success
+            MEMORY_ALLOCATION_ERROR if memory allocation failed, or if the
+                            requested picture size / DPB size cannot be
+                            represented by the buffers allocated here
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdInitDpb(
+  dpbStorage_t *dpb,
+  u32 picSizeInMbs,
+  u32 dpbSize,
+  u32 maxRefFrames,
+  u32 maxFrameNum,
+  u32 noReordering)
+{
+
+/* Variables */
+
+    u32 i;
+    u32 numPics;
+    /* largest picSizeInMbs for which picSizeInMbs*384 + 32+15 still fits
+     * into a u32 */
+    const u32 maxPicSizeInMbs = ((u32)~(u32)0 - (32 + 15)) / 384;
+
+/* Code */
+
+    ASSERT(picSizeInMbs);
+    ASSERT(maxRefFrames <= MAX_NUM_REF_PICS);
+    ASSERT(maxRefFrames <= dpbSize);
+    ASSERT(maxFrameNum);
+    ASSERT(dpbSize);
+
+    /* the allocation size below is computed in u32 arithmetic; refuse
+     * picture sizes that would make it wrap around and silently allocate
+     * too small a buffer */
+    if (picSizeInMbs > maxPicSizeInMbs)
+        return(MEMORY_ALLOCATION_ERROR);
+
+    /* number of pictures the DPB is going to hold: only the reference
+     * pictures (at least one) when no reordering is needed */
+    numPics = noReordering ? MAX(maxRefFrames, 1) : dpbSize;
+
+    /* dpb->buffer has a fixed MAX_NUM_REF_IDX_L0_ACTIVE + 1 entries and
+     * numPics + 1 of them are initialised below; the ASSERTs above do not
+     * protect builds where ASSERT is compiled out */
+    if (numPics > MAX_NUM_REF_IDX_L0_ACTIVE)
+        return(MEMORY_ALLOCATION_ERROR);
+
+    dpb->maxLongTermFrameIdx = NO_LONG_TERM_FRAME_INDICES;
+    dpb->maxRefFrames        = MAX(maxRefFrames, 1);
+    dpb->dpbSize             = numPics;
+    dpb->maxFrameNum         = maxFrameNum;
+    dpb->noReordering        = noReordering;
+    dpb->fullness            = 0;
+    dpb->numRefFrames        = 0;
+    dpb->prevRefFrameNum     = 0;
+
+    ALLOCATE(dpb->buffer, MAX_NUM_REF_IDX_L0_ACTIVE + 1, dpbPicture_t);
+    if (dpb->buffer == NULL)
+        return(MEMORY_ALLOCATION_ERROR);
+    H264SwDecMemset(dpb->buffer, 0,
+            (MAX_NUM_REF_IDX_L0_ACTIVE + 1)*sizeof(dpbPicture_t));
+    for (i = 0; i < dpb->dpbSize + 1; i++)
+    {
+        /* Allocate needed amount of memory, which is:
+         * image size + 32 + 15, where 32 comes from the fact that in ARM OpenMax
+         * DL implementation Functions may read beyond the end of an array,
+         * by a maximum of 32 bytes. And +15 comes from the need to align memory
+         * to 16-byte boundary */
+        ALLOCATE(dpb->buffer[i].pAllocatedData, (picSizeInMbs*384 + 32+15), u8);
+        if (dpb->buffer[i].pAllocatedData == NULL)
+            return(MEMORY_ALLOCATION_ERROR);
+
+        dpb->buffer[i].data = ALIGN(dpb->buffer[i].pAllocatedData, 16);
+    }
+
+    ALLOCATE(dpb->list, MAX_NUM_REF_IDX_L0_ACTIVE + 1, dpbPicture_t*);
+    ALLOCATE(dpb->outBuf, dpb->dpbSize+1, dpbOutPicture_t);
+
+    if (dpb->list == NULL || dpb->outBuf == NULL)
+        return(MEMORY_ALLOCATION_ERROR);
+
+    H264SwDecMemset(dpb->list, 0,
+            ((MAX_NUM_REF_IDX_L0_ACTIVE + 1) * sizeof(dpbPicture_t*)) );
+
+    dpb->numOut = dpb->outIndex = 0;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdResetDpb
+
+        Functional description:
+            Function to reset DPB. This function should be called when an IDR
+            slice (other than the first) activates new sequence parameter set.
+            Function calls h264bsdFreeDpb to free old allocated memories and
+            h264bsdInitDpb to re-initialize the DPB. Same inputs, outputs and
+            returns as for h264bsdInitDpb.
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdResetDpb(
+  dpbStorage_t *dpb,
+  u32 picSizeInMbs,
+  u32 dpbSize,
+  u32 maxRefFrames,
+  u32 maxFrameNum,
+  u32 noReordering)
+{
+
+/* Code */
+
+    ASSERT(picSizeInMbs);
+    ASSERT(maxRefFrames <= MAX_NUM_REF_PICS);
+    ASSERT(maxRefFrames <= dpbSize);
+    ASSERT(maxFrameNum);
+    ASSERT(dpbSize);
+
+    h264bsdFreeDpb(dpb);
+
+    return h264bsdInitDpb(dpb, picSizeInMbs, dpbSize, maxRefFrames,
+                          maxFrameNum, noReordering);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInitRefPicList
+
+        Functional description:
+            Function to initialize reference picture list. Function just
+            sets pointers in the list according to pictures in the buffer.
+ The buffer is assumed to contain pictures sorted according to + what the H.264 standard says about initial reference picture list. + + Inputs: + dpb pointer to dpb data structure + + Outputs: + dpb 'list' field initialized + + Returns: + none + +------------------------------------------------------------------------------*/ + +void h264bsdInitRefPicList(dpbStorage_t *dpb) +{ + +/* Variables */ + + u32 i; + +/* Code */ + + for (i = 0; i < dpb->numRefFrames; i++) + dpb->list[i] = &dpb->buffer[i]; + +} + +/*------------------------------------------------------------------------------ + + Function: FindDpbPic + + Functional description: + Function to find a reference picture from the buffer. The picture + to be found is identified by picNum and isShortTerm flag. + + Returns: + index of the picture in the buffer + -1 if the specified picture was not found in the buffer + +------------------------------------------------------------------------------*/ + +static i32 FindDpbPic(dpbStorage_t *dpb, i32 picNum, u32 isShortTerm) +{ + +/* Variables */ + + u32 i = 0; + u32 found = HANTRO_FALSE; + +/* Code */ + + if (isShortTerm) + { + while (i < dpb->maxRefFrames && !found) + { + if (IS_SHORT_TERM(dpb->buffer[i]) && + dpb->buffer[i].picNum == picNum) + found = HANTRO_TRUE; + else + i++; + } + } + else + { + ASSERT(picNum >= 0); + while (i < dpb->maxRefFrames && !found) + { + if (IS_LONG_TERM(dpb->buffer[i]) && + dpb->buffer[i].picNum == picNum) + found = HANTRO_TRUE; + else + i++; + } + } + + if (found) + return((i32)i); + else + return(-1); + +} + +/*------------------------------------------------------------------------------ + + Function: SetPicNums + + Functional description: + Function to set picNum values for short-term pictures in the + buffer. Numbering of pictures is based on frame numbers and as + frame numbers are modulo maxFrameNum -> frame numbers of older + pictures in the buffer may be bigger than the currFrameNum. 
+ picNums will be set so that current frame has the largest picNum + and all the short-term frames in the buffer will get smaller picNum + representing their "distance" from the current frame. This + function kind of maps the modulo arithmetic back to normal. + +------------------------------------------------------------------------------*/ + +static void SetPicNums(dpbStorage_t *dpb, u32 currFrameNum) +{ + +/* Variables */ + + u32 i; + i32 frameNumWrap; + +/* Code */ + + ASSERT(dpb); + ASSERT(currFrameNum < dpb->maxFrameNum); + + for (i = 0; i < dpb->numRefFrames; i++) + if (IS_SHORT_TERM(dpb->buffer[i])) + { + if (dpb->buffer[i].frameNum > currFrameNum) + frameNumWrap = + (i32)dpb->buffer[i].frameNum - (i32)dpb->maxFrameNum; + else + frameNumWrap = (i32)dpb->buffer[i].frameNum; + dpb->buffer[i].picNum = frameNumWrap; + } + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdCheckGapsInFrameNum + + Functional description: + Function to check gaps in frame_num and generate non-existing + (short term) reference pictures if necessary. This function should + be called only for non-IDR pictures. 
+ + Inputs: + dpb pointer to dpb data structure + frameNum frame number of the current picture + isRefPic flag to indicate if current picture is a reference or + non-reference picture + gapsAllowed Flag which indicates active SPS stance on whether + to allow gaps + + Outputs: + dpb 'buffer' possibly modified by inserting non-existing + pictures with sliding window marking process + + Returns: + HANTRO_OK success + HANTRO_NOK error in sliding window reference picture marking or + frameNum equal to previous reference frame used for + a reference picture + +------------------------------------------------------------------------------*/ + +u32 h264bsdCheckGapsInFrameNum(dpbStorage_t *dpb, u32 frameNum, u32 isRefPic, + u32 gapsAllowed) +{ + +/* Variables */ + + u32 unUsedShortTermFrameNum; + u8 *tmp; + +/* Code */ + + ASSERT(dpb); + ASSERT(dpb->fullness <= dpb->dpbSize); + ASSERT(frameNum < dpb->maxFrameNum); + + dpb->numOut = 0; + dpb->outIndex = 0; + + if(!gapsAllowed) + return(HANTRO_OK); + + if ( (frameNum != dpb->prevRefFrameNum) && + (frameNum != ((dpb->prevRefFrameNum + 1) % dpb->maxFrameNum))) + { + + unUsedShortTermFrameNum = (dpb->prevRefFrameNum + 1) % dpb->maxFrameNum; + + /* store data pointer of last buffer position to be used as next + * "allocated" data pointer if last buffer position after this process + * contains data pointer located in outBuf (buffer placed in the output + * shall not be overwritten by the current picture) */ + tmp = dpb->buffer[dpb->dpbSize].data; + do + { + SetPicNums(dpb, unUsedShortTermFrameNum); + + if (SlidingWindowRefPicMarking(dpb) != HANTRO_OK) + { + return(HANTRO_NOK); + } + + /* output pictures if buffer full */ + while (dpb->fullness >= dpb->dpbSize) + { +#ifdef _ASSERT_USED + ASSERT(!dpb->noReordering); + ASSERT(OutputPicture(dpb) == HANTRO_OK); +#else + OutputPicture(dpb); +#endif + } + + /* add to end of list */ + ASSERT( !dpb->buffer[dpb->dpbSize].toBeDisplayed && + !IS_REFERENCE(dpb->buffer[dpb->dpbSize]) ); + 
dpb->buffer[dpb->dpbSize].status = NON_EXISTING; + dpb->buffer[dpb->dpbSize].frameNum = unUsedShortTermFrameNum; + dpb->buffer[dpb->dpbSize].picNum = (i32)unUsedShortTermFrameNum; + dpb->buffer[dpb->dpbSize].picOrderCnt = 0; + dpb->buffer[dpb->dpbSize].toBeDisplayed = HANTRO_FALSE; + dpb->fullness++; + dpb->numRefFrames++; + + /* sort the buffer */ + ShellSort(dpb->buffer, dpb->dpbSize+1); + + unUsedShortTermFrameNum = (unUsedShortTermFrameNum + 1) % + dpb->maxFrameNum; + + } while (unUsedShortTermFrameNum != frameNum); + + /* pictures placed in output buffer -> check that 'data' in + * buffer position dpbSize is not in the output buffer (this will be + * "allocated" by h264bsdAllocateDpbImage). If it is -> exchange data + * pointer with the one stored in the beginning */ + if (dpb->numOut) + { + u32 i; + + for (i = 0; i < dpb->numOut; i++) + { + if (dpb->outBuf[i].data == dpb->buffer[dpb->dpbSize].data) + { + /* find buffer position containing data pointer stored in + * tmp */ + for (i = 0; i < dpb->dpbSize; i++) + { + if (dpb->buffer[i].data == tmp) + { + dpb->buffer[i].data = + dpb->buffer[dpb->dpbSize].data; + dpb->buffer[dpb->dpbSize].data = tmp; + break; + } + } + ASSERT(i < dpb->dpbSize); + break; + } + } + } + } + /* frameNum for reference pictures shall not be the same as for previous + * reference picture, otherwise accesses to pictures in the buffer cannot + * be solved unambiguously */ + else if (isRefPic && frameNum == dpb->prevRefFrameNum) + { + return(HANTRO_NOK); + } + + /* save current frame_num in prevRefFrameNum. 
For non-reference frame + * prevFrameNum is set to frame number of last non-existing frame above */ + if (isRefPic) + dpb->prevRefFrameNum = frameNum; + else if (frameNum != dpb->prevRefFrameNum) + { + dpb->prevRefFrameNum = + (frameNum + dpb->maxFrameNum - 1) % dpb->maxFrameNum; + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: FindSmallestPicOrderCnt + + Functional description: + Function to find picture with smallest picture order count. This + will be the next picture in display order. + + Returns: + pointer to the picture, NULL if no pictures to be displayed + +------------------------------------------------------------------------------*/ + +dpbPicture_t* FindSmallestPicOrderCnt(dpbStorage_t *dpb) +{ + +/* Variables */ + + u32 i; + i32 picOrderCnt; + dpbPicture_t *tmp; + +/* Code */ + + ASSERT(dpb); + + picOrderCnt = 0x7FFFFFFF; + tmp = NULL; + + for (i = 0; i <= dpb->dpbSize; i++) + { + if (dpb->buffer[i].toBeDisplayed && + (dpb->buffer[i].picOrderCnt < picOrderCnt)) + { + tmp = dpb->buffer + i; + picOrderCnt = dpb->buffer[i].picOrderCnt; + } + } + + return(tmp); + +} + +/*------------------------------------------------------------------------------ + + Function: OutputPicture + + Functional description: + Function to put next display order picture into the output buffer. 
+ + Returns: + HANTRO_OK success + HANTRO_NOK no pictures to display + +------------------------------------------------------------------------------*/ + +u32 OutputPicture(dpbStorage_t *dpb) +{ + +/* Variables */ + + dpbPicture_t *tmp; + +/* Code */ + + ASSERT(dpb); + + if (dpb->noReordering) + return(HANTRO_NOK); + + tmp = FindSmallestPicOrderCnt(dpb); + + /* no pictures to be displayed */ + if (tmp == NULL) + return(HANTRO_NOK); + + dpb->outBuf[dpb->numOut].data = tmp->data; + dpb->outBuf[dpb->numOut].isIdr = tmp->isIdr; + dpb->outBuf[dpb->numOut].picId = tmp->picId; + dpb->outBuf[dpb->numOut].numErrMbs = tmp->numErrMbs; + dpb->numOut++; + + tmp->toBeDisplayed = HANTRO_FALSE; + if (!IS_REFERENCE(*tmp)) + { + dpb->fullness--; + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdDpbOutputPicture + + Functional description: + Function to get next display order picture from the output buffer. + + Return: + pointer to output picture structure, NULL if no pictures to + display + +------------------------------------------------------------------------------*/ + +dpbOutPicture_t* h264bsdDpbOutputPicture(dpbStorage_t *dpb) +{ + +/* Variables */ + +/* Code */ + + ASSERT(dpb); + + if (dpb->outIndex < dpb->numOut) + return(dpb->outBuf + dpb->outIndex++); + else + return(NULL); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdFlushDpb + + Functional description: + Function to flush the DPB. Function puts all pictures needed for + display into the output buffer. This function shall be called in + the end of the stream to obtain pictures buffered for display + re-ordering purposes. 
+ +------------------------------------------------------------------------------*/ + +void h264bsdFlushDpb(dpbStorage_t *dpb) +{ + + /* don't do anything if buffer not reserved */ + if (dpb->buffer) + { + dpb->flushed = 1; + /* output all pictures */ + while (OutputPicture(dpb) == HANTRO_OK) + ; + } + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdFreeDpb + + Functional description: + Function to free memories reserved for the DPB. + +------------------------------------------------------------------------------*/ + +void h264bsdFreeDpb(dpbStorage_t *dpb) +{ + +/* Variables */ + + u32 i; + +/* Code */ + + ASSERT(dpb); + + if (dpb->buffer) + { + for (i = 0; i < dpb->dpbSize+1; i++) + { + FREE(dpb->buffer[i].pAllocatedData); + } + } + FREE(dpb->buffer); + FREE(dpb->list); + FREE(dpb->outBuf); + +} + +/*------------------------------------------------------------------------------ + + Function: ShellSort + + Functional description: + Sort pictures in the buffer. Function implements Shell's method, + i.e. diminishing increment sort. See e.g. "Numerical Recipes in C" + for more information. 
+ +------------------------------------------------------------------------------*/ + +static void ShellSort(dpbPicture_t *pPic, u32 num) +{ + + u32 i, j; + u32 step; + dpbPicture_t tmpPic; + + step = 7; + + while (step) + { + for (i = step; i < num; i++) + { + tmpPic = pPic[i]; + j = i; + while (j >= step && ComparePictures(pPic + j - step, &tmpPic) > 0) + { + pPic[j] = pPic[j-step]; + j -= step; + } + pPic[j] = tmpPic; + } + step >>= 1; + } + +} + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.h new file mode 100755 index 0000000000000000000000000000000000000000..0e25084dd8407de73dd8bf6ffab63a5b1fc419cd --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.h @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_DPB_H +#define H264SWDEC_DPB_H + +/*------------------------------------------------------------------------------ + 1. 
#include "basetype.h"
#include "h264bsd_slice_header.h"
#include "h264bsd_image.h"

/*------------------------------------------------------------------------------
    2. Module defines
------------------------------------------------------------------------------*/

/*------------------------------------------------------------------------------
    3. Data types
------------------------------------------------------------------------------*/

/* enumeration to represent status of buffered image. UNUSED is zero, so any
 * non-zero status means the picture is held in some reference state */
typedef enum {
    UNUSED = 0,
    NON_EXISTING,
    SHORT_TERM,
    LONG_TERM
} dpbPictureStatus_e;

/* structure to represent a buffered picture */
typedef struct {
    u8 *data;           /* 16-byte aligned pointer of pAllocatedData */
    u8 *pAllocatedData; /* allocated picture pointer; h264bsdInitDpb reserves
                         * (picture size + 32 + 15) bytes: 32 bytes of
                         * over-read slack plus 15 bytes for alignment */
    i32 picNum;         /* signed: may be negative for wrapped frame numbers */
    u32 frameNum;
    i32 picOrderCnt;    /* smallest value is output first */
    dpbPictureStatus_e status;
    u32 toBeDisplayed;  /* non-zero while the picture waits for output */
    u32 picId;          /* picId, numErrMbs and isIdr are copied as such */
    u32 numErrMbs;      /* into the output entry when the picture is */
    u32 isIdr;          /* output */
} dpbPicture_t;

/* structure to represent display image output from the buffer */
typedef struct {
    u8 *data;
    u32 picId;
    u32 numErrMbs;
    u32 isIdr;
} dpbOutPicture_t;

/* structure to represent DPB */
typedef struct {
    dpbPicture_t *buffer;       /* picture storage; positions 0..dpbSize used */
    dpbPicture_t **list;        /* reference picture list, points to buffer */
    dpbPicture_t *currentOut;   /* NOTE(review): not used in the visible part
                                 * of h264bsd_dpb.c -- confirm purpose */
    dpbOutPicture_t *outBuf;    /* output queue, dpbSize+1 entries */
    u32 numOut;                 /* number of entries written to outBuf */
    u32 outIndex;               /* next outBuf entry to hand out */
    u32 maxRefFrames;           /* at least 1 after h264bsdInitDpb */
    u32 dpbSize;                /* equals maxRefFrames when noReordering */
    u32 maxFrameNum;
    u32 maxLongTermFrameIdx;
    u32 numRefFrames;           /* pictures currently used for reference */
    u32 fullness;               /* pictures kept for reference or display */
    u32 prevRefFrameNum;
    u32 lastContainsMmco5;      /* NOTE(review): presumably set by reference
                                 * picture marking -- confirm */
    u32 noReordering;           /* non-zero: no display reordering */
    u32 flushed;                /* set to 1 by h264bsdFlushDpb */
} dpbStorage_t;

/*------------------------------------------------------------------------------
    4. Function prototypes
------------------------------------------------------------------------------*/
/* Allocates and initializes the DPB. Note: the fourth parameter is called
 * maxRefFrames in the definition in h264bsd_dpb.c. */
u32 h264bsdInitDpb(
  dpbStorage_t *dpb,
  u32 picSizeInMbs,
  u32 dpbSize,
  u32 numRefFrames,
  u32 maxFrameNum,
  u32 noReordering);

/* Frees the DPB and initializes it again; same parameters and return values
 * as h264bsdInitDpb */
u32 h264bsdResetDpb(
  dpbStorage_t *dpb,
  u32 picSizeInMbs,
  u32 dpbSize,
  u32 numRefFrames,
  u32 maxFrameNum,
  u32 noReordering);

/* Points list[i] at buffer[i] for every reference frame in the buffer */
void h264bsdInitRefPicList(dpbStorage_t *dpb);

/* NOTE(review): presumably returns the data pointer of the buffer position
 * reserved for the current picture -- confirm against the definition */
u8* h264bsdAllocateDpbImage(dpbStorage_t *dpb);

u8* h264bsdGetRefPicData(dpbStorage_t *dpb, u32 index);

u32 h264bsdReorderRefPicList(
  dpbStorage_t *dpb,
  refPicListReordering_t *order,
  u32 currFrameNum,
  u32 numRefIdxActive);

u32 h264bsdMarkDecRefPic(
  dpbStorage_t *dpb,
  decRefPicMarking_t *mark,
  image_t *image,
  u32 frameNum,
  i32 picOrderCnt,
  u32 isIdr,
  u32 picId,
  u32 numErrMbs);

/* Inserts non-existing short-term frames for gaps in frame_num; to be called
 * for non-IDR pictures only. Returns HANTRO_OK or HANTRO_NOK. */
u32 h264bsdCheckGapsInFrameNum(dpbStorage_t *dpb, u32 frameNum, u32 isRefPic,
    u32 gapsAllowed);

/* Returns the next picture from the output buffer, NULL when it is empty */
dpbOutPicture_t* h264bsdDpbOutputPicture(dpbStorage_t *dpb);

/* Moves every picture still waiting for display into the output buffer */
void h264bsdFlushDpb(dpbStorage_t *dpb);

/* Releases all memory reserved by h264bsdInitDpb */
void h264bsdFreeDpb(dpbStorage_t *dpb);

#endif /* #ifndef H264SWDEC_DPB_H */
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + h264bsdWriteMacroblock + h264bsdWriteOutputBlocks + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_image.h" +#include "h264bsd_util.h" +#include "h264bsd_neighbour.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/* x- and y-coordinates for each block, defined in h264bsd_intra_prediction.c */ +extern const u32 h264bsdBlockX[]; +extern const u32 h264bsdBlockY[]; + +/* clipping table, defined in h264bsd_intra_prediction.c */ +extern const u8 h264bsdClip[]; + +/*------------------------------------------------------------------------------ + 4. 
Local function prototypes +------------------------------------------------------------------------------*/ + + + +/*------------------------------------------------------------------------------ + + Function: h264bsdWriteMacroblock + + Functional description: + Write one macroblock into the image. Both luma and chroma + components will be written at the same time. + + Inputs: + data pointer to macroblock data to be written, 256 values for + luma followed by 64 values for both chroma components + + Outputs: + image pointer to the image where the macroblock will be written + + Returns: + none + +------------------------------------------------------------------------------*/ +#ifndef H264DEC_NEON +void h264bsdWriteMacroblock(image_t *image, u8 *data) +{ + +/* Variables */ + + u32 i; + u32 width; + u32 *lum, *cb, *cr; + u32 *ptr; + u32 tmp1, tmp2; + +/* Code */ + + ASSERT(image); + ASSERT(data); + ASSERT(!((u32)data&0x3)); + + width = image->width; + + /*lint -save -e826 lum, cb and cr used to copy 4 bytes at the time, disable + * "area too small" info message */ + lum = (u32*)image->luma; + cb = (u32*)image->cb; + cr = (u32*)image->cr; + ASSERT(!((u32)lum&0x3)); + ASSERT(!((u32)cb&0x3)); + ASSERT(!((u32)cr&0x3)); + + ptr = (u32*)data; + + width *= 4; + for (i = 16; i ; i--) + { + tmp1 = *ptr++; + tmp2 = *ptr++; + *lum++ = tmp1; + *lum++ = tmp2; + tmp1 = *ptr++; + tmp2 = *ptr++; + *lum++ = tmp1; + *lum++ = tmp2; + lum += width-4; + } + + width >>= 1; + for (i = 8; i ; i--) + { + tmp1 = *ptr++; + tmp2 = *ptr++; + *cb++ = tmp1; + *cb++ = tmp2; + cb += width-2; + } + + for (i = 8; i ; i--) + { + tmp1 = *ptr++; + tmp2 = *ptr++; + *cr++ = tmp1; + *cr++ = tmp2; + cr += width-2; + } + +} +#endif +#ifndef H264DEC_OMXDL +/*------------------------------------------------------------------------------ + + Function: h264bsdWriteOutputBlocks + + Functional description: + Write one macroblock into the image. 
Prediction for the macroblock + and the residual are given separately and will be combined while + writing the data to the image + + Inputs: + data pointer to macroblock prediction data, 256 values for + luma followed by 64 values for both chroma components + mbNum number of the macroblock + residual pointer to residual data, 16 16-element arrays for luma + followed by 4 16-element arrays for both chroma + components + + Outputs: + image pointer to the image where the data will be written + + Returns: + none + +------------------------------------------------------------------------------*/ + +void h264bsdWriteOutputBlocks(image_t *image, u32 mbNum, u8 *data, + i32 residual[][16]) +{ + +/* Variables */ + + u32 i; + u32 picWidth, picSize; + u8 *lum, *cb, *cr; + u8 *imageBlock; + u8 *tmp; + u32 row, col; + u32 block; + u32 x, y; + i32 *pRes; + i32 tmp1, tmp2, tmp3, tmp4; + const u8 *clp = h264bsdClip + 512; + +/* Code */ + + ASSERT(image); + ASSERT(data); + ASSERT(mbNum < image->width * image->height); + ASSERT(!((u32)data&0x3)); + + /* Image size in macroblocks */ + picWidth = image->width; + picSize = picWidth * image->height; + row = mbNum / picWidth; + col = mbNum % picWidth; + + /* Output macroblock position in output picture */ + lum = (image->data + row * picWidth * 256 + col * 16); + cb = (image->data + picSize * 256 + row * picWidth * 64 + col * 8); + cr = (cb + picSize * 64); + + picWidth *= 16; + + for (block = 0; block < 16; block++) + { + x = h264bsdBlockX[block]; + y = h264bsdBlockY[block]; + + pRes = residual[block]; + + ASSERT(pRes); + + tmp = data + y*16 + x; + imageBlock = lum + y*picWidth + x; + + ASSERT(!((u32)tmp&0x3)); + ASSERT(!((u32)imageBlock&0x3)); + + if (IS_RESIDUAL_EMPTY(pRes)) + { + /*lint -e826 */ + i32 *in32 = (i32*)tmp; + i32 *out32 = (i32*)imageBlock; + + /* Residual is zero => copy prediction block to output */ + tmp1 = *in32; in32 += 4; + tmp2 = *in32; in32 += 4; + *out32 = tmp1; out32 += picWidth/4; + *out32 = tmp2; out32 += 
picWidth/4; + tmp1 = *in32; in32 += 4; + tmp2 = *in32; + *out32 = tmp1; out32 += picWidth/4; + *out32 = tmp2; + } + else + { + + RANGE_CHECK_ARRAY(pRes, -512, 511, 16); + + /* Calculate image = prediction + residual + * Process four pixels in a loop */ + for (i = 4; i; i--) + { + tmp1 = tmp[0]; + tmp2 = *pRes++; + tmp3 = tmp[1]; + tmp1 = clp[tmp1 + tmp2]; + tmp4 = *pRes++; + imageBlock[0] = (u8)tmp1; + tmp3 = clp[tmp3 + tmp4]; + tmp1 = tmp[2]; + tmp2 = *pRes++; + imageBlock[1] = (u8)tmp3; + tmp1 = clp[tmp1 + tmp2]; + tmp3 = tmp[3]; + tmp4 = *pRes++; + imageBlock[2] = (u8)tmp1; + tmp3 = clp[tmp3 + tmp4]; + tmp += 16; + imageBlock[3] = (u8)tmp3; + imageBlock += picWidth; + } + } + + } + + picWidth /= 2; + + for (block = 16; block <= 23; block++) + { + x = h264bsdBlockX[block & 0x3]; + y = h264bsdBlockY[block & 0x3]; + + pRes = residual[block]; + + ASSERT(pRes); + + tmp = data + 256; + imageBlock = cb; + + if (block >= 20) + { + imageBlock = cr; + tmp += 64; + } + + tmp += y*8 + x; + imageBlock += y*picWidth + x; + + ASSERT(!((u32)tmp&0x3)); + ASSERT(!((u32)imageBlock&0x3)); + + if (IS_RESIDUAL_EMPTY(pRes)) + { + /*lint -e826 */ + i32 *in32 = (i32*)tmp; + i32 *out32 = (i32*)imageBlock; + + /* Residual is zero => copy prediction block to output */ + tmp1 = *in32; in32 += 2; + tmp2 = *in32; in32 += 2; + *out32 = tmp1; out32 += picWidth/4; + *out32 = tmp2; out32 += picWidth/4; + tmp1 = *in32; in32 += 2; + tmp2 = *in32; + *out32 = tmp1; out32 += picWidth/4; + *out32 = tmp2; + } + else + { + + RANGE_CHECK_ARRAY(pRes, -512, 511, 16); + + for (i = 4; i; i--) + { + tmp1 = tmp[0]; + tmp2 = *pRes++; + tmp3 = tmp[1]; + tmp1 = clp[tmp1 + tmp2]; + tmp4 = *pRes++; + imageBlock[0] = (u8)tmp1; + tmp3 = clp[tmp3 + tmp4]; + tmp1 = tmp[2]; + tmp2 = *pRes++; + imageBlock[1] = (u8)tmp3; + tmp1 = clp[tmp1 + tmp2]; + tmp3 = tmp[3]; + tmp4 = *pRes++; + imageBlock[2] = (u8)tmp1; + tmp3 = clp[tmp3 + tmp4]; + tmp += 8; + imageBlock[3] = (u8)tmp3; + imageBlock += picWidth; + } + } + } + +} 
+#endif /* H264DEC_OMXDL */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_image.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_image.h new file mode 100755 index 0000000000000000000000000000000000000000..ed7c18ca8dbdc394d2349143984f9090d48ad14e --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_image.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_IMAGE_H +#define H264SWDEC_IMAGE_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 3. 
/* Picture being written by the decoder. */
typedef struct
{
    u8 *data;       /* start of the picture: luma plane first, then cb at
                     * offset width*height*256 and cr a further
                     * width*height*64 bytes on (offsets as used by
                     * h264bsdWriteOutputBlocks) */
    u32 width;      /* picture width in macroblocks */
    u32 height;     /* picture height in macroblocks */
    /* current MB's components */
    u8 *luma;
    u8 *cb;
    u8 *cr;
} image_t;

/*------------------------------------------------------------------------------
    4. Function prototypes
------------------------------------------------------------------------------*/

/* Copies one complete macroblock (256 luma + 2*64 chroma values) to the
 * positions given by image->luma, image->cb and image->cr */
void h264bsdWriteMacroblock(image_t *image, u8 *data);

#ifndef H264DEC_OMXDL
/* Writes prediction + residual of macroblock mbNum into the image */
void h264bsdWriteOutputBlocks(image_t *image, u32 mbNum, u8 *data,
    i32 residual[][16]);
#endif

#endif /* #ifndef H264SWDEC_IMAGE_H */
#include "h264bsd_inter_prediction.h"
#include "h264bsd_neighbour.h"
#include "h264bsd_util.h"
#include "h264bsd_reconstruct.h"
#include "h264bsd_dpb.h"

/*------------------------------------------------------------------------------
    2. External compiler flags
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
    3. Module defines
------------------------------------------------------------------------------*/

/* motion data of one neighbouring block, filled in by GetInterNeighbour and
 * consumed by GetPredictionMv (see the local prototypes) */
typedef struct
{
    u32 available;  /* NOTE(review): presumably non-zero when the neighbour
                     * can be used for prediction -- confirm */
    u32 refIndex;   /* reference picture index of the neighbour */
    mv_t mv;        /* motion vector of the neighbour */
} interNeighbour_t;
Local function prototypes +------------------------------------------------------------------------------*/ + +static u32 MvPrediction16x16(mbStorage_t *pMb, mbPred_t *mbPred, + dpbStorage_t *dpb); +static u32 MvPrediction16x8(mbStorage_t *pMb, mbPred_t *mbPred, + dpbStorage_t *dpb); +static u32 MvPrediction8x16(mbStorage_t *pMb, mbPred_t *mbPred, + dpbStorage_t *dpb); +static u32 MvPrediction8x8(mbStorage_t *pMb, subMbPred_t *subMbPred, + dpbStorage_t *dpb); +static u32 MvPrediction(mbStorage_t *pMb, subMbPred_t *subMbPred, + u32 mbPartIdx, u32 subMbPartIdx); +static i32 MedianFilter(i32 a, i32 b, i32 c); + +static void GetInterNeighbour(u32 sliceId, mbStorage_t *nMb, + interNeighbour_t *n, u32 index); +static void GetPredictionMv(mv_t *mv, interNeighbour_t *a, u32 refIndex); + +static const neighbour_t N_A_SUB_PART[4][4][4] = { + { { {MB_A,5}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_A,5}, {MB_A,7}, {MB_NA,0}, {MB_NA,0} }, + { {MB_A,5}, {MB_CURR,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_A,5}, {MB_CURR,0}, {MB_A,7}, {MB_CURR,2} } }, + + { { {MB_CURR,1}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,1}, {MB_CURR,3}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,1}, {MB_CURR,4}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,1}, {MB_CURR,4}, {MB_CURR,3}, {MB_CURR,6} } }, + + { { {MB_A,13}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_A,13}, {MB_A,15}, {MB_NA,0}, {MB_NA,0} }, + { {MB_A,13}, {MB_CURR,8}, {MB_NA,0}, {MB_NA,0} }, + { {MB_A,13}, {MB_CURR,8}, {MB_A,15}, {MB_CURR,10} } }, + + { { {MB_CURR,9}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,9}, {MB_CURR,11}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,9}, {MB_CURR,12}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,9}, {MB_CURR,12}, {MB_CURR,11}, {MB_CURR,14} } } }; + +static const neighbour_t N_B_SUB_PART[4][4][4] = { + { { {MB_B,10}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_B,10}, {MB_CURR,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_B,10}, {MB_B,11}, {MB_NA,0}, {MB_NA,0} }, + { {MB_B,10}, {MB_B,11}, {MB_CURR,0}, {MB_CURR,1} } }, + + { { {MB_B,14}, 
{MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_B,14}, {MB_CURR,4}, {MB_NA,0}, {MB_NA,0} }, + { {MB_B,14}, {MB_B,15}, {MB_NA,0}, {MB_NA,0} }, + { {MB_B,14}, {MB_B,15}, {MB_CURR,4}, {MB_CURR,5} } }, + + { { {MB_CURR,2}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,2}, {MB_CURR,8}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,2}, {MB_CURR,3}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,2}, {MB_CURR,3}, {MB_CURR,8}, {MB_CURR,9} } }, + + { { {MB_CURR,6}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,6}, {MB_CURR,12}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,6}, {MB_CURR,7}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,6}, {MB_CURR,7}, {MB_CURR,12}, {MB_CURR,13} } } }; + /* Neighbour C of each sub-macroblock partition, looked up by MvPrediction() + * as N_C_SUB_PART[mbPartIdx][subMbPartMode] + subMbPartIdx. + * NOTE(review): several entries pair MB_NA with a non-zero index; presumably + * h264bsdGetNeighbourMb() yields no macroblock for MB_NA, which makes + * MvPrediction() fall back to neighbour D -- confirm in h264bsd_neighbour.c. */ +static const neighbour_t N_C_SUB_PART[4][4][4] = { + { { {MB_B,14}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_B,14}, {MB_NA,4}, {MB_NA,0}, {MB_NA,0} }, + { {MB_B,11}, {MB_B,14}, {MB_NA,0}, {MB_NA,0} }, + { {MB_B,11}, {MB_B,14}, {MB_CURR,1}, {MB_NA,4} } }, + + { { {MB_C,10}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_C,10}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_B,15}, {MB_C,10}, {MB_NA,0}, {MB_NA,0} }, + { {MB_B,15}, {MB_C,10}, {MB_CURR,5}, {MB_NA,0} } }, + + { { {MB_CURR,6}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,6}, {MB_NA,12}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,3}, {MB_CURR,6}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,3}, {MB_CURR,6}, {MB_CURR,9}, {MB_NA,12} } }, + + { { {MB_NA,2}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_NA,2}, {MB_NA,8}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,7}, {MB_NA,2}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,7}, {MB_NA,2}, {MB_CURR,13}, {MB_NA,8} } } }; + /* Neighbour D of each sub-macroblock partition. MvPrediction() consults this + * table only when the neighbour taken from N_C_SUB_PART is not available. */ +static const neighbour_t N_D_SUB_PART[4][4][4] = { + { { {MB_D,15}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_D,15}, {MB_A,5}, {MB_NA,0}, {MB_NA,0} }, + { {MB_D,15}, {MB_B,10}, {MB_NA,0}, {MB_NA,0} }, + { {MB_D,15}, {MB_B,10}, {MB_A,5}, {MB_CURR,0} } }, + + { { {MB_B,11}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_B,11}, {MB_CURR,1}, {MB_NA,0}, {MB_NA,0} }, + { {MB_B,11}, {MB_B,14}, {MB_NA,0}, {MB_NA,0} }, + { {MB_B,11}, {MB_B,14}, 
{MB_CURR,1}, {MB_CURR,4} } }, + + { { {MB_A,7}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_A,7}, {MB_A,13}, {MB_NA,0}, {MB_NA,0} }, + { {MB_A,7}, {MB_CURR,2}, {MB_NA,0}, {MB_NA,0} }, + { {MB_A,7}, {MB_CURR,2}, {MB_A,13}, {MB_CURR,8} } }, + + { { {MB_CURR,3}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,3}, {MB_CURR,9}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,3}, {MB_CURR,6}, {MB_NA,0}, {MB_NA,0} }, + { {MB_CURR,3}, {MB_CURR,6}, {MB_CURR,9}, {MB_CURR,12} } } }; + + +#ifdef H264DEC_OMXDL + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterPrediction + + Functional description: + Variant built when H264DEC_OMXDL is defined. Processes one inter + macroblock: performs motion vector prediction and reconstructs + the prediction macroblock into data. Nothing in this variant + writes to the output image; currImage is only read for its + width and height. + + Inputs: + pMb pointer to macroblock specific information + pMbLayer pointer to current macroblock data from stream + dpb pointer to decoded picture buffer + mbNum current macroblock number + currImage pointer to output image (width and height are read) + data pointer where predicted macroblock will be stored + + Outputs: + pMb structure is updated with current macroblock + data prediction is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK error in motion vector prediction + +------------------------------------------------------------------------------*/ +u32 h264bsdInterPrediction(mbStorage_t *pMb, macroblockLayer_t *pMbLayer, + dpbStorage_t *dpb, u32 mbNum, image_t *currImage, u8 *data) +{ + +/* Variables */ + + u32 i; + u32 x, y; + u32 colAndRow; /* macroblock position: row in the low half, column in the high half */ + subMbPartMode_e subPartMode; + image_t refImage; + u8 fillBuff[32*21 + 15 + 32]; /* scratch area handed to h264bsdPredictSamples through pFill */ + u8 *pFill; + u32 tmp; /* packed partition offsets and size for h264bsdPredictSamples */ +/* Code */ + + ASSERT(pMb); + ASSERT(h264bsdMbPartPredMode(pMb->mbType) == PRED_MODE_INTER); + ASSERT(pMbLayer); + + /* 16-byte alignment */ + pFill = ALIGN(fillBuff, 16); + + /* set row bits 15:0 */ + colAndRow = mbNum / currImage->width; + /*set col to 
bits 31:16 */ + colAndRow += (mbNum - colAndRow * currImage->width) << 16; + colAndRow <<= 4; /* scale both packed fields by 16, as the non-OMXDL variant does with row and col */ + + refImage.width = currImage->width; + refImage.height = currImage->height; + + switch (pMb->mbType) + { + case P_Skip: + case P_L0_16x16: + if (MvPrediction16x16(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK) + return(HANTRO_NOK); + refImage.data = pMb->refAddr[0]; + /* tmp packs the partition as (xOffset<<24) + (yOffset<<16) + (width<<8) + height */ tmp = (0<<24) + (0<<16) + (16<<8) + 16; + h264bsdPredictSamples(data, pMb->mv, &refImage, + colAndRow, tmp, pFill); + break; + + case P_L0_L0_16x8: + if ( MvPrediction16x8(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK) + return(HANTRO_NOK); + refImage.data = pMb->refAddr[0]; /* upper 16x8 partition */ + tmp = (0<<24) + (0<<16) + (16<<8) + 8; + h264bsdPredictSamples(data, pMb->mv, &refImage, + colAndRow, tmp, pFill); + + refImage.data = pMb->refAddr[2]; /* lower 16x8 partition */ + tmp = (0<<24) + (8<<16) + (16<<8) + 8; + h264bsdPredictSamples(data, pMb->mv+8, &refImage, + colAndRow, tmp, pFill); + break; + + case P_L0_L0_8x16: + if ( MvPrediction8x16(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK) + return(HANTRO_NOK); + refImage.data = pMb->refAddr[0]; /* left 8x16 partition */ + tmp = (0<<24) + (0<<16) + (8<<8) + 16; + h264bsdPredictSamples(data, pMb->mv, &refImage, + colAndRow, tmp, pFill); + refImage.data = pMb->refAddr[1]; /* right 8x16 partition */ + tmp = (8<<24) + (0<<16) + (8<<8) + 16; + h264bsdPredictSamples(data, pMb->mv+4, &refImage, + colAndRow, tmp, pFill); + break; + + default: /* P_8x8 and P_8x8ref0 */ + if ( MvPrediction8x8(pMb, &pMbLayer->subMbPred, dpb) != HANTRO_OK) + return(HANTRO_NOK); + for (i = 0; i < 4; i++) /* one pass per 8x8 sub-macroblock */ + { + refImage.data = pMb->refAddr[i]; + subPartMode = + h264bsdSubMbPartMode(pMbLayer->subMbPred.subMbType[i]); + /* x, y: offset of sub-macroblock i inside the macroblock */ x = i & 0x1 ? 8 : 0; + y = i < 2 ? 
0 : 8; + switch (subPartMode) + { + case MB_SP_8x8: + tmp = (x<<24) + (y<<16) + (8<<8) + 8; + h264bsdPredictSamples(data, pMb->mv+4*i, &refImage, + colAndRow, tmp, pFill); + break; + + case MB_SP_8x4: /* two 8x4 partitions stacked vertically */ + tmp = (x<<24) + (y<<16) + (8<<8) + 4; + h264bsdPredictSamples(data, pMb->mv+4*i, &refImage, + colAndRow, tmp, pFill); + tmp = (x<<24) + ((y+4)<<16) + (8<<8) + 4; + h264bsdPredictSamples(data, pMb->mv+4*i+2, &refImage, + colAndRow, tmp, pFill); + break; + + case MB_SP_4x8: /* two 4x8 partitions side by side */ + tmp = (x<<24) + (y<<16) + (4<<8) + 8; + h264bsdPredictSamples(data, pMb->mv+4*i, &refImage, + colAndRow, tmp, pFill); + tmp = ((x+4)<<24) + (y<<16) + (4<<8) + 8; + h264bsdPredictSamples(data, pMb->mv+4*i+1, &refImage, + colAndRow, tmp, pFill); + break; + + default: /* remaining mode: four 4x4 partitions (MB_SP_4x4 in MvPrediction) */ + tmp = (x<<24) + (y<<16) + (4<<8) + 4; + h264bsdPredictSamples(data, pMb->mv+4*i, &refImage, + colAndRow, tmp, pFill); + tmp = ((x+4)<<24) + (y<<16) + (4<<8) + 4; + h264bsdPredictSamples(data, pMb->mv+4*i+1, &refImage, + colAndRow, tmp, pFill); + tmp = (x<<24) + ((y+4)<<16) + (4<<8) + 4; + h264bsdPredictSamples(data, pMb->mv+4*i+2, &refImage, + colAndRow, tmp, pFill); + tmp = ((x+4)<<24) + ((y+4)<<16) + (4<<8) + 4; + h264bsdPredictSamples(data, pMb->mv+4*i+3, &refImage, + colAndRow, tmp, pFill); + break; + } + } + break; + } + + /* if decoded flag > 1 -> mb has already been successfully decoded and + * written to output -> do not write again + * NOTE(review): both paths below return HANTRO_OK and this variant + * performs no write, so the test has no effect here. */ + if (pMb->decoded > 1) + return HANTRO_OK; + + return(HANTRO_OK); +} + +#else /* H264DEC_OMXDL */ + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterPrediction + + Functional description: + Variant built when H264DEC_OMXDL is not defined. + Processes one inter macroblock. Performs motion vector prediction + and reconstructs prediction macroblock. 
Writes the final macroblock + (prediction + residual) into the output image (currImage) + + Inputs: + pMb pointer to macroblock specific information + pMbLayer pointer to current macroblock data from stream + dpb pointer to decoded picture buffer + mbNum current macroblock number + currImage pointer to output image + data pointer where predicted macroblock will be stored + + Outputs: + pMb structure is updated with current macroblock + currImage current macroblock is written into image, unless + pMb->decoded is greater than 1 + data prediction is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK error in motion vector prediction (one of the + MvPrediction* helpers did not return HANTRO_OK) + +------------------------------------------------------------------------------*/ +u32 h264bsdInterPrediction(mbStorage_t *pMb, macroblockLayer_t *pMbLayer, + dpbStorage_t *dpb, u32 mbNum, image_t *currImage, u8 *data) +{ + +/* Variables */ + + u32 i; + u32 x, y; + u32 row, col; + subMbPartMode_e subPartMode; + image_t refImage; + +/* Code */ + + ASSERT(pMb); + ASSERT(h264bsdMbPartPredMode(pMb->mbType) == PRED_MODE_INTER); + ASSERT(pMbLayer); + + row = mbNum / currImage->width; /* macroblock row; width is used as macroblocks per row */ + col = mbNum - row * currImage->width; /* macroblock column (remainder of the division) */ + row *= 16; /* scale the macroblock position by 16 */ + col *= 16; + + refImage.width = currImage->width; + refImage.height = currImage->height; + + switch (pMb->mbType) + { + case P_Skip: + case P_L0_16x16: + if (MvPrediction16x16(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK) + return(HANTRO_NOK); + refImage.data = pMb->refAddr[0]; + h264bsdPredictSamples(data, pMb->mv, &refImage, col, row, 0, 0, + 16, 16); + break; + + case P_L0_L0_16x8: + if ( MvPrediction16x8(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK) + return(HANTRO_NOK); + refImage.data = pMb->refAddr[0]; /* upper 16x8 partition */ + h264bsdPredictSamples(data, pMb->mv, &refImage, col, row, 0, 0, + 16, 8); + refImage.data = pMb->refAddr[2]; /* lower 16x8 partition */ + h264bsdPredictSamples(data, pMb->mv+8, &refImage, col, row, 0, 8, + 16, 8); + break; + + case P_L0_L0_8x16: + if ( MvPrediction8x16(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK) + return(HANTRO_NOK); + refImage.data = pMb->refAddr[0]; /* left 8x16 partition */ + 
h264bsdPredictSamples(data, pMb->mv, &refImage, col, row, 0, 0, + 8, 16); + refImage.data = pMb->refAddr[1]; /* right 8x16 partition */ + h264bsdPredictSamples(data, pMb->mv+4, &refImage, col, row, 8, 0, + 8, 16); + break; + + default: /* P_8x8 and P_8x8ref0 */ + if ( MvPrediction8x8(pMb, &pMbLayer->subMbPred, dpb) != HANTRO_OK) + return(HANTRO_NOK); + for (i = 0; i < 4; i++) /* one pass per 8x8 sub-macroblock */ + { + refImage.data = pMb->refAddr[i]; + subPartMode = + h264bsdSubMbPartMode(pMbLayer->subMbPred.subMbType[i]); + /* x, y: offset of sub-macroblock i inside the macroblock */ x = i & 0x1 ? 8 : 0; + y = i < 2 ? 0 : 8; + switch (subPartMode) + { + case MB_SP_8x8: + h264bsdPredictSamples(data, pMb->mv+4*i, &refImage, + col, row, x, y, 8, 8); + break; + + case MB_SP_8x4: /* two 8x4 partitions stacked vertically */ + h264bsdPredictSamples(data, pMb->mv+4*i, &refImage, + col, row, x, y, 8, 4); + h264bsdPredictSamples(data, pMb->mv+4*i+2, &refImage, + col, row, x, y+4, 8, 4); + break; + + case MB_SP_4x8: /* two 4x8 partitions side by side */ + h264bsdPredictSamples(data, pMb->mv+4*i, &refImage, + col, row, x, y, 4, 8); + h264bsdPredictSamples(data, pMb->mv+4*i+1, &refImage, + col, row, x+4, y, 4, 8); + break; + + default: /* remaining mode: four 4x4 partitions (MB_SP_4x4 in MvPrediction) */ + h264bsdPredictSamples(data, pMb->mv+4*i, &refImage, + col, row, x, y, 4, 4); + h264bsdPredictSamples(data, pMb->mv+4*i+1, &refImage, + col, row, x+4, y, 4, 4); + h264bsdPredictSamples(data, pMb->mv+4*i+2, &refImage, + col, row, x, y+4, 4, 4); + h264bsdPredictSamples(data, pMb->mv+4*i+3, &refImage, + col, row, x+4, y+4, 4, 4); + break; + } + } + break; + } + + /* if decoded flag > 1 -> mb has already been successfully decoded and + * written to output -> do not write again */ + if (pMb->decoded > 1) + return HANTRO_OK; + + if (pMb->mbType != P_Skip) + { + /* non-skip macroblocks are written together with pMbLayer->residual.level */ h264bsdWriteOutputBlocks(currImage, mbNum, data, + pMbLayer->residual.level); + } + else + { + /* P_Skip: the prediction in data is written without a residual argument */ h264bsdWriteMacroblock(currImage, data); + } + + return(HANTRO_OK); +} +#endif /* H264DEC_OMXDL */ + +/*------------------------------------------------------------------------------ + + Function: MvPrediction16x16 + + Functional description: + Motion vector prediction for 16x16 partition mode + (h264bsdInterPrediction also calls it for P_Skip). On success + all 16 pMb->mv entries and all four pMb->refPic / pMb->refAddr + entries are set to the same values. + + Returns: + HANTRO_OK success + HANTRO_NOK motion vector out of range, or + h264bsdGetRefPicData returned NULL + 
+------------------------------------------------------------------------------*/ + +u32 MvPrediction16x16(mbStorage_t *pMb, mbPred_t *mbPred, dpbStorage_t *dpb) +{ + +/* Variables */ + + mv_t mv; + mv_t mvPred; + interNeighbour_t a[3]; /* A, B, C */ + u32 refIndex; + u8 *tmp; + u32 *tmpMv1, *tmpMv2; + +/* Code */ + + refIndex = mbPred->refIdxL0[0]; + + GetInterNeighbour(pMb->sliceId, pMb->mbA, a, 5); + GetInterNeighbour(pMb->sliceId, pMb->mbB, a+1, 10); + /*lint --e(740) Unusual pointer cast (incompatible indirect types) */ + tmpMv1 = (u32*)(&a[0].mv); /* we test just that both MVs are zero */ + /*lint --e(740) */ + tmpMv2 = (u32*)(&a[1].mv); /* i.e. a[0].mv.hor == 0 && a[0].mv.ver == 0 */ + if (pMb->mbType == P_Skip && + (!a[0].available || !a[1].available || + ( a[0].refIndex == 0 && ((u32)(*tmpMv1) == 0) ) || + ( a[1].refIndex == 0 && ((u32)(*tmpMv2) == 0) ))) + { + mv.hor = mv.ver = 0; + } + else + { + mv = mbPred->mvdL0[0]; + GetInterNeighbour(pMb->sliceId, pMb->mbC, a+2, 10); + if (!a[2].available) + { + GetInterNeighbour(pMb->sliceId, pMb->mbD, a+2, 15); + } + + GetPredictionMv(&mvPred, a, refIndex); + + mv.hor += mvPred.hor; + mv.ver += mvPred.ver; + + /* horizontal motion vector range [-2048, 2047.75] */ + if ((u32)(i32)(mv.hor+8192) >= (16384)) + return(HANTRO_NOK); + + /* vertical motion vector range [-512, 511.75] + * (smaller for low levels) */ + if ((u32)(i32)(mv.ver+2048) >= (4096)) + return(HANTRO_NOK); + } + + tmp = h264bsdGetRefPicData(dpb, refIndex); + if (tmp == NULL) + return(HANTRO_NOK); + + pMb->mv[0] = pMb->mv[1] = pMb->mv[2] = pMb->mv[3] = + pMb->mv[4] = pMb->mv[5] = pMb->mv[6] = pMb->mv[7] = + pMb->mv[8] = pMb->mv[9] = pMb->mv[10] = pMb->mv[11] = + pMb->mv[12] = pMb->mv[13] = pMb->mv[14] = pMb->mv[15] = mv; + + pMb->refPic[0] = refIndex; + pMb->refPic[1] = refIndex; + pMb->refPic[2] = refIndex; + pMb->refPic[3] = refIndex; + pMb->refAddr[0] = tmp; + pMb->refAddr[1] = tmp; + pMb->refAddr[2] = tmp; + pMb->refAddr[3] = tmp; + + 
return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: MvPrediction16x8 + + Functional description: + Motion vector prediction for 16x8 partition mode. The upper + partition is predicted first (pMb->mv[0..7], refPic/refAddr[0..1]) + and takes neighbour B directly when its reference index matches; + the lower partition (pMb->mv[8..15], refPic/refAddr[2..3]) takes + neighbour A directly when its reference index matches. In the + other cases GetPredictionMv() chooses the predictor. + + Returns: + HANTRO_OK success + HANTRO_NOK motion vector out of range, or + h264bsdGetRefPicData returned NULL + +------------------------------------------------------------------------------*/ + +u32 MvPrediction16x8(mbStorage_t *pMb, mbPred_t *mbPred, dpbStorage_t *dpb) +{ + +/* Variables */ + + mv_t mv; + mv_t mvPred; + interNeighbour_t a[3]; /* A, B, C */ + u32 refIndex; + u8 *tmp; + +/* Code */ + + /* upper partition */ mv = mbPred->mvdL0[0]; + refIndex = mbPred->refIdxL0[0]; + + GetInterNeighbour(pMb->sliceId, pMb->mbB, a+1, 10); + + if (a[1].refIndex == refIndex) + mvPred = a[1].mv; + else + { + GetInterNeighbour(pMb->sliceId, pMb->mbA, a, 5); + GetInterNeighbour(pMb->sliceId, pMb->mbC, a+2, 10); + if (!a[2].available) /* C is not available: neighbour D takes its place */ + { + GetInterNeighbour(pMb->sliceId, pMb->mbD, a+2, 15); + } + + GetPredictionMv(&mvPred, a, refIndex); + + } + mv.hor += mvPred.hor; + mv.ver += mvPred.ver; + + /* horizontal motion vector range [-2048, 2047.75] */ + if ((u32)(i32)(mv.hor+8192) >= (16384)) + return(HANTRO_NOK); + + /* vertical motion vector range [-512, 511.75] (smaller for low levels) */ + if ((u32)(i32)(mv.ver+2048) >= (4096)) + return(HANTRO_NOK); + + tmp = h264bsdGetRefPicData(dpb, refIndex); + if (tmp == NULL) + return(HANTRO_NOK); + + pMb->mv[0] = pMb->mv[1] = pMb->mv[2] = pMb->mv[3] = + pMb->mv[4] = pMb->mv[5] = pMb->mv[6] = pMb->mv[7] = mv; + pMb->refPic[0] = refIndex; + pMb->refPic[1] = refIndex; + pMb->refAddr[0] = tmp; + pMb->refAddr[1] = tmp; + + /* lower partition */ mv = mbPred->mvdL0[1]; + refIndex = mbPred->refIdxL0[1]; + + GetInterNeighbour(pMb->sliceId, pMb->mbA, a, 13); + if (a[0].refIndex == refIndex) + mvPred = a[0].mv; + else + { + /* neighbour B of the lower partition is the upper partition stored above */ a[1].available = HANTRO_TRUE; + a[1].refIndex = pMb->refPic[0]; + a[1].mv = pMb->mv[0]; + + /* c is not available */ + GetInterNeighbour(pMb->sliceId, pMb->mbA, a+2, 7); + + GetPredictionMv(&mvPred, a, refIndex); + + } + mv.hor += mvPred.hor; + mv.ver += 
mvPred.ver; + + /* horizontal motion vector range [-2048, 2047.75] */ + if ((u32)(i32)(mv.hor+8192) >= (16384)) + return(HANTRO_NOK); + + /* vertical motion vector range [-512, 511.75] (smaller for low levels) */ + if ((u32)(i32)(mv.ver+2048) >= (4096)) + return(HANTRO_NOK); + + tmp = h264bsdGetRefPicData(dpb, refIndex); + if (tmp == NULL) + return(HANTRO_NOK); + + /* store the result for the second (lower) partition */ pMb->mv[8] = pMb->mv[9] = pMb->mv[10] = pMb->mv[11] = + pMb->mv[12] = pMb->mv[13] = pMb->mv[14] = pMb->mv[15] = mv; + pMb->refPic[2] = refIndex; + pMb->refPic[3] = refIndex; + pMb->refAddr[2] = tmp; + pMb->refAddr[3] = tmp; + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: MvPrediction8x16 + + Functional description: + Motion vector prediction for 8x16 partition mode. The left + partition is predicted first (pMb->mv[0..3] and [8..11], + refPic/refAddr[0] and [2]) and takes neighbour A directly when + its reference index matches; the right partition (pMb->mv[4..7] + and [12..15], refPic/refAddr[1] and [3]) takes neighbour C (or + its replacement) directly when its reference index matches. In + the other cases GetPredictionMv() chooses the predictor. + + Returns: + HANTRO_OK success + HANTRO_NOK motion vector out of range, or + h264bsdGetRefPicData returned NULL + +------------------------------------------------------------------------------*/ + +u32 MvPrediction8x16(mbStorage_t *pMb, mbPred_t *mbPred, dpbStorage_t *dpb) +{ + +/* Variables */ + + mv_t mv; + mv_t mvPred; + interNeighbour_t a[3]; /* A, B, C */ + u32 refIndex; + u8 *tmp; + +/* Code */ + + /* left partition */ mv = mbPred->mvdL0[0]; + refIndex = mbPred->refIdxL0[0]; + + GetInterNeighbour(pMb->sliceId, pMb->mbA, a, 5); + + if (a[0].refIndex == refIndex) + mvPred = a[0].mv; + else + { + GetInterNeighbour(pMb->sliceId, pMb->mbB, a+1, 10); + GetInterNeighbour(pMb->sliceId, pMb->mbB, a+2, 14); /* C of the left partition is also read from pMb->mbB */ + if (!a[2].available) + { + GetInterNeighbour(pMb->sliceId, pMb->mbD, a+2, 15); + } + + GetPredictionMv(&mvPred, a, refIndex); + + } + mv.hor += mvPred.hor; + mv.ver += mvPred.ver; + + /* horizontal motion vector range [-2048, 2047.75] */ + if ((u32)(i32)(mv.hor+8192) >= (16384)) + return(HANTRO_NOK); + + /* vertical motion vector range [-512, 511.75] (smaller for low levels) */ + if ((u32)(i32)(mv.ver+2048) >= (4096)) + return(HANTRO_NOK); + + tmp = h264bsdGetRefPicData(dpb, refIndex); + if (tmp == NULL) + return(HANTRO_NOK); + + pMb->mv[0] = pMb->mv[1] = pMb->mv[2] = pMb->mv[3] = + pMb->mv[8] = 
pMb->mv[9] = pMb->mv[10] = pMb->mv[11] = mv; + pMb->refPic[0] = refIndex; + pMb->refPic[2] = refIndex; + pMb->refAddr[0] = tmp; + pMb->refAddr[2] = tmp; + + /* second partition: stored below in mv[4..7] and mv[12..15] */ mv = mbPred->mvdL0[1]; + refIndex = mbPred->refIdxL0[1]; + + GetInterNeighbour(pMb->sliceId, pMb->mbC, a+2, 10); + if (!a[2].available) /* C is not available: index 11 of pMb->mbB takes its place */ + { + GetInterNeighbour(pMb->sliceId, pMb->mbB, a+2, 11); + } + if (a[2].refIndex == refIndex) + mvPred = a[2].mv; + else + { + /* neighbour A of this partition is the first partition stored above */ a[0].available = HANTRO_TRUE; + a[0].refIndex = pMb->refPic[0]; + a[0].mv = pMb->mv[0]; + + GetInterNeighbour(pMb->sliceId, pMb->mbB, a+1, 14); + + GetPredictionMv(&mvPred, a, refIndex); + + } + mv.hor += mvPred.hor; + mv.ver += mvPred.ver; + + /* horizontal motion vector range [-2048, 2047.75] */ + if ((u32)(i32)(mv.hor+8192) >= (16384)) + return(HANTRO_NOK); + + /* vertical motion vector range [-512, 511.75] (smaller for low levels) */ + if ((u32)(i32)(mv.ver+2048) >= (4096)) + return(HANTRO_NOK); + + tmp = h264bsdGetRefPicData(dpb, refIndex); + if (tmp == NULL) + return(HANTRO_NOK); + + pMb->mv[4] = pMb->mv[5] = pMb->mv[6] = pMb->mv[7] = + pMb->mv[12] = pMb->mv[13] = pMb->mv[14] = pMb->mv[15] = mv; + pMb->refPic[1] = refIndex; + pMb->refPic[3] = refIndex; + pMb->refAddr[1] = tmp; + pMb->refAddr[3] = tmp; + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: MvPrediction8x8 + + Functional description: + Motion vector prediction for 8x8 partition mode. For each of + the four sub-macroblocks the reference index and the reference + picture address are stored in pMb, then MvPrediction() is + called once per sub-macroblock partition. + + Returns: + HANTRO_OK success + HANTRO_NOK h264bsdGetRefPicData returned NULL, or + MvPrediction did not return HANTRO_OK + +------------------------------------------------------------------------------*/ + +u32 MvPrediction8x8(mbStorage_t *pMb, subMbPred_t *subMbPred, dpbStorage_t *dpb) +{ + +/* Variables */ + + u32 i, j; /* i: sub-macroblock, j: partition inside it */ + u32 numSubMbPart; + +/* Code */ + + for (i = 0; i < 4; i++) + { + numSubMbPart = h264bsdNumSubMbPart(subMbPred->subMbType[i]); + pMb->refPic[i] = subMbPred->refIdxL0[i]; + pMb->refAddr[i] = h264bsdGetRefPicData(dpb, subMbPred->refIdxL0[i]); + if (pMb->refAddr[i] == NULL) + return(HANTRO_NOK); + for (j = 0; j < numSubMbPart; j++) + 
{ + if (MvPrediction(pMb, subMbPred, i, j) != HANTRO_OK) + return(HANTRO_NOK); + } + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: MvPrediction + + Functional description: + Perform motion vector prediction for sub-partition. + mbPartIdx selects the 8x8 sub-macroblock and subMbPartIdx the + partition inside it. Neighbours A, B and C are taken from the + N_x_SUB_PART tables (D replaces C when C is not available), + the predictor from GetPredictionMv() is added to the decoded + difference and the result is stored in the pMb->mv entries that + the partition covers. + + Returns: + HANTRO_OK success + HANTRO_NOK motion vector out of range + +------------------------------------------------------------------------------*/ + +u32 MvPrediction(mbStorage_t *pMb, subMbPred_t *subMbPred, u32 mbPartIdx, + u32 subMbPartIdx) +{ + +/* Variables */ + + mv_t mv, mvPred; + u32 refIndex; + subMbPartMode_e subMbPartMode; + const neighbour_t *n; /* current lookup-table entry */ + mbStorage_t *nMb; /* macroblock that holds the neighbour named by n */ + interNeighbour_t a[3]; /* A, B, C */ + +/* Code */ + + mv = subMbPred->mvdL0[mbPartIdx][subMbPartIdx]; + subMbPartMode = h264bsdSubMbPartMode(subMbPred->subMbType[mbPartIdx]); + refIndex = subMbPred->refIdxL0[mbPartIdx]; + + /* neighbour A */ n = N_A_SUB_PART[mbPartIdx][subMbPartMode]+subMbPartIdx; + nMb = h264bsdGetNeighbourMb(pMb, n->mb); + GetInterNeighbour(pMb->sliceId, nMb, a, n->index); + + /* neighbour B */ n = N_B_SUB_PART[mbPartIdx][subMbPartMode]+subMbPartIdx; + nMb = h264bsdGetNeighbourMb(pMb, n->mb); + GetInterNeighbour(pMb->sliceId, nMb, a+1, n->index); + + /* neighbour C */ n = N_C_SUB_PART[mbPartIdx][subMbPartMode]+subMbPartIdx; + nMb = h264bsdGetNeighbourMb(pMb, n->mb); + GetInterNeighbour(pMb->sliceId, nMb, a+2, n->index); + + if (!a[2].available) /* C is not available: neighbour D takes its place */ + { + n = N_D_SUB_PART[mbPartIdx][subMbPartMode]+subMbPartIdx; + nMb = h264bsdGetNeighbourMb(pMb, n->mb); + GetInterNeighbour(pMb->sliceId, nMb, a+2, n->index); + } + + GetPredictionMv(&mvPred, a, refIndex); + + mv.hor += mvPred.hor; + mv.ver += mvPred.ver; + + /* horizontal motion vector range [-2048, 2047.75] */ + if (((u32)(i32)(mv.hor+8192) >= (16384))) + return(HANTRO_NOK); + + /* vertical motion vector range [-512, 511.75] (smaller for low levels) */ + if (((u32)(i32)(mv.ver+2048) >= (4096))) + return(HANTRO_NOK); + + /* copy mv into every pMb->mv entry covered by this partition */ switch (subMbPartMode) + { + case MB_SP_8x8: + pMb->mv[4*mbPartIdx] = mv; + pMb->mv[4*mbPartIdx + 1] = mv; + 
pMb->mv[4*mbPartIdx + 2] = mv; + pMb->mv[4*mbPartIdx + 3] = mv; + break; + + case MB_SP_8x4: /* partition subMbPartIdx covers entries 2*subMbPartIdx and 2*subMbPartIdx+1 */ + pMb->mv[4*mbPartIdx + 2*subMbPartIdx] = mv; + pMb->mv[4*mbPartIdx + 2*subMbPartIdx + 1] = mv; + break; + + case MB_SP_4x8: /* partition subMbPartIdx covers entries subMbPartIdx and subMbPartIdx+2 */ + pMb->mv[4*mbPartIdx + subMbPartIdx] = mv; + pMb->mv[4*mbPartIdx + subMbPartIdx + 2] = mv; + break; + + case MB_SP_4x4: /* one entry per partition */ + pMb->mv[4*mbPartIdx + subMbPartIdx] = mv; + break; + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: MedianFilter + + Functional description: + Median filtering for motion vector prediction. Returns the + middle value of a, b and c; GetPredictionMv() applies it to + the horizontal and vertical components separately. + +------------------------------------------------------------------------------*/ + +i32 MedianFilter(i32 a, i32 b, i32 c) +{ + +/* Variables */ + + i32 max,min,med; + +/* Code */ + + max = min = med = a; /* after the next test min/max hold the smaller/larger of a and b */ + if (b > max) + { + max = b; + } + else if (b < min) + { + min = b; + } + if (c > max) + { + med = max; /* c is the largest: the median is the larger of a and b */ + } + else if (c < min) + { + med = min; /* c is the smallest: the median is the smaller of a and b */ + } + else + { + med = c; /* c lies between a and b (inclusive) */ + } + + return(med); +} + +/*------------------------------------------------------------------------------ + + Function: GetInterNeighbour + + Functional description: + Get availability, reference index and motion vector of a neighbour + + Inputs: + sliceId slice id the neighbour has to match + nMb neighbour macroblock, may be NULL + index index of the motion vector in nMb->mv; the + reference index is read from nMb->refPic[index>>2] + + Outputs: + n available is HANTRO_TRUE when nMb is not NULL and + nMb->sliceId equals sliceId. refIndex and mv are + copied from nMb only when nMb->mbType is at most + P_8x8ref0; otherwise they stay 0xFFFFFFFF and (0,0) + +------------------------------------------------------------------------------*/ + +void GetInterNeighbour(u32 sliceId, mbStorage_t *nMb, + interNeighbour_t *n, u32 index) +{ + + /* defaults describe a neighbour without usable prediction data */ n->available = HANTRO_FALSE; + n->refIndex = 0xFFFFFFFF; + n->mv.hor = n->mv.ver = 0; + + if (nMb && (sliceId == nMb->sliceId)) + { + u32 tmp; + mv_t tmpMv; + + tmp = nMb->mbType; + n->available = HANTRO_TRUE; + /* MbPartPredMode "inlined" */ + if (tmp <= P_8x8ref0) + { + tmpMv = nMb->mv[index]; + tmp = nMb->refPic[index>>2]; + n->refIndex = tmp; + n->mv = tmpMv; + } + } + +} + +/*------------------------------------------------------------------------------ + + Function: GetPredictionMv + + Functional description: + Compute motion vector predictor based on neighbours A, B and C + 
+------------------------------------------------------------------------------*/ + /* a[0], a[1] and a[2] are neighbours A, B and C; the result is written to + * *mv. When B and C are both unavailable and A is available, the vector of + * A is returned as it is. Otherwise, if exactly one neighbour has a + * reference index equal to refIndex, the vector of that neighbour is + * returned; in every other case the result is the component-wise median + * of the three vectors. */ +void GetPredictionMv(mv_t *mv, interNeighbour_t *a, u32 refIndex) +{ + + if ( a[1].available || a[2].available || !a[0].available) + { + u32 isA, isB, isC; /* set when the neighbour uses the requested reference index */ + isA = (a[0].refIndex == refIndex) ? HANTRO_TRUE : HANTRO_FALSE; + isB = (a[1].refIndex == refIndex) ? HANTRO_TRUE : HANTRO_FALSE; + isC = (a[2].refIndex == refIndex) ? HANTRO_TRUE : HANTRO_FALSE; + + if (((u32)isA+(u32)isB+(u32)isC) != 1) /* none or several matches: use the median */ + { + mv->hor = (i16)MedianFilter(a[0].mv.hor, a[1].mv.hor, a[2].mv.hor); + mv->ver = (i16)MedianFilter(a[0].mv.ver, a[1].mv.ver, a[2].mv.ver); + } + else if (isA) + *mv = a[0].mv; + else if (isB) + *mv = a[1].mv; + else + *mv = a[2].mv; + } + else /* only A is available */ + { + *mv = a[0].mv; + } + +} + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.h new file mode 100755 index 0000000000000000000000000000000000000000..94dee259eae4de330d984efb382b27af8bad95bc --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. 
Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_INTER_PREDICTION_H +#define H264SWDEC_INTER_PREDICTION_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_image.h" +#include "h264bsd_macroblock_layer.h" +#include "h264bsd_dpb.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. Function prototypes +------------------------------------------------------------------------------*/ + /* Inter prediction of one macroblock, defined in h264bsd_inter_prediction.c + * (the image parameter is named currImage there). Returns HANTRO_OK on + * success and HANTRO_NOK when motion vector prediction fails. */ +u32 h264bsdInterPrediction(mbStorage_t *pMb, macroblockLayer_t *pMbLayer, + dpbStorage_t *dpb, u32 mbNum, image_t *image, u8 *data); + +#endif /* #ifndef H264SWDEC_INTER_PREDICTION_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.c new file mode 100755 index 0000000000000000000000000000000000000000..15eabfb63e8314d975c71810265b27d4be42373f --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.c @@ -0,0 +1,1937 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + h264bsdIntraPrediction + h264bsdGetNeighbourPels + h264bsdIntra16x16Prediction + h264bsdIntra4x4Prediction + h264bsdIntraChromaPrediction + h264bsdAddResidual + Intra16x16VerticalPrediction + Intra16x16HorizontalPrediction + Intra16x16DcPrediction + Intra16x16PlanePrediction + IntraChromaDcPrediction + IntraChromaHorizontalPrediction + IntraChromaVerticalPrediction + IntraChromaPlanePrediction + Get4x4NeighbourPels + Write4x4To16x16 + Intra4x4VerticalPrediction + Intra4x4HorizontalPrediction + Intra4x4DcPrediction + Intra4x4DiagonalDownLeftPrediction + Intra4x4DiagonalDownRightPrediction + Intra4x4VerticalRightPrediction + Intra4x4HorizontalDownPrediction + Intra4x4VerticalLeftPrediction + Intra4x4HorizontalUpPrediction + DetermineIntra4x4PredMode + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. 
Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_intra_prediction.h" +#include "h264bsd_util.h" +#include "h264bsd_macroblock_layer.h" +#include "h264bsd_neighbour.h" +#include "h264bsd_image.h" + +#ifdef H264DEC_OMXDL +#include "omxtypes.h" +#include "omxVC.h" +#endif /* H264DEC_OMXDL */ + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/* Switch off the following Lint messages for this file: + * Info 702: Shift right of signed quantity (int) + */ +/*lint -e702 */ + + +/* x- and y-coordinates for each block */ +const u32 h264bsdBlockX[16] = + { 0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12 }; +const u32 h264bsdBlockY[16] = + { 0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12 }; + +const u8 h264bsdClip[1280] = +{ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, + 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, + 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, + 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, + 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, + 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255 +}; + +#ifndef H264DEC_OMXDL +/*------------------------------------------------------------------------------ + 4. 
Local function prototypes +------------------------------------------------------------------------------*/ +static void Get4x4NeighbourPels(u8 *a, u8 *l, u8 *data, u8 *above, u8 *left, + u32 blockNum); +static void Intra16x16VerticalPrediction(u8 *data, u8 *above); +static void Intra16x16HorizontalPrediction(u8 *data, u8 *left); +static void Intra16x16DcPrediction(u8 *data, u8 *above, u8 *left, + u32 A, u32 B); +static void Intra16x16PlanePrediction(u8 *data, u8 *above, u8 *left); +static void IntraChromaDcPrediction(u8 *data, u8 *above, u8 *left, + u32 A, u32 B); +static void IntraChromaHorizontalPrediction(u8 *data, u8 *left); +static void IntraChromaVerticalPrediction(u8 *data, u8 *above); +static void IntraChromaPlanePrediction(u8 *data, u8 *above, u8 *left); + +static void Intra4x4VerticalPrediction(u8 *data, u8 *above); +static void Intra4x4HorizontalPrediction(u8 *data, u8 *left); +static void Intra4x4DcPrediction(u8 *data, u8 *above, u8 *left, u32 A, u32 B); +static void Intra4x4DiagonalDownLeftPrediction(u8 *data, u8 *above); +static void Intra4x4DiagonalDownRightPrediction(u8 *data, u8 *above, u8 *left); +static void Intra4x4VerticalRightPrediction(u8 *data, u8 *above, u8 *left); +static void Intra4x4HorizontalDownPrediction(u8 *data, u8 *above, u8 *left); +static void Intra4x4VerticalLeftPrediction(u8 *data, u8 *above); +static void Intra4x4HorizontalUpPrediction(u8 *data, u8 *left); +void h264bsdAddResidual(u8 *data, i32 *residual, u32 blockNum); + +static void Write4x4To16x16(u8 *data, u8 *data4x4, u32 blockNum); +#endif /* H264DEC_OMXDL */ + +static u32 DetermineIntra4x4PredMode(macroblockLayer_t *pMbLayer, + u32 available, neighbour_t *nA, neighbour_t *nB, u32 index, + mbStorage_t *nMbA, mbStorage_t *nMbB); + + +#ifdef H264DEC_OMXDL + +/*------------------------------------------------------------------------------ + + Function: h264bsdIntra16x16Prediction + + Functional description: + Perform intra 16x16 prediction mode for luma pixels and add + 
residual into prediction. The resulting luma pixels are + stored in macroblock array 'data'. + +------------------------------------------------------------------------------*/ +u32 h264bsdIntra16x16Prediction(mbStorage_t *pMb, u8 *data, u8 *ptr, + u32 width, u32 constrainedIntraPred) +{ + +/* Variables */ + + u32 availableA, availableB, availableD; + OMXResult omxRes; + +/* Code */ + ASSERT(pMb); + ASSERT(data); + ASSERT(ptr); + ASSERT(h264bsdPredModeIntra16x16(pMb->mbType) < 4); + + availableA = h264bsdIsNeighbourAvailable(pMb, pMb->mbA); + if (availableA && constrainedIntraPred && + (h264bsdMbPartPredMode(pMb->mbA->mbType) == PRED_MODE_INTER)) + availableA = HANTRO_FALSE; + availableB = h264bsdIsNeighbourAvailable(pMb, pMb->mbB); + if (availableB && constrainedIntraPred && + (h264bsdMbPartPredMode(pMb->mbB->mbType) == PRED_MODE_INTER)) + availableB = HANTRO_FALSE; + availableD = h264bsdIsNeighbourAvailable(pMb, pMb->mbD); + if (availableD && constrainedIntraPred && + (h264bsdMbPartPredMode(pMb->mbD->mbType) == PRED_MODE_INTER)) + availableD = HANTRO_FALSE; + + omxRes = omxVCM4P10_PredictIntra_16x16( (ptr-1), + (ptr - width), + (ptr - width-1), + data, + (i32)width, + 16, + (OMXVCM4P10Intra16x16PredMode) + h264bsdPredModeIntra16x16(pMb->mbType), + (i32)(availableB + (availableA<<1) + + (availableD<<5)) ); + if (omxRes != OMX_Sts_NoErr) + return HANTRO_NOK; + else + return(HANTRO_OK); +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdIntra4x4Prediction + + Functional description: + Perform intra 4x4 prediction for luma pixels and add residual + into prediction. The resulting luma pixels are stored in + macroblock array 'data'. The intra 4x4 prediction mode for each + block is stored in 'pMb' structure. 
+ +------------------------------------------------------------------------------*/ +u32 h264bsdIntra4x4Prediction(mbStorage_t *pMb, u8 *data, + macroblockLayer_t *mbLayer, + u8 *ptr, u32 width, + u32 constrainedIntraPred, u32 block) +{ + +/* Variables */ + u32 mode; + neighbour_t neighbour, neighbourB; + mbStorage_t *nMb, *nMb2; + u32 availableA, availableB, availableC, availableD; + + OMXResult omxRes; + u32 x, y; + u8 *l, *a, *al; +/* Code */ + ASSERT(pMb); + ASSERT(data); + ASSERT(mbLayer); + ASSERT(ptr); + ASSERT(pMb->intra4x4PredMode[block] < 9); + + neighbour = *h264bsdNeighbour4x4BlockA(block); + nMb = h264bsdGetNeighbourMb(pMb, neighbour.mb); + availableA = h264bsdIsNeighbourAvailable(pMb, nMb); + if (availableA && constrainedIntraPred && + ( h264bsdMbPartPredMode(nMb->mbType) == PRED_MODE_INTER) ) + { + availableA = HANTRO_FALSE; + } + + neighbourB = *h264bsdNeighbour4x4BlockB(block); + nMb2 = h264bsdGetNeighbourMb(pMb, neighbourB.mb); + availableB = h264bsdIsNeighbourAvailable(pMb, nMb2); + if (availableB && constrainedIntraPred && + ( h264bsdMbPartPredMode(nMb2->mbType) == PRED_MODE_INTER) ) + { + availableB = HANTRO_FALSE; + } + + mode = DetermineIntra4x4PredMode(mbLayer, + (u32)(availableA && availableB), + &neighbour, &neighbourB, block, nMb, nMb2); + pMb->intra4x4PredMode[block] = (u8)mode; + + neighbour = *h264bsdNeighbour4x4BlockC(block); + nMb = h264bsdGetNeighbourMb(pMb, neighbour.mb); + availableC = h264bsdIsNeighbourAvailable(pMb, nMb); + if (availableC && constrainedIntraPred && + ( h264bsdMbPartPredMode(nMb->mbType) == PRED_MODE_INTER) ) + { + availableC = HANTRO_FALSE; + } + + neighbour = *h264bsdNeighbour4x4BlockD(block); + nMb = h264bsdGetNeighbourMb(pMb, neighbour.mb); + availableD = h264bsdIsNeighbourAvailable(pMb, nMb); + if (availableD && constrainedIntraPred && + ( h264bsdMbPartPredMode(nMb->mbType) == PRED_MODE_INTER) ) + { + availableD = HANTRO_FALSE; + } + + x = h264bsdBlockX[block]; + y = h264bsdBlockY[block]; + + if (y == 0) + a 
= ptr - width + x; + else + a = data-16; + + if (x == 0) + l = ptr + y * width -1; + else + { + l = data-1; + width = 16; + } + + if (x == 0) + al = l-width; + else + al = a-1; + + omxRes = omxVCM4P10_PredictIntra_4x4( l, + a, + al, + data, + (i32)width, + 16, + (OMXVCM4P10Intra4x4PredMode)mode, + (i32)(availableB + + (availableA<<1) + + (availableD<<5) + + (availableC<<6)) ); + if (omxRes != OMX_Sts_NoErr) + return HANTRO_NOK; + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdIntraChromaPrediction + + Functional description: + Perform intra prediction for chroma pixels and add residual + into prediction. The resulting chroma pixels are stored in 'data'. + +------------------------------------------------------------------------------*/ +u32 h264bsdIntraChromaPrediction(mbStorage_t *pMb, u8 *data, image_t *image, + u32 predMode, u32 constrainedIntraPred) +{ + +/* Variables */ + + u32 availableA, availableB, availableD; + OMXResult omxRes; + u8 *ptr; + u32 width; + +/* Code */ + ASSERT(pMb); + ASSERT(data); + ASSERT(image); + ASSERT(predMode < 4); + + availableA = h264bsdIsNeighbourAvailable(pMb, pMb->mbA); + if (availableA && constrainedIntraPred && + (h264bsdMbPartPredMode(pMb->mbA->mbType) == PRED_MODE_INTER)) + availableA = HANTRO_FALSE; + availableB = h264bsdIsNeighbourAvailable(pMb, pMb->mbB); + if (availableB && constrainedIntraPred && + (h264bsdMbPartPredMode(pMb->mbB->mbType) == PRED_MODE_INTER)) + availableB = HANTRO_FALSE; + availableD = h264bsdIsNeighbourAvailable(pMb, pMb->mbD); + if (availableD && constrainedIntraPred && + (h264bsdMbPartPredMode(pMb->mbD->mbType) == PRED_MODE_INTER)) + availableD = HANTRO_FALSE; + + ptr = image->cb; + width = image->width*8; + + omxRes = omxVCM4P10_PredictIntraChroma_8x8( (ptr-1), + (ptr - width), + (ptr - width -1), + data, + (i32)width, + 8, + (OMXVCM4P10IntraChromaPredMode) + predMode, + (i32)(availableB + + (availableA<<1) + + 
(availableD<<5)) ); + if (omxRes != OMX_Sts_NoErr) + return HANTRO_NOK; + + /* advance pointers */ + data += 64; + ptr = image->cr; + + omxRes = omxVCM4P10_PredictIntraChroma_8x8( (ptr-1), + (ptr - width), + (ptr - width -1), + data, + (i32)width, + 8, + (OMXVCM4P10IntraChromaPredMode) + predMode, + (i32)(availableB + + (availableA<<1) + + (availableD<<5)) ); + if (omxRes != OMX_Sts_NoErr) + return HANTRO_NOK; + + return(HANTRO_OK); + +} + + +#else /* H264DEC_OMXDL */ + + +/*------------------------------------------------------------------------------ + + Function: h264bsdIntraPrediction + + Functional description: + Processes one intra macroblock. Performs intra prediction using + specified prediction mode. Writes the final macroblock + (prediction + residual) into the output image (image) + + Inputs: + pMb pointer to macroblock specific information + mbLayer pointer to current macroblock data from stream + image pointer to output image + mbNum current macroblock number + constrainedIntraPred flag specifying if neighbouring inter + macroblocks are used in intra prediction + data pointer where output macroblock will be stored + + Outputs: + pMb structure is updated with current macroblock + image current macroblock is written into image + data current macroblock is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK error in intra prediction + +------------------------------------------------------------------------------*/ +u32 h264bsdIntraPrediction(mbStorage_t *pMb, macroblockLayer_t *mbLayer, + image_t *image, u32 mbNum, u32 constrainedIntraPred, u8 *data) +{ + +/* Variables */ + + /* pelAbove and pelLeft contain samples above and left to the current + * macroblock. 
Above array contains also sample above-left to the current + * mb as well as 4 samples above-right to the current mb (latter only for + * luma) */ + /* lumD + lumB + lumC + cbD + cbB + crD + crB */ + u8 pelAbove[1 + 16 + 4 + 1 + 8 + 1 + 8]; + /* lumA + cbA + crA */ + u8 pelLeft[16 + 8 + 8]; + u32 tmp; + +/* Code */ + + ASSERT(pMb); + ASSERT(image); + ASSERT(mbNum < image->width * image->height); + ASSERT(h264bsdMbPartPredMode(pMb->mbType) != PRED_MODE_INTER); + + h264bsdGetNeighbourPels(image, pelAbove, pelLeft, mbNum); + + if (h264bsdMbPartPredMode(pMb->mbType) == PRED_MODE_INTRA16x16) + { + tmp = h264bsdIntra16x16Prediction(pMb, data, mbLayer->residual.level, + pelAbove, pelLeft, constrainedIntraPred); + if (tmp != HANTRO_OK) + return(tmp); + } + else + { + tmp = h264bsdIntra4x4Prediction(pMb, data, mbLayer, + pelAbove, pelLeft, constrainedIntraPred); + if (tmp != HANTRO_OK) + return(tmp); + } + + tmp = h264bsdIntraChromaPrediction(pMb, data + 256, + mbLayer->residual.level+16, pelAbove + 21, pelLeft + 16, + mbLayer->mbPred.intraChromaPredMode, constrainedIntraPred); + if (tmp != HANTRO_OK) + return(tmp); + + /* if decoded flag > 1 -> mb has already been successfully decoded and + * written to output -> do not write again */ + if (pMb->decoded > 1) + return HANTRO_OK; + + h264bsdWriteMacroblock(image, data); + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdGetNeighbourPels + + Functional description: + Get pixel values from neighbouring macroblocks into 'above' + and 'left' arrays. 
+ +------------------------------------------------------------------------------*/ + +void h264bsdGetNeighbourPels(image_t *image, u8 *above, u8 *left, u32 mbNum) +{ + +/* Variables */ + + u32 i; + u32 width, picSize; + u8 *ptr, *tmp; + u32 row, col; + +/* Code */ + + ASSERT(image); + ASSERT(above); + ASSERT(left); + ASSERT(mbNum < image->width * image->height); + + if (!mbNum) + return; + + width = image->width; + picSize = width * image->height; + row = mbNum / width; + col = mbNum - row * width; + + width *= 16; + ptr = image->data + row * 16 * width + col * 16; + + /* note that luma samples above-right to current macroblock do not make + * sense when current mb is the right-most mb in a row. Same applies to + * sample above-left if col is zero. However, usage of pels in prediction + * is controlled by neighbour availability information in actual prediction + * process */ + if (row) + { + tmp = ptr - (width + 1); + for (i = 21; i--;) + *above++ = *tmp++; + } + + if (col) + { + ptr--; + for (i = 16; i--; ptr+=width) + *left++ = *ptr; + } + + width >>= 1; + ptr = image->data + picSize * 256 + row * 8 * width + col * 8; + + if (row) + { + tmp = ptr - (width + 1); + for (i = 9; i--;) + *above++ = *tmp++; + tmp += (picSize * 64) - 9; + for (i = 9; i--;) + *above++ = *tmp++; + } + + if (col) + { + ptr--; + for (i = 8; i--; ptr+=width) + *left++ = *ptr; + ptr += (picSize * 64) - 8 * width; + for (i = 8; i--; ptr+=width) + *left++ = *ptr; + } +} + +/*------------------------------------------------------------------------------ + + Function: Intra16x16Prediction + + Functional description: + Perform intra 16x16 prediction mode for luma pixels and add + residual into prediction. The resulting luma pixels are + stored in macroblock array 'data'. 
+ +------------------------------------------------------------------------------*/ + +u32 h264bsdIntra16x16Prediction(mbStorage_t *pMb, u8 *data, i32 residual[][16], + u8 *above, u8 *left, u32 constrainedIntraPred) +{ + +/* Variables */ + + u32 i; + u32 availableA, availableB, availableD; + +/* Code */ + + ASSERT(data); + ASSERT(residual); + ASSERT(above); + ASSERT(left); + ASSERT(h264bsdPredModeIntra16x16(pMb->mbType) < 4); + + availableA = h264bsdIsNeighbourAvailable(pMb, pMb->mbA); + if (availableA && constrainedIntraPred && + (h264bsdMbPartPredMode(pMb->mbA->mbType) == PRED_MODE_INTER)) + availableA = HANTRO_FALSE; + availableB = h264bsdIsNeighbourAvailable(pMb, pMb->mbB); + if (availableB && constrainedIntraPred && + (h264bsdMbPartPredMode(pMb->mbB->mbType) == PRED_MODE_INTER)) + availableB = HANTRO_FALSE; + availableD = h264bsdIsNeighbourAvailable(pMb, pMb->mbD); + if (availableD && constrainedIntraPred && + (h264bsdMbPartPredMode(pMb->mbD->mbType) == PRED_MODE_INTER)) + availableD = HANTRO_FALSE; + + switch(h264bsdPredModeIntra16x16(pMb->mbType)) + { + case 0: /* Intra_16x16_Vertical */ + if (!availableB) + return(HANTRO_NOK); + Intra16x16VerticalPrediction(data, above+1); + break; + + case 1: /* Intra_16x16_Horizontal */ + if (!availableA) + return(HANTRO_NOK); + Intra16x16HorizontalPrediction(data, left); + break; + + case 2: /* Intra_16x16_DC */ + Intra16x16DcPrediction(data, above+1, left, availableA, availableB); + break; + + default: /* case 3: Intra_16x16_Plane */ + if (!availableA || !availableB || !availableD) + return(HANTRO_NOK); + Intra16x16PlanePrediction(data, above+1, left); + break; + } + /* add residual */ + for (i = 0; i < 16; i++) + h264bsdAddResidual(data, residual[i], i); + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: Intra4x4Prediction + + Functional description: + Perform intra 4x4 prediction for luma pixels and add residual + into prediction. 
The resulting luma pixels are stored in + macroblock array 'data'. The intra 4x4 prediction mode for each + block is stored in 'pMb' structure. + +------------------------------------------------------------------------------*/ + +u32 h264bsdIntra4x4Prediction(mbStorage_t *pMb, u8 *data, + macroblockLayer_t *mbLayer, u8 *above, + u8 *left, u32 constrainedIntraPred) +{ + +/* Variables */ + + u32 block; + u32 mode; + neighbour_t neighbour, neighbourB; + mbStorage_t *nMb, *nMb2; + u8 a[1 + 4 + 4], l[1 + 4]; + u32 data4x4[4]; + u32 availableA, availableB, availableC, availableD; + +/* Code */ + + ASSERT(data); + ASSERT(mbLayer); + ASSERT(above); + ASSERT(left); + + for (block = 0; block < 16; block++) + { + + ASSERT(pMb->intra4x4PredMode[block] < 9); + + neighbour = *h264bsdNeighbour4x4BlockA(block); + nMb = h264bsdGetNeighbourMb(pMb, neighbour.mb); + availableA = h264bsdIsNeighbourAvailable(pMb, nMb); + if (availableA && constrainedIntraPred && + ( h264bsdMbPartPredMode(nMb->mbType) == PRED_MODE_INTER) ) + { + availableA = HANTRO_FALSE; + } + + neighbourB = *h264bsdNeighbour4x4BlockB(block); + nMb2 = h264bsdGetNeighbourMb(pMb, neighbourB.mb); + availableB = h264bsdIsNeighbourAvailable(pMb, nMb2); + if (availableB && constrainedIntraPred && + ( h264bsdMbPartPredMode(nMb2->mbType) == PRED_MODE_INTER) ) + { + availableB = HANTRO_FALSE; + } + + mode = DetermineIntra4x4PredMode(mbLayer, + (u32)(availableA && availableB), + &neighbour, &neighbourB, block, nMb, nMb2); + pMb->intra4x4PredMode[block] = (u8)mode; + + neighbour = *h264bsdNeighbour4x4BlockC(block); + nMb = h264bsdGetNeighbourMb(pMb, neighbour.mb); + availableC = h264bsdIsNeighbourAvailable(pMb, nMb); + if (availableC && constrainedIntraPred && + ( h264bsdMbPartPredMode(nMb->mbType) == PRED_MODE_INTER) ) + { + availableC = HANTRO_FALSE; + } + + neighbour = *h264bsdNeighbour4x4BlockD(block); + nMb = h264bsdGetNeighbourMb(pMb, neighbour.mb); + availableD = h264bsdIsNeighbourAvailable(pMb, nMb); + if (availableD && 
constrainedIntraPred && + ( h264bsdMbPartPredMode(nMb->mbType) == PRED_MODE_INTER) ) + { + availableD = HANTRO_FALSE; + } + + Get4x4NeighbourPels(a, l, data, above, left, block); + + switch(mode) + { + case 0: /* Intra_4x4_Vertical */ + if (!availableB) + return(HANTRO_NOK); + Intra4x4VerticalPrediction((u8*)data4x4, a + 1); + break; + case 1: /* Intra_4x4_Horizontal */ + if (!availableA) + return(HANTRO_NOK); + Intra4x4HorizontalPrediction((u8*)data4x4, l + 1); + break; + case 2: /* Intra_4x4_DC */ + Intra4x4DcPrediction((u8*)data4x4, a + 1, l + 1, + availableA, availableB); + break; + case 3: /* Intra_4x4_Diagonal_Down_Left */ + if (!availableB) + return(HANTRO_NOK); + if (!availableC) + { + a[5] = a[6] = a[7] = a[8] = a[4]; + } + Intra4x4DiagonalDownLeftPrediction((u8*)data4x4, a + 1); + break; + case 4: /* Intra_4x4_Diagonal_Down_Right */ + if (!availableA || !availableB || !availableD) + return(HANTRO_NOK); + Intra4x4DiagonalDownRightPrediction((u8*)data4x4, a + 1, l + 1); + break; + case 5: /* Intra_4x4_Vertical_Right */ + if (!availableA || !availableB || !availableD) + return(HANTRO_NOK); + Intra4x4VerticalRightPrediction((u8*)data4x4, a + 1, l + 1); + break; + case 6: /* Intra_4x4_Horizontal_Down */ + if (!availableA || !availableB || !availableD) + return(HANTRO_NOK); + Intra4x4HorizontalDownPrediction((u8*)data4x4, a + 1, l + 1); + break; + case 7: /* Intra_4x4_Vertical_Left */ + if (!availableB) + return(HANTRO_NOK); + if (!availableC) + { + a[5] = a[6] = a[7] = a[8] = a[4]; + } + Intra4x4VerticalLeftPrediction((u8*)data4x4, a + 1); + break; + default: /* case 8 Intra_4x4_Horizontal_Up */ + if (!availableA) + return(HANTRO_NOK); + Intra4x4HorizontalUpPrediction((u8*)data4x4, l + 1); + break; + } + + Write4x4To16x16(data, (u8*)data4x4, block); + h264bsdAddResidual(data, mbLayer->residual.level[block], block); + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: 
IntraChromaPrediction + + Functional description: + Perform intra prediction for chroma pixels and add residual + into prediction. The resulting chroma pixels are stored in 'data'. + +------------------------------------------------------------------------------*/ + +u32 h264bsdIntraChromaPrediction(mbStorage_t *pMb, u8 *data, i32 residual[][16], + u8 *above, u8 *left, u32 predMode, u32 constrainedIntraPred) +{ + +/* Variables */ + + u32 i, comp, block; + u32 availableA, availableB, availableD; + +/* Code */ + + ASSERT(data); + ASSERT(residual); + ASSERT(above); + ASSERT(left); + ASSERT(predMode < 4); + + availableA = h264bsdIsNeighbourAvailable(pMb, pMb->mbA); + if (availableA && constrainedIntraPred && + (h264bsdMbPartPredMode(pMb->mbA->mbType) == PRED_MODE_INTER)) + availableA = HANTRO_FALSE; + availableB = h264bsdIsNeighbourAvailable(pMb, pMb->mbB); + if (availableB && constrainedIntraPred && + (h264bsdMbPartPredMode(pMb->mbB->mbType) == PRED_MODE_INTER)) + availableB = HANTRO_FALSE; + availableD = h264bsdIsNeighbourAvailable(pMb, pMb->mbD); + if (availableD && constrainedIntraPred && + (h264bsdMbPartPredMode(pMb->mbD->mbType) == PRED_MODE_INTER)) + availableD = HANTRO_FALSE; + + for (comp = 0, block = 16; comp < 2; comp++) + { + switch(predMode) + { + case 0: /* Intra_Chroma_DC */ + IntraChromaDcPrediction(data, above+1, left, availableA, + availableB); + break; + + case 1: /* Intra_Chroma_Horizontal */ + if (!availableA) + return(HANTRO_NOK); + IntraChromaHorizontalPrediction(data, left); + break; + + case 2: /* Intra_Chroma_Vertical */ + if (!availableB) + return(HANTRO_NOK); + IntraChromaVerticalPrediction(data, above+1); + + break; + + default: /* case 3: Intra_Chroma_Plane */ + if (!availableA || !availableB || !availableD) + return(HANTRO_NOK); + IntraChromaPlanePrediction(data, above+1, left); + break; + } + for (i = 0; i < 4; i++, block++) + h264bsdAddResidual(data, residual[i], block); + + /* advance pointers */ + data += 64; + above += 9; + left += 
8; + residual += 4; + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdAddResidual + + Functional description: + Add residual of a block into prediction in macroblock array 'data'. + The result (residual + prediction) is stored in 'data'. + +------------------------------------------------------------------------------*/ +#ifndef H264DEC_OMXDL +void h264bsdAddResidual(u8 *data, i32 *residual, u32 blockNum) +{ + +/* Variables */ + + u32 i; + u32 x, y; + u32 width; + i32 tmp1, tmp2, tmp3, tmp4; + u8 *tmp; + const u8 *clp = h264bsdClip + 512; + +/* Code */ + + ASSERT(data); + ASSERT(residual); + ASSERT(blockNum < 16 + 4 + 4); + + if (IS_RESIDUAL_EMPTY(residual)) + return; + + RANGE_CHECK_ARRAY(residual, -512, 511, 16); + + if (blockNum < 16) + { + width = 16; + x = h264bsdBlockX[blockNum]; + y = h264bsdBlockY[blockNum]; + } + else + { + width = 8; + x = h264bsdBlockX[blockNum & 0x3]; + y = h264bsdBlockY[blockNum & 0x3]; + } + + tmp = data + y*width + x; + for (i = 4; i; i--) + { + tmp1 = *residual++; + tmp2 = tmp[0]; + tmp3 = *residual++; + tmp4 = tmp[1]; + + tmp[0] = clp[tmp1 + tmp2]; + + tmp1 = *residual++; + tmp2 = tmp[2]; + + tmp[1] = clp[tmp3 + tmp4]; + + tmp3 = *residual++; + tmp4 = tmp[3]; + + tmp1 = clp[tmp1 + tmp2]; + tmp3 = clp[tmp3 + tmp4]; + tmp[2] = (u8)tmp1; + tmp[3] = (u8)tmp3; + + tmp += width; + } + +} +#endif +/*------------------------------------------------------------------------------ + + Function: Intra16x16VerticalPrediction + + Functional description: + Perform intra 16x16 vertical prediction mode. 
+ +------------------------------------------------------------------------------*/ + +void Intra16x16VerticalPrediction(u8 *data, u8 *above) +{ + +/* Variables */ + + u32 i, j; + +/* Code */ + + ASSERT(data); + ASSERT(above); + + for (i = 0; i < 16; i++) + { + for (j = 0; j < 16; j++) + { + *data++ = above[j]; + } + } + +} + +/*------------------------------------------------------------------------------ + + Function: Intra16x16HorizontalPrediction + + Functional description: + Perform intra 16x16 horizontal prediction mode. + +------------------------------------------------------------------------------*/ + +void Intra16x16HorizontalPrediction(u8 *data, u8 *left) +{ + +/* Variables */ + + u32 i, j; + +/* Code */ + + ASSERT(data); + ASSERT(left); + + for (i = 0; i < 16; i++) + { + for (j = 0; j < 16; j++) + { + *data++ = left[i]; + } + } + +} + +/*------------------------------------------------------------------------------ + + Function: Intra16x16DcPrediction + + Functional description: + Perform intra 16x16 DC prediction mode. + +------------------------------------------------------------------------------*/ + +void Intra16x16DcPrediction(u8 *data, u8 *above, u8 *left, u32 availableA, + u32 availableB) +{ + +/* Variables */ + + u32 i, tmp; + +/* Code */ + + ASSERT(data); + ASSERT(above); + ASSERT(left); + + if (availableA && availableB) + { + for (i = 0, tmp = 0; i < 16; i++) + tmp += above[i] + left[i]; + tmp = (tmp + 16) >> 5; + } + else if (availableA) + { + for (i = 0, tmp = 0; i < 16; i++) + tmp += left[i]; + tmp = (tmp + 8) >> 4; + } + else if (availableB) + { + for (i = 0, tmp = 0; i < 16; i++) + tmp += above[i]; + tmp = (tmp + 8) >> 4; + } + /* neither A nor B available */ + else + { + tmp = 128; + } + for (i = 0; i < 256; i++) + data[i] = (u8)tmp; + +} + +/*------------------------------------------------------------------------------ + + Function: Intra16x16PlanePrediction + + Functional description: + Perform intra 16x16 plane prediction mode. 
+ +------------------------------------------------------------------------------*/ + +void Intra16x16PlanePrediction(u8 *data, u8 *above, u8 *left) +{ + +/* Variables */ + + u32 i, j; + i32 a, b, c; + i32 tmp; + +/* Code */ + + ASSERT(data); + ASSERT(above); + ASSERT(left); + + a = 16 * (above[15] + left[15]); + + for (i = 0, b = 0; i < 8; i++) + b += ((i32)i + 1) * (above[8+i] - above[6-i]); + b = (5 * b + 32) >> 6; + + for (i = 0, c = 0; i < 7; i++) + c += ((i32)i + 1) * (left[8+i] - left[6-i]); + /* p[-1,-1] has to be accessed through above pointer */ + c += ((i32)i + 1) * (left[8+i] - above[-1]); + c = (5 * c + 32) >> 6; + + for (i = 0; i < 16; i++) + { + for (j = 0; j < 16; j++) + { + tmp = (a + b * ((i32)j - 7) + c * ((i32)i - 7) + 16) >> 5; + data[i*16+j] = (u8)CLIP1(tmp); + } + } + +} + +/*------------------------------------------------------------------------------ + + Function: IntraChromaDcPrediction + + Functional description: + Perform intra chroma DC prediction mode. + +------------------------------------------------------------------------------*/ + +void IntraChromaDcPrediction(u8 *data, u8 *above, u8 *left, u32 availableA, + u32 availableB) +{ + +/* Variables */ + + u32 i; + u32 tmp1, tmp2; + +/* Code */ + + ASSERT(data); + ASSERT(above); + ASSERT(left); + + /* y = 0..3 */ + if (availableA && availableB) + { + tmp1 = above[0] + above[1] + above[2] + above[3] + + left[0] + left[1] + left[2] + left[3]; + tmp1 = (tmp1 + 4) >> 3; + tmp2 = (above[4] + above[5] + above[6] + above[7] + 2) >> 2; + } + else if (availableB) + { + tmp1 = (above[0] + above[1] + above[2] + above[3] + 2) >> 2; + tmp2 = (above[4] + above[5] + above[6] + above[7] + 2) >> 2; + } + else if (availableA) + { + tmp1 = (left[0] + left[1] + left[2] + left[3] + 2) >> 2; + tmp2 = tmp1; + } + /* neither A nor B available */ + else + { + tmp1 = tmp2 = 128; + } + + ASSERT(tmp1 < 256 && tmp2 < 256); + for (i = 4; i--;) + { + *data++ = (u8)tmp1; + *data++ = (u8)tmp1; + *data++ = (u8)tmp1; + 
*data++ = (u8)tmp1; + *data++ = (u8)tmp2; + *data++ = (u8)tmp2; + *data++ = (u8)tmp2; + *data++ = (u8)tmp2; + } + + /* y = 4...7 */ + if (availableA) + { + tmp1 = (left[4] + left[5] + left[6] + left[7] + 2) >> 2; + if (availableB) + { + tmp2 = above[4] + above[5] + above[6] + above[7] + + left[4] + left[5] + left[6] + left[7]; + tmp2 = (tmp2 + 4) >> 3; + } + else + tmp2 = tmp1; + } + else if (availableB) + { + tmp1 = (above[0] + above[1] + above[2] + above[3] + 2) >> 2; + tmp2 = (above[4] + above[5] + above[6] + above[7] + 2) >> 2; + } + else + { + tmp1 = tmp2 = 128; + } + + ASSERT(tmp1 < 256 && tmp2 < 256); + for (i = 4; i--;) + { + *data++ = (u8)tmp1; + *data++ = (u8)tmp1; + *data++ = (u8)tmp1; + *data++ = (u8)tmp1; + *data++ = (u8)tmp2; + *data++ = (u8)tmp2; + *data++ = (u8)tmp2; + *data++ = (u8)tmp2; + } +} + +/*------------------------------------------------------------------------------ + + Function: IntraChromaHorizontalPrediction + + Functional description: + Perform intra chroma horizontal prediction mode. + +------------------------------------------------------------------------------*/ + +void IntraChromaHorizontalPrediction(u8 *data, u8 *left) +{ + +/* Variables */ + + u32 i; + +/* Code */ + + ASSERT(data); + ASSERT(left); + + for (i = 8; i--;) + { + *data++ = *left; + *data++ = *left; + *data++ = *left; + *data++ = *left; + *data++ = *left; + *data++ = *left; + *data++ = *left; + *data++ = *left++; + } + +} + +/*------------------------------------------------------------------------------ + + Function: IntraChromaVerticalPrediction + + Functional description: + Perform intra chroma vertical prediction mode. 
+ +------------------------------------------------------------------------------*/ + +void IntraChromaVerticalPrediction(u8 *data, u8 *above) +{ + +/* Variables */ + + u32 i; + +/* Code */ + + ASSERT(data); + ASSERT(above); + + for (i = 8; i--;data++/*above-=8*/) + { + data[0] = *above; + data[8] = *above; + data[16] = *above; + data[24] = *above; + data[32] = *above; + data[40] = *above; + data[48] = *above; + data[56] = *above++; + } + +} + +/*------------------------------------------------------------------------------ + + Function: IntraChromaPlanePrediction + + Functional description: + Perform intra chroma plane prediction mode. + +------------------------------------------------------------------------------*/ + +void IntraChromaPlanePrediction(u8 *data, u8 *above, u8 *left) +{ + +/* Variables */ + + u32 i; + i32 a, b, c; + i32 tmp; + const u8 *clp = h264bsdClip + 512; + +/* Code */ + + ASSERT(data); + ASSERT(above); + ASSERT(left); + + a = 16 * (above[7] + left[7]); + + b = (above[4] - above[2]) + 2 * (above[5] - above[1]) + + 3 * (above[6] - above[0]) + 4 * (above[7] - above[-1]); + b = (17 * b + 16) >> 5; + + /* p[-1,-1] has to be accessed through above pointer */ + c = (left[4] - left[2]) + 2 * (left[5] - left[1]) + + 3 * (left[6] - left[0]) + 4 * (left[7] - above[-1]); + c = (17 * c + 16) >> 5; + + /*a += 16;*/ + a = a - 3 * c + 16; + for (i = 8; i--; a += c) + { + tmp = (a - 3 * b); + *data++ = clp[tmp>>5]; + tmp += b; + *data++ = clp[tmp>>5]; + tmp += b; + *data++ = clp[tmp>>5]; + tmp += b; + *data++ = clp[tmp>>5]; + tmp += b; + *data++ = clp[tmp>>5]; + tmp += b; + *data++ = clp[tmp>>5]; + tmp += b; + *data++ = clp[tmp>>5]; + tmp += b; + *data++ = clp[tmp>>5]; + } + +} + +/*------------------------------------------------------------------------------ + + Function: Get4x4NeighbourPels + + Functional description: + Get neighbouring pixels of a 4x4 block into 'a' and 'l'. 
+ +------------------------------------------------------------------------------*/ + +void Get4x4NeighbourPels(u8 *a, u8 *l, u8 *data, u8 *above, u8 *left, + u32 blockNum) +{ + +/* Variables */ + + u32 x, y; + u8 t1, t2; + +/* Code */ + + ASSERT(a); + ASSERT(l); + ASSERT(data); + ASSERT(above); + ASSERT(left); + ASSERT(blockNum < 16); + + x = h264bsdBlockX[blockNum]; + y = h264bsdBlockY[blockNum]; + + /* A and D */ + if (x == 0) + { + t1 = left[y ]; + t2 = left[y + 1]; + l[1] = t1; + l[2] = t2; + t1 = left[y + 2]; + t2 = left[y + 3]; + l[3] = t1; + l[4] = t2; + } + else + { + t1 = data[y * 16 + x - 1 ]; + t2 = data[y * 16 + x - 1 + 16]; + l[1] = t1; + l[2] = t2; + t1 = data[y * 16 + x - 1 + 32]; + t2 = data[y * 16 + x - 1 + 48]; + l[3] = t1; + l[4] = t2; + } + + /* B, C and D */ + if (y == 0) + { + t1 = above[x ]; + t2 = above[x ]; + l[0] = t1; + a[0] = t2; + t1 = above[x + 1]; + t2 = above[x + 2]; + a[1] = t1; + a[2] = t2; + t1 = above[x + 3]; + t2 = above[x + 4]; + a[3] = t1; + a[4] = t2; + t1 = above[x + 5]; + t2 = above[x + 6]; + a[5] = t1; + a[6] = t2; + t1 = above[x + 7]; + t2 = above[x + 8]; + a[7] = t1; + a[8] = t2; + } + else + { + t1 = data[(y - 1) * 16 + x ]; + t2 = data[(y - 1) * 16 + x + 1]; + a[1] = t1; + a[2] = t2; + t1 = data[(y - 1) * 16 + x + 2]; + t2 = data[(y - 1) * 16 + x + 3]; + a[3] = t1; + a[4] = t2; + t1 = data[(y - 1) * 16 + x + 4]; + t2 = data[(y - 1) * 16 + x + 5]; + a[5] = t1; + a[6] = t2; + t1 = data[(y - 1) * 16 + x + 6]; + t2 = data[(y - 1) * 16 + x + 7]; + a[7] = t1; + a[8] = t2; + + if (x == 0) + l[0] = a[0] = left[y-1]; + else + l[0] = a[0] = data[(y - 1) * 16 + x - 1]; + } +} + + +/*------------------------------------------------------------------------------ + + Function: Intra4x4VerticalPrediction + + Functional description: + Perform intra 4x4 vertical prediction mode. 
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4VerticalPrediction(u8 *data, u8 *above)
+{
+
+/* Variables */
+
+    u32 row, col;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+
+    /* every row of the 4x4 block is a copy of above[0..3] */
+    for (row = 0; row < 4; row++)
+    {
+        for (col = 0; col < 4; col++)
+        {
+            data[row * 4 + col] = above[col];
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4HorizontalPrediction
+
+        Functional description:
+          Perform intra 4x4 horizontal prediction mode: row r of the 4x4
+          block in 'data' is filled with left[r].
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4HorizontalPrediction(u8 *data, u8 *left)
+{
+
+/* Variables */
+
+    u32 row, col;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(left);
+
+    for (row = 0; row < 4; row++)
+    {
+        for (col = 0; col < 4; col++)
+        {
+            data[row * 4 + col] = left[row];
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4DcPrediction
+
+        Functional description:
+          Perform intra 4x4 DC prediction mode.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4DcPrediction(u8 *data, u8 *above, u8 *left, u32 availableA,
+    u32 availableB)
+{
+
+/* Variables */
+
+    u32 dc;
+    u32 i;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    /* 'left' is read only when availableA is set, 'above' only when
+     * availableB is set */
+    if (availableA && availableB)
+    {
+        dc = above[0] + above[1] + above[2] + above[3];
+        dc += left[0] + left[1] + left[2] + left[3];
+        dc = (dc + 4) >> 3;
+    }
+    else if (availableA)
+    {
+        dc = (left[0] + left[1] + left[2] + left[3] + 2) >> 2;
+    }
+    else if (availableB)
+    {
+        dc = (above[0] + above[1] + above[2] + above[3] + 2) >> 2;
+    }
+    else
+    {
+        /* no neighbour available: fixed value */
+        dc = 128;
+    }
+
+    ASSERT(dc < 256);
+
+    /* the whole 4x4 block gets the same value */
+    for (i = 0; i < 16; i++)
+    {
+        data[i] = (u8)dc;
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4DiagonalDownLeftPrediction
+
+        Functional description:
+          Perform intra 4x4 diagonal down-left prediction mode.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4DiagonalDownLeftPrediction(u8 *data, u8 *above)
+{
+
+/* Variables */
+
+    u8 diag[7];
+    u32 k, row, col;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+
+    /* one filtered value per anti-diagonal; above[0..7] are read */
+    for (k = 0; k < 6; k++)
+    {
+        diag[k] = (u8)((above[k] + 2 * above[k + 1] + above[k + 2] + 2) >> 2);
+    }
+    /* the last diagonal has no third tap, above[7] is weighted by 3 */
+    diag[6] = (u8)((above[6] + 3 * above[7] + 2) >> 2);
+
+    /* all samples with the same row + col share a value */
+    for (row = 0; row < 4; row++)
+    {
+        for (col = 0; col < 4; col++)
+        {
+            data[row * 4 + col] = diag[row + col];
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4DiagonalDownRightPrediction
+
+        Functional description:
+          Perform intra 4x4 diagonal down-right prediction mode.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4DiagonalDownRightPrediction(u8 *data, u8 *above, u8 *left)
+{
+
+/* Variables */
+
+    u8 diag[7];
+    u32 row, col;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    /* diag[3 + col - row] is the value shared by one down-right diagonal:
+     * indices 0..2 come from 'left', 3 is the main diagonal, 4..6 come from
+     * 'above'. above[-1] and left[-1] are both read. */
+    diag[0] = (u8)((left[1] + 2 * left[2] + left[3] + 2) >> 2);
+    diag[1] = (u8)((left[0] + 2 * left[1] + left[2] + 2) >> 2);
+    diag[2] = (u8)((left[-1] + 2 * left[0] + left[1] + 2) >> 2);
+    diag[3] = (u8)((above[0] + 2 * above[-1] + left[0] + 2) >> 2);
+    diag[4] = (u8)((above[-1] + 2 * above[0] + above[1] + 2) >> 2);
+    diag[5] = (u8)((above[0] + 2 * above[1] + above[2] + 2) >> 2);
+    diag[6] = (u8)((above[1] + 2 * above[2] + above[3] + 2) >> 2);
+
+    for (row = 0; row < 4; row++)
+    {
+        for (col = 0; col < 4; col++)
+        {
+            data[row * 4 + col] = diag[3 + col - row];
+        }
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4VerticalRightPrediction
+
+        Functional description:
+          Perform intra 4x4 vertical right prediction mode.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4VerticalRightPrediction(u8 *data, u8 *above, u8 *left)
+{
+
+/* Variables */
+
+    u8 avg0, avg1, avg2, avg3;  /* 2-tap averages along the row above */
+    u8 flt0, flt1, flt2;        /* 3-tap values along the row above */
+    u8 cnr;                     /* 3-tap value centred on above[-1] */
+    u8 lft0, lft1;              /* 3-tap values along the left column */
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    avg0 = (u8)((above[-1] + above[0] + 1) >> 1);
+    avg1 = (u8)((above[0] + above[1] + 1) >> 1);
+    avg2 = (u8)((above[1] + above[2] + 1) >> 1);
+    avg3 = (u8)((above[2] + above[3] + 1) >> 1);
+
+    flt0 = (u8)((above[-1] + 2 * above[0] + above[1] + 2) >> 2);
+    flt1 = (u8)((above[0] + 2 * above[1] + above[2] + 2) >> 2);
+    flt2 = (u8)((above[1] + 2 * above[2] + above[3] + 2) >> 2);
+
+    cnr  = (u8)((above[0] + 2 * above[-1] + left[0] + 2) >> 2);
+    lft0 = (u8)((left[1] + 2 * left[0] + left[-1] + 2) >> 2);
+    lft1 = (u8)((left[2] + 2 * left[1] + left[0] + 2) >> 2);
+
+    /* rows 2 and 3 repeat rows 0 and 1 shifted right by one sample */
+    data[ 0] = avg0; data[ 1] = avg1; data[ 2] = avg2; data[ 3] = avg3;
+    data[ 4] = cnr;  data[ 5] = flt0; data[ 6] = flt1; data[ 7] = flt2;
+    data[ 8] = lft0; data[ 9] = avg0; data[10] = avg1; data[11] = avg2;
+    data[12] = lft1; data[13] = cnr;  data[14] = flt0; data[15] = flt1;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4HorizontalDownPrediction
+
+        Functional description:
+          Perform intra 4x4 horizontal down prediction mode.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4HorizontalDownPrediction(u8 *data, u8 *above, u8 *left)
+{
+
+/* Variables */
+
+    u8 avg0, avg1, avg2, avg3;  /* 2-tap averages along the left column */
+    u8 flt0, flt1, flt2;        /* 3-tap values along the left column */
+    u8 cnr;                     /* 3-tap value centred on above[-1] */
+    u8 top0, top1;              /* 3-tap values along the row above */
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    avg0 = (u8)((left[-1] + left[0] + 1) >> 1);
+    avg1 = (u8)((left[0] + left[1] + 1) >> 1);
+    avg2 = (u8)((left[1] + left[2] + 1) >> 1);
+    avg3 = (u8)((left[2] + left[3] + 1) >> 1);
+
+    flt0 = (u8)((left[-1] + 2 * left[0] + left[1] + 2) >> 2);
+    flt1 = (u8)((left[0] + 2 * left[1] + left[2] + 2) >> 2);
+    flt2 = (u8)((left[1] + 2 * left[2] + left[3] + 2) >> 2);
+
+    cnr  = (u8)((above[0] + 2 * above[-1] + left[0] + 2) >> 2);
+    top0 = (u8)((above[1] + 2 * above[0] + above[-1] + 2) >> 2);
+    top1 = (u8)((above[2] + 2 * above[1] + above[0] + 2) >> 2);
+
+    /* each row repeats the row above it shifted right by two samples */
+    data[ 0] = avg0; data[ 1] = cnr;  data[ 2] = top0; data[ 3] = top1;
+    data[ 4] = avg1; data[ 5] = flt0; data[ 6] = avg0; data[ 7] = cnr;
+    data[ 8] = avg2; data[ 9] = flt1; data[10] = avg1; data[11] = flt0;
+    data[12] = avg3; data[13] = flt2; data[14] = avg2; data[15] = flt1;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4VerticalLeftPrediction
+
+        Functional description:
+          Perform intra 4x4 vertical left prediction mode.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4VerticalLeftPrediction(u8 *data, u8 *above)
+{
+
+/* Variables */
+
+    u8 avg[5];  /* avg[k]: 2-tap average of above[k], above[k+1] */
+    u8 flt[5];  /* flt[k]: 3-tap value of above[k..k+2] */
+    u32 k, col;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+
+    /* above[0..6] are read */
+    for (k = 0; k < 5; k++)
+    {
+        avg[k] = (u8)((above[k] + above[k + 1] + 1) >> 1);
+        flt[k] = (u8)((above[k] + 2 * above[k + 1] + above[k + 2] + 2) >> 2);
+    }
+
+    /* rows 0 and 2 use the averages, rows 1 and 3 the filtered values;
+     * rows 2 and 3 start one sample further right */
+    for (col = 0; col < 4; col++)
+    {
+        data[col]      = avg[col];
+        data[4 + col]  = flt[col];
+        data[8 + col]  = avg[col + 1];
+        data[12 + col] = flt[col + 1];
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4HorizontalUpPrediction
+
+        Functional description:
+          Perform intra 4x4 horizontal up prediction mode.
+ +------------------------------------------------------------------------------*/ + +void Intra4x4HorizontalUpPrediction(u8 *data, u8 *left) +{ + +/* Variables */ + +/* Code */ + + ASSERT(data); + ASSERT(left); + + data[ 0] = (left[0] + left[1] + 1) >> 1; + data[ 1] = (left[0] + 2 * left[1] + left[2] + 2) >> 2; + data[ 2] = (left[1] + left[2] + 1) >> 1; + data[ 3] = (left[1] + 2 * left[2] + left[3] + 2) >> 2; + data[ 4] = (left[1] + left[2] + 1) >> 1; + data[ 5] = (left[1] + 2 * left[2] + left[3] + 2) >> 2; + data[ 6] = (left[2] + left[3] + 1) >> 1; + data[ 7] = (left[2] + 3 * left[3] + 2) >> 2; + data[ 8] = (left[2] + left[3] + 1) >> 1; + data[ 9] = (left[2] + 3 * left[3] + 2) >> 2; + data[10] = left[3]; + data[11] = left[3]; + data[12] = left[3]; + data[13] = left[3]; + data[14] = left[3]; + data[15] = left[3]; + +} + +#endif /* H264DEC_OMXDL */ + +/*------------------------------------------------------------------------------ + + Function: Write4x4To16x16 + + Functional description: + Write a 4x4 block (data4x4) into correct position + in 16x16 macroblock (data). + +------------------------------------------------------------------------------*/ + +void Write4x4To16x16(u8 *data, u8 *data4x4, u32 blockNum) +{ + +/* Variables */ + + u32 x, y; + u32 *in32, *out32; + +/* Code */ + + ASSERT(data); + ASSERT(data4x4); + ASSERT(blockNum < 16); + + x = h264bsdBlockX[blockNum]; + y = h264bsdBlockY[blockNum]; + + data += y*16+x; + + ASSERT(((u32)data&0x3) == 0); + + /*lint --e(826) */ + out32 = (u32 *)data; + /*lint --e(826) */ + in32 = (u32 *)data4x4; + + out32[0] = *in32++; + out32[4] = *in32++; + out32[8] = *in32++; + out32[12] = *in32++; +} + +/*------------------------------------------------------------------------------ + + Function: DetermineIntra4x4PredMode + + Functional description: + Returns the intra 4x4 prediction mode of a block based on the + neighbouring macroblocks and information parsed from stream. 
+ +------------------------------------------------------------------------------*/ + +u32 DetermineIntra4x4PredMode(macroblockLayer_t *pMbLayer, + u32 available, neighbour_t *nA, neighbour_t *nB, u32 index, + mbStorage_t *nMbA, mbStorage_t *nMbB) +{ + +/* Variables */ + + u32 mode1, mode2; + mbStorage_t *pMb; + +/* Code */ + + ASSERT(pMbLayer); + + /* dc only prediction? */ + if (!available) + mode1 = 2; + else + { + pMb = nMbA; + if (h264bsdMbPartPredMode(pMb->mbType) == PRED_MODE_INTRA4x4) + { + mode1 = pMb->intra4x4PredMode[nA->index]; + } + else + mode1 = 2; + + pMb = nMbB; + if (h264bsdMbPartPredMode(pMb->mbType) == PRED_MODE_INTRA4x4) + { + mode2 = pMb->intra4x4PredMode[nB->index]; + } + else + mode2 = 2; + + mode1 = MIN(mode1, mode2); + } + + if (!pMbLayer->mbPred.prevIntra4x4PredModeFlag[index]) + { + if (pMbLayer->mbPred.remIntra4x4PredMode[index] < mode1) + { + mode1 = pMbLayer->mbPred.remIntra4x4PredMode[index]; + } + else + { + mode1 = pMbLayer->mbPred.remIntra4x4PredMode[index] + 1; + } + } + + return(mode1); +} + + +/*lint +e702 */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.h new file mode 100755 index 0000000000000000000000000000000000000000..4652bd56347d7f5f39bd1535158b73304cfb90b0 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.h @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_INTRA_PREDICTION_H +#define H264SWDEC_INTRA_PREDICTION_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_image.h" +#include "h264bsd_macroblock_layer.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. 
Function prototypes
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_OMXDL /* prototypes used when H264DEC_OMXDL is not defined */
+u32 h264bsdIntraPrediction(mbStorage_t *pMb, macroblockLayer_t *mbLayer,
+    image_t *image, u32 mbNum, u32 constrainedIntraPred, u8 *data);
+
+u32 h264bsdIntra4x4Prediction(mbStorage_t *pMb, u8 *data,
+                              macroblockLayer_t *mbLayer,
+                              u8 *above, u8 *left, u32 constrainedIntraPred);
+u32 h264bsdIntra16x16Prediction(mbStorage_t *pMb, u8 *data, i32 residual[][16],
+    u8 *above, u8 *left, u32 constrainedIntraPred);
+
+u32 h264bsdIntraChromaPrediction(mbStorage_t *pMb, u8 *data, i32 residual[][16],
+    u8 *above, u8 *left, u32 predMode, u32 constrainedIntraPred);
+
+void h264bsdGetNeighbourPels(image_t *image, u8 *above, u8 *left, u32 mbNum);
+
+#else /* H264DEC_OMXDL defined: same names, different parameter lists */
+
+u32 h264bsdIntra4x4Prediction(mbStorage_t *pMb, u8 *data,
+                              macroblockLayer_t *mbLayer,
+                              u8 *pImage, u32 width,
+                              u32 constrainedIntraPred, u32 block);
+
+u32 h264bsdIntra16x16Prediction(mbStorage_t *pMb, u8 *data, u8 *pImage,
+                            u32 width, u32 constrainedIntraPred);
+
+u32 h264bsdIntraChromaPrediction(mbStorage_t *pMb, u8 *data, image_t *image,
+                                        u32 predMode, u32 constrainedIntraPred);
+
+#endif /* H264DEC_OMXDL */
+
+#endif /* #ifndef H264SWDEC_INTRA_PREDICTION_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.c
new file mode 100755
index 0000000000000000000000000000000000000000..2b3e7f0018c6bf819d67814ab89b9f1ef1670481
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.c
@@ -0,0 +1,1446 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + h264bsdDecodeMacroblockLayer + h264bsdMbPartPredMode + h264bsdNumMbPart + h264bsdNumSubMbPart + DecodeMbPred + DecodeSubMbPred + DecodeResidual + DetermineNc + CbpIntra16x16 + h264bsdPredModeIntra16x16 + h264bsdDecodeMacroblock + ProcessResidual + h264bsdSubMbPartMode + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_macroblock_layer.h" +#include "h264bsd_slice_header.h" +#include "h264bsd_util.h" +#include "h264bsd_vlc.h" +#include "h264bsd_cavlc.h" +#include "h264bsd_nal_unit.h" +#include "h264bsd_neighbour.h" +#include "h264bsd_transform.h" +#include "h264bsd_intra_prediction.h" +#include "h264bsd_inter_prediction.h" + +#ifdef H264DEC_OMXDL +#include "omxtypes.h" +#include "omxVC.h" +#include "armVC.h" +#endif /* H264DEC_OMXDL */ + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. 
Module defines
+------------------------------------------------------------------------------*/
+#ifdef H264DEC_OMXDL
+/* Offsets for the eight chroma 4x4 blocks: 256 + {0, 4, 32, 36} and
+ * 320 + {0, 4, 32, 36}. The values are consistent with two 8-pixel-wide
+ * 8x8 planes stored after 256 bytes of luma -- the use of this table is
+ * outside the part of the file reviewed here, TODO confirm. */
+static const u32 chromaIndex[8] = { 256, 260, 288, 292, 320, 324, 352, 356 };
+/* lumaIndex[n] equals 16*y + x for the top-left sample (x, y) of a 4x4
+ * block in a 16-pixel-wide array; entry n corresponds to raster block
+ * dcCoeffIndex[n], i.e. lumaIndex[n] ==
+ * 64 * (dcCoeffIndex[n] / 4) + 4 * (dcCoeffIndex[n] % 4). */
+static const u32 lumaIndex[16] = {   0,   4,  64,  68,
+                                     8,  12,  72,  76,
+                                   128, 132, 192, 196,
+                                   136, 140, 200, 204 };
+#endif
+/* mapping of dc coefficients array to luma blocks */
+static const u32 dcCoeffIndex[16] =
+    {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/* stream parsing helpers, all return a u32 status */
+static u32 DecodeMbPred(strmData_t *pStrmData, mbPred_t *pMbPred,
+    mbType_e mbType, u32 numRefIdxActive);
+static u32 DecodeSubMbPred(strmData_t *pStrmData, subMbPred_t *pSubMbPred,
+    mbType_e mbType, u32 numRefIdxActive);
+static u32 DecodeResidual(strmData_t *pStrmData, residual_t *pResidual,
+    mbStorage_t *pMb, mbType_e mbType, u32 codedBlockPattern);
+
+/* the coefficient count array is u8 in the H264DEC_OMXDL build and i16
+ * otherwise */
+#ifdef H264DEC_OMXDL
+static u32 DetermineNc(mbStorage_t *pMb, u32 blockIndex, u8 *pTotalCoeff);
+#else
+static u32 DetermineNc(mbStorage_t *pMb, u32 blockIndex, i16 *pTotalCoeff);
+#endif
+
+static u32 CbpIntra16x16(mbType_e mbType);
+/* residual processing: three routines in the H264DEC_OMXDL build, a single
+ * ProcessResidual otherwise */
+#ifdef H264DEC_OMXDL
+static u32 ProcessIntra4x4Residual(mbStorage_t *pMb, u8 *data, u32 constrainedIntraPred,
+                    macroblockLayer_t *mbLayer, const u8 **pSrc, image_t *image);
+static u32 ProcessChromaResidual(mbStorage_t *pMb, u8 *data, const u8 **pSrc );
+static u32 ProcessIntra16x16Residual(mbStorage_t *pMb, u8 *data, u32 constrainedIntraPred,
+                    u32 intraChromaPredMode, const u8 **pSrc, image_t *image);
+
+
+#else
+static u32 ProcessResidual(mbStorage_t *pMb, i32 residualLevel[][16], u32 *);
+#endif
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdDecodeMacroblockLayer
+
+        Functional description:
+          Parse macroblock specific information from bit stream.
+
+        Inputs:
+          pStrmData         pointer to stream data structure
+          pMb               pointer to macroblock storage structure
+          sliceType         type of the current slice
+          numRefIdxActive   maximum reference index
+
+        Outputs:
+          pMbLayer          stores the macroblock data parsed from stream
+
+        Returns:
+          HANTRO_OK         success
+          HANTRO_NOK        end of stream or error in stream
+
+        Notes:
+          The macroblock type is the decoded code number plus 6 in I
+          slices and plus 1 in other slices and has to stay <= 31. The
+          code number is compared against the limit before the offset is
+          added so that a huge code number cannot wrap around in u32
+          arithmetic and pass the range check.
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeMacroblockLayer(strmData_t *pStrmData,
+    macroblockLayer_t *pMbLayer, mbStorage_t *pMb, u32 sliceType,
+    u32 numRefIdxActive)
+{
+
+/* Variables */
+
+    u32 tmp, i, value;
+    i32 itmp;
+    mbPartPredMode_e partMode;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pMbLayer);
+
+#ifdef H264DEC_NEON
+    h264bsdClearMbLayer(pMbLayer, ((sizeof(macroblockLayer_t) + 63) & ~0x3F));
+#else
+    H264SwDecMemset(pMbLayer, 0, sizeof(macroblockLayer_t));
+#endif
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    /* check the status first so that 'value' is not read after a failed
+     * decode */
+    if (tmp != HANTRO_OK)
+        return(HANTRO_NOK);
+
+    if (IS_I_SLICE(sliceType))
+    {
+        /* value + 6 has to be <= 31; written without the addition because
+         * value + 6 wraps to 0..5 for value >= 0xFFFFFFFA */
+        if (value > 31 - 6)
+            return(HANTRO_NOK);
+        pMbLayer->mbType = (mbType_e)(value + 6);
+    }
+    else
+    {
+        /* value + 1 has to be <= 31; value + 1 wraps to 0 for 0xFFFFFFFF */
+        if (value > 31 - 1)
+            return(HANTRO_NOK);
+        pMbLayer->mbType = (mbType_e)(value + 1);
+    }
+
+    if (pMbLayer->mbType == I_PCM)
+    {
+        i32 *level;
+        while( !h264bsdIsByteAligned(pStrmData) )
+        {
+            /* pcm_alignment_zero_bit */
+            tmp = h264bsdGetBits(pStrmData, 1);
+            if (tmp)
+                return(HANTRO_NOK);
+        }
+
+        /* 384 raw 8-bit samples are stored starting at residual.level[0] */
+        level = pMbLayer->residual.level[0];
+        for (i = 0; i < 384; i++)
+        {
+            value = h264bsdGetBits(pStrmData, 8);
+            if (value == END_OF_STREAM)
+                return(HANTRO_NOK);
+            *level++ = (i32)value;
+        }
+    }
+    else
+    {
+        partMode = h264bsdMbPartPredMode(pMbLayer->mbType);
+        if ( (partMode == PRED_MODE_INTER) &&
+             (h264bsdNumMbPart(pMbLayer->mbType) == 4) )
+        {
+            tmp = DecodeSubMbPred(pStrmData, &pMbLayer->subMbPred,
+                pMbLayer->mbType, numRefIdxActive);
+        }
+        else
+        {
+            tmp = DecodeMbPred(pStrmData, &pMbLayer->mbPred,
+                pMbLayer->mbType, numRefIdxActive);
+        }
+        if (tmp != HANTRO_OK)
+            return(tmp);
+
+        /* coded block pattern is parsed from the stream except for intra
+         * 16x16, where it is derived from the macroblock type */
+        if (partMode != PRED_MODE_INTRA16x16)
+        {
+            tmp = h264bsdDecodeExpGolombMapped(pStrmData, &value,
+                (u32)(partMode == PRED_MODE_INTRA4x4));
+            if (tmp != HANTRO_OK)
+                return(tmp);
+            pMbLayer->codedBlockPattern = value;
+        }
+        else
+        {
+            pMbLayer->codedBlockPattern = CbpIntra16x16(pMbLayer->mbType);
+        }
+
+        if ( pMbLayer->codedBlockPattern ||
+             (partMode == PRED_MODE_INTRA16x16) )
+        {
+            tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+            if (tmp != HANTRO_OK || (itmp < -26) || (itmp > 25) )
+                return(HANTRO_NOK);
+            pMbLayer->mbQpDelta = itmp;
+
+            tmp = DecodeResidual(pStrmData, &pMbLayer->residual, pMb,
+                pMbLayer->mbType, pMbLayer->codedBlockPattern);
+
+            /* the read position is recomputed from the current position
+             * before the status is checked */
+            pStrmData->strmBuffReadBits =
+                (u32)(pStrmData->pStrmCurrPos - pStrmData->pStrmBuffStart) * 8 +
+                pStrmData->bitPosInWord;
+
+            if (tmp != HANTRO_OK)
+                return(tmp);
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdMbPartPredMode
+
+        Functional description:
+          Returns the prediction mode of a macroblock type
+
+------------------------------------------------------------------------------*/
+
+mbPartPredMode_e h264bsdMbPartPredMode(mbType_e mbType)
+{
+
+/* Variables */
+
+
+/* Code */
+
+    ASSERT(mbType <= 31);
+
+    if ((mbType <= P_8x8ref0))
+        return(PRED_MODE_INTER);
+    else if (mbType == I_4x4)
+        return(PRED_MODE_INTRA4x4);
+    else
+        return(PRED_MODE_INTRA16x16);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdNumMbPart
+
+        Functional description:
+          Returns the amount of macroblock partitions in a macroblock type
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdNumMbPart(mbType_e mbType)
+{
+
+/* Variables */
+
+
+/* Code */
+
+    ASSERT(h264bsdMbPartPredMode(mbType) == PRED_MODE_INTER);
+
+    switch (mbType)
+    {
+        case P_L0_16x16:
+        case P_Skip:
+            return(1);
+
+        case P_L0_L0_16x8:
+        case P_L0_L0_8x16:
+            return(2);
+
+        /* P_8x8 or P_8x8ref0 */
+        default:
+            return(4);
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdNumSubMbPart
+
+        Functional description:
+          Returns the amount of sub-partitions in a sub-macroblock type
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdNumSubMbPart(subMbType_e subMbType)
+{
+
+/* Variables */
+
+
+/* Code */
+
+    ASSERT(subMbType <= P_L0_4x4);
+
+    switch (subMbType)
+    {
+        case P_L0_8x8:
+            return(1);
+
+        case P_L0_8x4:
+        case P_L0_4x8:
+            return(2);
+
+        /* P_L0_4x4 */
+        default:
+            return(4);
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeMbPred
+
+        Functional description:
+          Parse macroblock prediction information from bit stream and store
+          in 'pMbPred'.
+
+------------------------------------------------------------------------------*/
+
+u32 DecodeMbPred(strmData_t *pStrmData, mbPred_t *pMbPred, mbType_e mbType,
+    u32 numRefIdxActive)
+{
+
+/* Variables */
+
+    u32 tmp, i, j, value;
+    i32 itmp;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pMbPred);
+
+    switch (h264bsdMbPartPredMode(mbType))
+    {
+        case PRED_MODE_INTER: /* PRED_MODE_INTER */
+            if (numRefIdxActive > 1)
+            {
+                for (i = h264bsdNumMbPart(mbType), j = 0; i--;  j++)
+                {
+                    tmp = h264bsdDecodeExpGolombTruncated(pStrmData, &value,
+                        (u32)(numRefIdxActive > 2));
+                    if (tmp != HANTRO_OK || value >= numRefIdxActive)
+                        return(HANTRO_NOK);
+
+                    pMbPred->refIdxL0[j] = value;
+                }
+            }
+
+            for (i = h264bsdNumMbPart(mbType), j = 0; i--;  j++)
+            {
+                tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+                pMbPred->mvdL0[j].hor = (i16)itmp;
+
+                tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+                pMbPred->mvdL0[j].ver = (i16)itmp;
+            }
+            break;
+
+        case PRED_MODE_INTRA4x4:
+            /* two passes of eight blocks; each block takes 1 flag bit plus
+             * 3 more bits when the flag is 0, so one 32-bit peek covers a
+             * whole pass */
+            for (itmp = 0, i = 0; itmp < 2; itmp++)
+            {
+                value = h264bsdShowBits32(pStrmData);
+                tmp = 0;
+                for (j = 8; j--; i++)
+                {
+                    pMbPred->prevIntra4x4PredModeFlag[i] =
+                        value & 0x80000000 ? HANTRO_TRUE : HANTRO_FALSE;
+                    value <<= 1;
+                    if (!pMbPred->prevIntra4x4PredModeFlag[i])
+                    {
+                        pMbPred->remIntra4x4PredMode[i] = value>>29;
+                        value <<= 3;
+                        tmp++;
+                    }
+                }
+                if (h264bsdFlushBits(pStrmData, 8 + 3*tmp) == END_OF_STREAM)
+                    return(HANTRO_NOK);
+            }
+            /* fall-through */
+
+        case PRED_MODE_INTRA16x16:
+            tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+            if (tmp != HANTRO_OK || value > 3)
+                return(HANTRO_NOK);
+            pMbPred->intraChromaPredMode = value;
+            break;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeSubMbPred
+
+        Functional description:
+          Parse sub-macroblock prediction information from bit stream and
+          store in 'pMbPred'.
+
+------------------------------------------------------------------------------*/
+
+u32 DecodeSubMbPred(strmData_t *pStrmData, subMbPred_t *pSubMbPred,
+    mbType_e mbType, u32 numRefIdxActive)
+{
+
+/* Variables */
+
+    u32 tmp, i, j, value;
+    i32 itmp;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSubMbPred);
+    ASSERT(h264bsdMbPartPredMode(mbType) == PRED_MODE_INTER);
+
+    for (i = 0; i < 4; i++)
+    {
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+        if (tmp != HANTRO_OK || value > 3)
+            return(HANTRO_NOK);
+        pSubMbPred->subMbType[i] = (subMbType_e)value;
+    }
+
+    if ( (numRefIdxActive > 1) && (mbType != P_8x8ref0) )
+    {
+        for (i = 0; i < 4; i++)
+        {
+            tmp = h264bsdDecodeExpGolombTruncated(pStrmData, &value,
+                (u32)(numRefIdxActive > 2));
+            if (tmp != HANTRO_OK || value >= numRefIdxActive)
+                return(HANTRO_NOK);
+            pSubMbPred->refIdxL0[i] = value;
+        }
+    }
+
+    for (i = 0; i < 4; i++)
+    {
+        j = 0;
+        for (value = h264bsdNumSubMbPart(pSubMbPred->subMbType[i]);
+             value--; j++)
+        {
+            tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+            if (tmp != HANTRO_OK)
+                return(tmp);
+            pSubMbPred->mvdL0[i][j].hor = (i16)itmp;
+
+            tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+            if (tmp != HANTRO_OK)
+                return(tmp);
+            pSubMbPred->mvdL0[i][j].ver = (i16)itmp;
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+#ifdef H264DEC_OMXDL
+/*------------------------------------------------------------------------------
+
+    Function: DecodeResidual
+
+        Functional description:
+          Parse residual information from bit stream and store in 'pResidual'.
+
+------------------------------------------------------------------------------*/
+
+u32 DecodeResidual(strmData_t *pStrmData, residual_t *pResidual,
+    mbStorage_t *pMb, mbType_e mbType, u32 codedBlockPattern)
+{
+
+/* Variables */
+
+    u32 i, j;
+    u32 blockCoded;
+    u32 blockIndex;
+    u32 is16x16;
+    OMX_INT nc;
+    OMXResult omxRes;
+    OMX_U8 *pPosCoefBuf;
+
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pResidual);
+
+    pPosCoefBuf = pResidual->posCoefBuf;
+
+    /* luma DC is at index 24 */
+    if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTRA16x16)
+    {
+        nc = (OMX_INT)DetermineNc(pMb, 0, pResidual->totalCoeff);
+#ifndef H264DEC_NEON
+        omxRes =  omxVCM4P10_DecodeCoeffsToPairCAVLC(
+                (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                (OMX_S32*) (&pStrmData->bitPosInWord),
+                &pResidual->totalCoeff[24],
+                &pPosCoefBuf,
+                nc,
+                16);
+#else
+        omxRes = armVCM4P10_DecodeCoeffsToPair(
+                (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                (OMX_S32*) (&pStrmData->bitPosInWord),
+                &pResidual->totalCoeff[24],
+                &pPosCoefBuf,
+                nc,
+                16);
+#endif
+        if (omxRes != OMX_Sts_NoErr)
+            return(HANTRO_NOK);
+        is16x16 = HANTRO_TRUE;
+    }
+    else
+        is16x16 = HANTRO_FALSE;
+
+    for (i = 4, blockIndex = 0; i--;)
+    {
+        /* luma cbp in bits 0-3 */
+        blockCoded = codedBlockPattern & 0x1;
+        codedBlockPattern >>= 1;
+        if (blockCoded)
+        {
+            for (j = 4; j--; blockIndex++)
+            {
+                nc = (OMX_INT)DetermineNc(pMb,blockIndex,pResidual->totalCoeff);
+                if (is16x16)
+                {
+#ifndef H264DEC_NEON
+                    omxRes =  omxVCM4P10_DecodeCoeffsToPairCAVLC(
+                            (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                            (OMX_S32*) (&pStrmData->bitPosInWord),
+                            &pResidual->totalCoeff[blockIndex],
+                            &pPosCoefBuf,
+                            nc,
+                            15);
+#else
+                    omxRes =  armVCM4P10_DecodeCoeffsToPair(
+                            (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                            (OMX_S32*) (&pStrmData->bitPosInWord),
+                            &pResidual->totalCoeff[blockIndex],
+                            &pPosCoefBuf,
+                            nc,
+                            15);
+#endif
+                }
+                else
+                {
+#ifndef H264DEC_NEON
+                    omxRes =  omxVCM4P10_DecodeCoeffsToPairCAVLC(
+                            (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                            (OMX_S32*) (&pStrmData->bitPosInWord),
+                            &pResidual->totalCoeff[blockIndex],
+                            &pPosCoefBuf,
+                            nc,
+                            16);
+#else
+                    omxRes = armVCM4P10_DecodeCoeffsToPair(
+                            (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                            (OMX_S32*) (&pStrmData->bitPosInWord),
+                            &pResidual->totalCoeff[blockIndex],
+                            &pPosCoefBuf,
+                            nc,
+                            16);
+#endif
+                }
+                if (omxRes != OMX_Sts_NoErr)
+                    return(HANTRO_NOK);
+            }
+        }
+        else
+            blockIndex += 4;
+    }
+
+    /* chroma DC block are at indices 25 and 26 */
+    blockCoded = codedBlockPattern & 0x3;
+    if (blockCoded)
+    {
+#ifndef H264DEC_NEON
+        omxRes =  omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC(
+                (const OMX_U8**) (&pStrmData->pStrmCurrPos),
+                (OMX_S32*) (&pStrmData->bitPosInWord),
+                &pResidual->totalCoeff[25],
+                &pPosCoefBuf);
+#else
+        omxRes = armVCM4P10_DecodeCoeffsToPair(
+                (const OMX_U8**) (&pStrmData->pStrmCurrPos),
+                (OMX_S32*) (&pStrmData->bitPosInWord),
+                &pResidual->totalCoeff[25],
+                &pPosCoefBuf,
+                17,
+                4);
+#endif
+        if (omxRes != OMX_Sts_NoErr)
+            return(HANTRO_NOK);
+#ifndef H264DEC_NEON
+        omxRes =  omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC(
+                (const OMX_U8**) (&pStrmData->pStrmCurrPos),
+                (OMX_S32*) (&pStrmData->bitPosInWord),
+                &pResidual->totalCoeff[26],
+                &pPosCoefBuf);
+#else
+        omxRes = armVCM4P10_DecodeCoeffsToPair(
+                (const OMX_U8**) (&pStrmData->pStrmCurrPos),
+                (OMX_S32*) (&pStrmData->bitPosInWord),
+                &pResidual->totalCoeff[26],
+                &pPosCoefBuf,
+                17,
+                4);
+#endif
+        if (omxRes != OMX_Sts_NoErr)
+            return(HANTRO_NOK);
+    }
+
+    /* chroma AC */
+    blockCoded = codedBlockPattern & 0x2;
+    if (blockCoded)
+    {
+        for (i = 8; i--;blockIndex++)
+        {
+            nc = (OMX_INT)DetermineNc(pMb, blockIndex, pResidual->totalCoeff);
+#ifndef H264DEC_NEON
+            omxRes =  omxVCM4P10_DecodeCoeffsToPairCAVLC(
+                    (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                    (OMX_S32*) (&pStrmData->bitPosInWord),
+                    &pResidual->totalCoeff[blockIndex],
+                    &pPosCoefBuf,
+                    nc,
+                    15);
+#else
+            omxRes =  armVCM4P10_DecodeCoeffsToPair(
+                    (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                    (OMX_S32*) (&pStrmData->bitPosInWord),
+                    &pResidual->totalCoeff[blockIndex],
+                    &pPosCoefBuf,
+                    nc,
+                    15);
+#endif
+            if (omxRes != OMX_Sts_NoErr)
+                return(HANTRO_NOK);
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+#else
+/*------------------------------------------------------------------------------
+
+    Function: DecodeResidual
+
+        Functional description:
+          Parse residual information from bit stream and store in 'pResidual'.
+
+------------------------------------------------------------------------------*/
+
+u32 DecodeResidual(strmData_t *pStrmData, residual_t *pResidual,
+    mbStorage_t *pMb, mbType_e mbType, u32 codedBlockPattern)
+{
+
+/* Variables */
+
+    u32 i, j, tmp;
+    i32 nc;
+    u32 blockCoded;
+    u32 blockIndex;
+    u32 is16x16;
+    i32 (*level)[16];
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pResidual);
+
+    level = pResidual->level;
+
+    /* luma DC is at index 24 */
+    if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTRA16x16)
+    {
+        nc = (i32)DetermineNc(pMb, 0, pResidual->totalCoeff);
+        tmp = h264bsdDecodeResidualBlockCavlc(pStrmData, level[24], nc, 16);
+        if ((tmp & 0xF) != HANTRO_OK)
+            return(tmp);
+        pResidual->totalCoeff[24] = (tmp >> 4) & 0xFF;
+        is16x16 = HANTRO_TRUE;
+    }
+    else
+        is16x16 = HANTRO_FALSE;
+
+    for (i = 4, blockIndex = 0; i--;)
+    {
+        /* luma cbp in bits 0-3 */
+        blockCoded = codedBlockPattern & 0x1;
+        codedBlockPattern >>= 1;
+        if (blockCoded)
+        {
+            for (j = 4; j--; blockIndex++)
+            {
+                nc = (i32)DetermineNc(pMb, blockIndex, pResidual->totalCoeff);
+                if (is16x16)
+                {
+                    tmp = h264bsdDecodeResidualBlockCavlc(pStrmData,
+                        level[blockIndex]
+ 1, nc, 15); + pResidual->coeffMap[blockIndex] = tmp >> 15; + } + else + { + tmp = h264bsdDecodeResidualBlockCavlc(pStrmData, + level[blockIndex], nc, 16); + pResidual->coeffMap[blockIndex] = tmp >> 16; + } + if ((tmp & 0xF) != HANTRO_OK) + return(tmp); + pResidual->totalCoeff[blockIndex] = (tmp >> 4) & 0xFF; + } + } + else + blockIndex += 4; + } + + /* chroma DC block are at indices 25 and 26 */ + blockCoded = codedBlockPattern & 0x3; + if (blockCoded) + { + tmp = h264bsdDecodeResidualBlockCavlc(pStrmData, level[25], -1, 4); + if ((tmp & 0xF) != HANTRO_OK) + return(tmp); + pResidual->totalCoeff[25] = (tmp >> 4) & 0xFF; + tmp = h264bsdDecodeResidualBlockCavlc(pStrmData, level[25]+4, -1, 4); + if ((tmp & 0xF) != HANTRO_OK) + return(tmp); + pResidual->totalCoeff[26] = (tmp >> 4) & 0xFF; + } + + /* chroma AC */ + blockCoded = codedBlockPattern & 0x2; + if (blockCoded) + { + for (i = 8; i--;blockIndex++) + { + nc = (i32)DetermineNc(pMb, blockIndex, pResidual->totalCoeff); + tmp = h264bsdDecodeResidualBlockCavlc(pStrmData, + level[blockIndex] + 1, nc, 15); + if ((tmp & 0xF) != HANTRO_OK) + return(tmp); + pResidual->totalCoeff[blockIndex] = (tmp >> 4) & 0xFF; + pResidual->coeffMap[blockIndex] = (tmp >> 15); + } + } + + return(HANTRO_OK); + +} +#endif + +/*------------------------------------------------------------------------------ + + Function: DetermineNc + + Functional description: + Returns the nC of a block. 
+ +------------------------------------------------------------------------------*/ +#ifdef H264DEC_OMXDL +u32 DetermineNc(mbStorage_t *pMb, u32 blockIndex, u8 *pTotalCoeff) +#else +u32 DetermineNc(mbStorage_t *pMb, u32 blockIndex, i16 *pTotalCoeff) +#endif +{ +/*lint -e702 */ +/* Variables */ + + u32 tmp; + i32 n; + const neighbour_t *neighbourA, *neighbourB; + u8 neighbourAindex, neighbourBindex; + +/* Code */ + + ASSERT(blockIndex < 24); + + /* if neighbour block belongs to current macroblock totalCoeff array + * mbStorage has not been set/updated yet -> use pTotalCoeff */ + neighbourA = h264bsdNeighbour4x4BlockA(blockIndex); + neighbourB = h264bsdNeighbour4x4BlockB(blockIndex); + neighbourAindex = neighbourA->index; + neighbourBindex = neighbourB->index; + if (neighbourA->mb == MB_CURR && neighbourB->mb == MB_CURR) + { + n = (pTotalCoeff[neighbourAindex] + + pTotalCoeff[neighbourBindex] + 1)>>1; + } + else if (neighbourA->mb == MB_CURR) + { + n = pTotalCoeff[neighbourAindex]; + if (h264bsdIsNeighbourAvailable(pMb, pMb->mbB)) + { + n = (n + pMb->mbB->totalCoeff[neighbourBindex] + 1) >> 1; + } + } + else if (neighbourB->mb == MB_CURR) + { + n = pTotalCoeff[neighbourBindex]; + if (h264bsdIsNeighbourAvailable(pMb, pMb->mbA)) + { + n = (n + pMb->mbA->totalCoeff[neighbourAindex] + 1) >> 1; + } + } + else + { + n = tmp = 0; + if (h264bsdIsNeighbourAvailable(pMb, pMb->mbA)) + { + n = pMb->mbA->totalCoeff[neighbourAindex]; + tmp = 1; + } + if (h264bsdIsNeighbourAvailable(pMb, pMb->mbB)) + { + if (tmp) + n = (n + pMb->mbB->totalCoeff[neighbourBindex] + 1) >> 1; + else + n = pMb->mbB->totalCoeff[neighbourBindex]; + } + } + return((u32)n); +/*lint +e702 */ +} + +/*------------------------------------------------------------------------------ + + Function: CbpIntra16x16 + + Functional description: + Returns the coded block pattern for intra 16x16 macroblock. 
+ +------------------------------------------------------------------------------*/ + +u32 CbpIntra16x16(mbType_e mbType) +{ + +/* Variables */ + + u32 cbp; + u32 tmp; + +/* Code */ + + ASSERT(mbType >= I_16x16_0_0_0 && mbType <= I_16x16_3_2_1); + + if (mbType >= I_16x16_0_0_1) + cbp = 15; + else + cbp = 0; + + /* tmp is 0 for I_16x16_0_0_0 mb type */ + /* ignore lint warning on arithmetic on enum's */ + tmp = /*lint -e(656)*/(mbType - I_16x16_0_0_0) >> 2; + if (tmp > 2) + tmp -= 3; + + cbp += tmp << 4; + + return(cbp); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdPredModeIntra16x16 + + Functional description: + Returns the prediction mode for intra 16x16 macroblock. + +------------------------------------------------------------------------------*/ + +u32 h264bsdPredModeIntra16x16(mbType_e mbType) +{ + +/* Variables */ + + u32 tmp; + +/* Code */ + + ASSERT(mbType >= I_16x16_0_0_0 && mbType <= I_16x16_3_2_1); + + /* tmp is 0 for I_16x16_0_0_0 mb type */ + /* ignore lint warning on arithmetic on enum's */ + tmp = /*lint -e(656)*/(mbType - I_16x16_0_0_0); + + return(tmp & 0x3); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdDecodeMacroblock + + Functional description: + Decode one macroblock and write into output image. 
+ + Inputs: + pMb pointer to macroblock specific information + mbLayer pointer to current macroblock data from stream + currImage pointer to output image + dpb pointer to decoded picture buffer + qpY pointer to slice QP + mbNum current macroblock number + constrainedIntraPred flag specifying if neighbouring inter + macroblocks are used in intra prediction + + Outputs: + pMb structure is updated with current macroblock + currImage decoded macroblock is written into output image + + Returns: + HANTRO_OK success + HANTRO_NOK error in macroblock decoding + +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodeMacroblock(mbStorage_t *pMb, macroblockLayer_t *pMbLayer, + image_t *currImage, dpbStorage_t *dpb, i32 *qpY, u32 mbNum, + u32 constrainedIntraPredFlag, u8* data) +{ + +/* Variables */ + + u32 i, tmp; + mbType_e mbType; +#ifdef H264DEC_OMXDL + const u8 *pSrc; +#endif +/* Code */ + + ASSERT(pMb); + ASSERT(pMbLayer); + ASSERT(currImage); + ASSERT(qpY && *qpY < 52); + ASSERT(mbNum < currImage->width*currImage->height); + + mbType = pMbLayer->mbType; + pMb->mbType = mbType; + + pMb->decoded++; + + h264bsdSetCurrImageMbPointers(currImage, mbNum); + + if (mbType == I_PCM) + { + u8 *pData = (u8*)data; +#ifdef H264DEC_OMXDL + u8 *tot = pMb->totalCoeff; +#else + i16 *tot = pMb->totalCoeff; +#endif + i32 *lev = pMbLayer->residual.level[0]; + + pMb->qpY = 0; + + /* if decoded flag > 1 -> mb has already been successfully decoded and + * written to output -> do not write again */ + if (pMb->decoded > 1) + { + for (i = 24; i--;) + *tot++ = 16; + return HANTRO_OK; + } + + for (i = 24; i--;) + { + *tot++ = 16; + for (tmp = 16; tmp--;) + *pData++ = (u8)(*lev++); + } + h264bsdWriteMacroblock(currImage, (u8*)data); + + return(HANTRO_OK); + } + else + { +#ifdef H264DEC_OMXDL + if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTER) + { + tmp = h264bsdInterPrediction(pMb, pMbLayer, dpb, mbNum, + currImage, (u8*)data); + if (tmp != 
HANTRO_OK) return (tmp); + } +#endif + if (mbType != P_Skip) + { + H264SwDecMemcpy(pMb->totalCoeff, + pMbLayer->residual.totalCoeff, + 27*sizeof(*pMb->totalCoeff)); + + /* update qpY */ + if (pMbLayer->mbQpDelta) + { + *qpY = *qpY + pMbLayer->mbQpDelta; + if (*qpY < 0) *qpY += 52; + else if (*qpY >= 52) *qpY -= 52; + } + pMb->qpY = (u32)*qpY; + +#ifdef H264DEC_OMXDL + pSrc = pMbLayer->residual.posCoefBuf; + + if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTER) + { + OMXResult res; + u8 *p; + u8 *totalCoeff = pMb->totalCoeff; + + for (i = 0; i < 16; i++, totalCoeff++) + { + p = data + lumaIndex[i]; + if (*totalCoeff) + { + res = omxVCM4P10_DequantTransformResidualFromPairAndAdd( + &pSrc, p, 0, p, 16, 16, *qpY, *totalCoeff); + if (res != OMX_Sts_NoErr) + return (HANTRO_NOK); + } + } + + } + else if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTRA4x4) + { + tmp = ProcessIntra4x4Residual(pMb, + data, + constrainedIntraPredFlag, + pMbLayer, + &pSrc, + currImage); + if (tmp != HANTRO_OK) + return (tmp); + } + else if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTRA16x16) + { + tmp = ProcessIntra16x16Residual(pMb, + data, + constrainedIntraPredFlag, + pMbLayer->mbPred.intraChromaPredMode, + &pSrc, + currImage); + if (tmp != HANTRO_OK) + return (tmp); + } + + tmp = ProcessChromaResidual(pMb, data, &pSrc); + +#else + tmp = ProcessResidual(pMb, pMbLayer->residual.level, + pMbLayer->residual.coeffMap); +#endif + if (tmp != HANTRO_OK) + return (tmp); + } + else + { + H264SwDecMemset(pMb->totalCoeff, 0, 27*sizeof(*pMb->totalCoeff)); + pMb->qpY = (u32)*qpY; + } +#ifdef H264DEC_OMXDL + /* if decoded flag > 1 -> mb has already been successfully decoded and + * written to output -> do not write again */ + if (pMb->decoded > 1) + return HANTRO_OK; + + h264bsdWriteMacroblock(currImage, data); +#else + if (h264bsdMbPartPredMode(mbType) != PRED_MODE_INTER) + { + tmp = h264bsdIntraPrediction(pMb, pMbLayer, currImage, mbNum, + constrainedIntraPredFlag, (u8*)data); + if (tmp != 
HANTRO_OK) return (tmp); + } + else + { + tmp = h264bsdInterPrediction(pMb, pMbLayer, dpb, mbNum, + currImage, (u8*)data); + if (tmp != HANTRO_OK) return (tmp); + } +#endif + } + + return HANTRO_OK; +} + + +#ifdef H264DEC_OMXDL + +/*------------------------------------------------------------------------------ + + Function: ProcessChromaResidual + + Functional description: + Process the residual data of chroma with + inverse quantization and inverse transform. + +------------------------------------------------------------------------------*/ +u32 ProcessChromaResidual(mbStorage_t *pMb, u8 *data, const u8 **pSrc ) +{ + u32 i; + u32 chromaQp; + i16 *pDc; + i16 dc[4 + 4] = {0,0,0,0,0,0,0,0}; + u8 *totalCoeff; + OMXResult result; + u8 *p; + + /* chroma DC processing. First chroma dc block is block with index 25 */ + chromaQp = + h264bsdQpC[CLIP3(0, 51, (i32)pMb->qpY + pMb->chromaQpIndexOffset)]; + + if (pMb->totalCoeff[25]) + { + pDc = dc; + result = omxVCM4P10_TransformDequantChromaDCFromPair( + pSrc, + pDc, + (i32)chromaQp); + if (result != OMX_Sts_NoErr) + return (HANTRO_NOK); + } + if (pMb->totalCoeff[26]) + { + pDc = dc+4; + result = omxVCM4P10_TransformDequantChromaDCFromPair( + pSrc, + pDc, + (i32)chromaQp); + if (result != OMX_Sts_NoErr) + return (HANTRO_NOK); + } + + pDc = dc; + totalCoeff = pMb->totalCoeff + 16; + for (i = 0; i < 8; i++, pDc++, totalCoeff++) + { + /* chroma prediction */ + if (*totalCoeff || *pDc) + { + p = data + chromaIndex[i]; + result = omxVCM4P10_DequantTransformResidualFromPairAndAdd( + pSrc, + p, + pDc, + p, + 8, + 8, + (i32)chromaQp, + *totalCoeff); + if (result != OMX_Sts_NoErr) + return (HANTRO_NOK); + } + } + + return(HANTRO_OK); +} + +/*------------------------------------------------------------------------------ + + Function: ProcessIntra16x16Residual + + Functional description: + Process the residual data of luma with + inverse quantization and inverse transform. 
+ +------------------------------------------------------------------------------*/ +u32 ProcessIntra16x16Residual(mbStorage_t *pMb, + u8 *data, + u32 constrainedIntraPred, + u32 intraChromaPredMode, + const u8** pSrc, + image_t *image) +{ + u32 i; + i16 *pDc; + i16 dc[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + u8 *totalCoeff; + OMXResult result; + u8 *p; + + totalCoeff = pMb->totalCoeff; + + if (totalCoeff[24]) + { + pDc = dc; + result = omxVCM4P10_TransformDequantLumaDCFromPair( + pSrc, + pDc, + (i32)pMb->qpY); + if (result != OMX_Sts_NoErr) + return (HANTRO_NOK); + } + /* Intra 16x16 pred */ + if (h264bsdIntra16x16Prediction(pMb, data, image->luma, + image->width*16, constrainedIntraPred) != HANTRO_OK) + return(HANTRO_NOK); + for (i = 0; i < 16; i++, totalCoeff++) + { + p = data + lumaIndex[i]; + pDc = &dc[dcCoeffIndex[i]]; + if (*totalCoeff || *pDc) + { + result = omxVCM4P10_DequantTransformResidualFromPairAndAdd( + pSrc, + p, + pDc, + p, + 16, + 16, + (i32)pMb->qpY, + *totalCoeff); + if (result != OMX_Sts_NoErr) + return (HANTRO_NOK); + } + } + + if (h264bsdIntraChromaPrediction(pMb, data + 256, + image, + intraChromaPredMode, + constrainedIntraPred) != HANTRO_OK) + return(HANTRO_NOK); + + return HANTRO_OK; +} + +/*------------------------------------------------------------------------------ + + Function: ProcessIntra4x4Residual + + Functional description: + Process the residual data of luma with + inverse quantization and inverse transform. 
+ +------------------------------------------------------------------------------*/ +u32 ProcessIntra4x4Residual(mbStorage_t *pMb, + u8 *data, + u32 constrainedIntraPred, + macroblockLayer_t *mbLayer, + const u8 **pSrc, + image_t *image) +{ + u32 i; + u8 *totalCoeff; + OMXResult result; + u8 *p; + + totalCoeff = pMb->totalCoeff; + + for (i = 0; i < 16; i++, totalCoeff++) + { + p = data + lumaIndex[i]; + if (h264bsdIntra4x4Prediction(pMb, p, mbLayer, image->luma, + image->width*16, constrainedIntraPred, i) != HANTRO_OK) + return(HANTRO_NOK); + + if (*totalCoeff) + { + result = omxVCM4P10_DequantTransformResidualFromPairAndAdd( + pSrc, + p, + NULL, + p, + 16, + 16, + (i32)pMb->qpY, + *totalCoeff); + if (result != OMX_Sts_NoErr) + return (HANTRO_NOK); + } + } + + if (h264bsdIntraChromaPrediction(pMb, data + 256, + image, + mbLayer->mbPred.intraChromaPredMode, + constrainedIntraPred) != HANTRO_OK) + return(HANTRO_NOK); + + return HANTRO_OK; +} + +#else /* H264DEC_OMXDL */ + +/*------------------------------------------------------------------------------ + + Function: ProcessResidual + + Functional description: + Process the residual data of one macroblock with + inverse quantization and inverse transform. 
+ +------------------------------------------------------------------------------*/ + +u32 ProcessResidual(mbStorage_t *pMb, i32 residualLevel[][16], u32 *coeffMap) +{ + +/* Variables */ + + u32 i; + u32 chromaQp; + i32 (*blockData)[16]; + i32 (*blockDc)[16]; + i16 *totalCoeff; + i32 *chromaDc; + const u32 *dcCoeffIdx; + +/* Code */ + + ASSERT(pMb); + ASSERT(residualLevel); + + /* set pointers to DC coefficient blocks */ + blockDc = residualLevel + 24; + + blockData = residualLevel; + totalCoeff = pMb->totalCoeff; + if (h264bsdMbPartPredMode(pMb->mbType) == PRED_MODE_INTRA16x16) + { + if (totalCoeff[24]) + { + h264bsdProcessLumaDc(*blockDc, pMb->qpY); + } + dcCoeffIdx = dcCoeffIndex; + + for (i = 16; i--; blockData++, totalCoeff++, coeffMap++) + { + /* set dc coefficient of luma block */ + (*blockData)[0] = (*blockDc)[*dcCoeffIdx++]; + if ((*blockData)[0] || *totalCoeff) + { + if (h264bsdProcessBlock(*blockData, pMb->qpY, 1, *coeffMap) != + HANTRO_OK) + return(HANTRO_NOK); + } + else + MARK_RESIDUAL_EMPTY(*blockData); + } + } + else + { + for (i = 16; i--; blockData++, totalCoeff++, coeffMap++) + { + if (*totalCoeff) + { + if (h264bsdProcessBlock(*blockData, pMb->qpY, 0, *coeffMap) != + HANTRO_OK) + return(HANTRO_NOK); + } + else + MARK_RESIDUAL_EMPTY(*blockData); + } + } + + /* chroma DC processing. 
First chroma dc block is block with index 25 */ + chromaQp = + h264bsdQpC[CLIP3(0, 51, (i32)pMb->qpY + pMb->chromaQpIndexOffset)]; + if (pMb->totalCoeff[25] || pMb->totalCoeff[26]) + h264bsdProcessChromaDc(residualLevel[25], chromaQp); + chromaDc = residualLevel[25]; + for (i = 8; i--; blockData++, totalCoeff++, coeffMap++) + { + /* set dc coefficient of chroma block */ + (*blockData)[0] = *chromaDc++; + if ((*blockData)[0] || *totalCoeff) + { + if (h264bsdProcessBlock(*blockData, chromaQp, 1,*coeffMap) != + HANTRO_OK) + return(HANTRO_NOK); + } + else + MARK_RESIDUAL_EMPTY(*blockData); + } + + return(HANTRO_OK); +} +#endif /* H264DEC_OMXDL */ + +/*------------------------------------------------------------------------------ + + Function: h264bsdSubMbPartMode + + Functional description: + Returns the macroblock's sub-partition mode. + +------------------------------------------------------------------------------*/ + +subMbPartMode_e h264bsdSubMbPartMode(subMbType_e subMbType) +{ + +/* Variables */ + + +/* Code */ + + ASSERT(subMbType < 4); + + return((subMbPartMode_e)subMbType); + +} + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.h new file mode 100755 index 0000000000000000000000000000000000000000..32bc3403741ca0e15e7b010e2d0d06c6cb5451dd --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.h @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_MACROBLOCK_LAYER_H +#define H264SWDEC_MACROBLOCK_LAYER_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_stream.h" +#include "h264bsd_image.h" +#include "h264bsd_dpb.h" + +/*------------------------------------------------------------------------------ + 2. 
Module defines +------------------------------------------------------------------------------*/ + +/* Macro to determine if a mb is an intra mb */ +#define IS_INTRA_MB(a) ((a).mbType > 5) + +/* Macro to determine if a mb is an I_PCM mb */ +#define IS_I_PCM_MB(a) ((a).mbType == 31) + +typedef enum { + P_Skip = 0, + P_L0_16x16 = 1, + P_L0_L0_16x8 = 2, + P_L0_L0_8x16 = 3, + P_8x8 = 4, + P_8x8ref0 = 5, + I_4x4 = 6, + I_16x16_0_0_0 = 7, + I_16x16_1_0_0 = 8, + I_16x16_2_0_0 = 9, + I_16x16_3_0_0 = 10, + I_16x16_0_1_0 = 11, + I_16x16_1_1_0 = 12, + I_16x16_2_1_0 = 13, + I_16x16_3_1_0 = 14, + I_16x16_0_2_0 = 15, + I_16x16_1_2_0 = 16, + I_16x16_2_2_0 = 17, + I_16x16_3_2_0 = 18, + I_16x16_0_0_1 = 19, + I_16x16_1_0_1 = 20, + I_16x16_2_0_1 = 21, + I_16x16_3_0_1 = 22, + I_16x16_0_1_1 = 23, + I_16x16_1_1_1 = 24, + I_16x16_2_1_1 = 25, + I_16x16_3_1_1 = 26, + I_16x16_0_2_1 = 27, + I_16x16_1_2_1 = 28, + I_16x16_2_2_1 = 29, + I_16x16_3_2_1 = 30, + I_PCM = 31 +} mbType_e; + +typedef enum { + P_L0_8x8 = 0, + P_L0_8x4 = 1, + P_L0_4x8 = 2, + P_L0_4x4 = 3 +} subMbType_e; + +typedef enum { + MB_P_16x16 = 0, + MB_P_16x8, + MB_P_8x16, + MB_P_8x8 +} mbPartMode_e; + +typedef enum { + MB_SP_8x8 = 0, + MB_SP_8x4, + MB_SP_4x8, + MB_SP_4x4 +} subMbPartMode_e; + +typedef enum { + PRED_MODE_INTRA4x4 = 0, + PRED_MODE_INTRA16x16 , + PRED_MODE_INTER +} mbPartPredMode_e; + +/*------------------------------------------------------------------------------ + 3. 
Data types +------------------------------------------------------------------------------*/ + +typedef struct +{ + /* MvPrediction16x16 assumes that MVs are 16bits */ + i16 hor; + i16 ver; +} mv_t; + +typedef struct +{ + u32 prevIntra4x4PredModeFlag[16]; + u32 remIntra4x4PredMode[16]; + u32 intraChromaPredMode; + u32 refIdxL0[4]; + mv_t mvdL0[4]; +} mbPred_t; + +typedef struct +{ + subMbType_e subMbType[4]; + u32 refIdxL0[4]; + mv_t mvdL0[4][4]; +} subMbPred_t; + +typedef struct +{ +#ifdef H264DEC_OMXDL + u8 posCoefBuf[27*16*3]; + u8 totalCoeff[27]; +#else + i16 totalCoeff[27]; +#endif + i32 level[26][16]; + u32 coeffMap[24]; +} residual_t; + +typedef struct +{ + mbType_e mbType; + u32 codedBlockPattern; + i32 mbQpDelta; + mbPred_t mbPred; + subMbPred_t subMbPred; + residual_t residual; +} macroblockLayer_t; + +typedef struct mbStorage +{ + mbType_e mbType; + u32 sliceId; + u32 disableDeblockingFilterIdc; + i32 filterOffsetA; + i32 filterOffsetB; + u32 qpY; + i32 chromaQpIndexOffset; +#ifdef H264DEC_OMXDL + u8 totalCoeff[27]; +#else + i16 totalCoeff[27]; +#endif + u8 intra4x4PredMode[16]; + u32 refPic[4]; + u8* refAddr[4]; + mv_t mv[16]; + u32 decoded; + struct mbStorage *mbA; + struct mbStorage *mbB; + struct mbStorage *mbC; + struct mbStorage *mbD; +} mbStorage_t; + +/*------------------------------------------------------------------------------ + 4. 
Function prototypes +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodeMacroblockLayer(strmData_t *pStrmData, + macroblockLayer_t *pMbLayer, mbStorage_t *pMb, u32 sliceType, + u32 numRefIdxActive); + +u32 h264bsdNumMbPart(mbType_e mbType); +u32 h264bsdNumSubMbPart(subMbType_e subMbType); + +subMbPartMode_e h264bsdSubMbPartMode(subMbType_e subMbType); + +u32 h264bsdDecodeMacroblock(mbStorage_t *pMb, macroblockLayer_t *pMbLayer, + image_t *currImage, dpbStorage_t *dpb, i32 *qpY, u32 mbNum, + u32 constrainedIntraPredFlag, u8* data); + +u32 h264bsdPredModeIntra16x16(mbType_e mbType); + +mbPartPredMode_e h264bsdMbPartPredMode(mbType_e mbType); +#ifdef H264DEC_NEON +u32 h264bsdClearMbLayer(macroblockLayer_t *pMbLayer, u32 size); +#endif + +#endif /* #ifdef H264SWDEC_MACROBLOCK_LAYER_H */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.c new file mode 100755 index 0000000000000000000000000000000000000000..e44c43a7cc58ad319ee79e4d5e5f162aa2bd2074 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.c @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. 
External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + h264bsdDecodeNalUnit + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_nal_unit.h" +#include "h264bsd_util.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + + Function name: h264bsdDecodeNalUnit + + Functional description: + Decode NAL unit header information + + Inputs: + pStrmData pointer to stream data structure + + Outputs: + pNalUnit NAL unit header information is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK invalid NAL unit header information + +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodeNalUnit(strmData_t *pStrmData, nalUnit_t *pNalUnit) +{ + +/* Variables */ + + u32 tmp; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pNalUnit); + ASSERT(pStrmData->bitPosInWord == 0); + + /* forbidden_zero_bit (not checked to be zero, errors ignored) */ + tmp = h264bsdGetBits(pStrmData, 1); + /* Assuming that NAL unit starts from byte boundary ­> don't have to check + * following 7 bits for END_OF_STREAM */ + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + + tmp = 
h264bsdGetBits(pStrmData, 2); + pNalUnit->nalRefIdc = tmp; + + tmp = h264bsdGetBits(pStrmData, 5); + pNalUnit->nalUnitType = (nalUnitType_e)tmp; + + /* data partitioning NAL units not supported */ + if ( (tmp == 2) || (tmp == 3) || (tmp == 4) ) + { + return(HANTRO_NOK); + } + + /* nal_ref_idc shall not be zero for these nal_unit_types */ + if ( ( (tmp == NAL_SEQ_PARAM_SET) || (tmp == NAL_PIC_PARAM_SET) || + (tmp == NAL_CODED_SLICE_IDR) ) && (pNalUnit->nalRefIdc == 0) ) + { + return(HANTRO_NOK); + } + /* nal_ref_idc shall be zero for these nal_unit_types */ + else if ( ( (tmp == NAL_SEI) || (tmp == NAL_ACCESS_UNIT_DELIMITER) || + (tmp == NAL_END_OF_SEQUENCE) || (tmp == NAL_END_OF_STREAM) || + (tmp == NAL_FILLER_DATA) ) && (pNalUnit->nalRefIdc != 0) ) + { + return(HANTRO_NOK); + } + + return(HANTRO_OK); + +} + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.h new file mode 100755 index 0000000000000000000000000000000000000000..38957bf73c6c4d365d15798fc7403b71686caa0b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.h @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. 
Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_NAL_UNIT_H +#define H264SWDEC_NAL_UNIT_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_stream.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +/* macro to determine if NAL unit pointed by pNalUnit contains an IDR slice */ +#define IS_IDR_NAL_UNIT(pNalUnit) \ + ((pNalUnit)->nalUnitType == NAL_CODED_SLICE_IDR) + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + +typedef enum { + NAL_CODED_SLICE = 1, + NAL_CODED_SLICE_IDR = 5, + NAL_SEI = 6, + NAL_SEQ_PARAM_SET = 7, + NAL_PIC_PARAM_SET = 8, + NAL_ACCESS_UNIT_DELIMITER = 9, + NAL_END_OF_SEQUENCE = 10, + NAL_END_OF_STREAM = 11, + NAL_FILLER_DATA = 12, + NAL_MAX_TYPE_VALUE = 31 +} nalUnitType_e; + +typedef struct +{ + nalUnitType_e nalUnitType; + u32 nalRefIdc; +} nalUnit_t; + +/*------------------------------------------------------------------------------ + 4. 
Function prototypes +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodeNalUnit(strmData_t *pStrmData, nalUnit_t *pNalUnit); + +#endif /* #ifdef H264SWDEC_NAL_UNIT_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.c new file mode 100755 index 0000000000000000000000000000000000000000..ce5eeff76e7a05c811c7d9359f3c60b7c1a9a9a2 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.c @@ -0,0 +1,382 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + h264bsdInitMbNeighbours + h264bsdGetNeighbourMb + h264bsdNeighbour4x4BlockA + h264bsdNeighbour4x4BlockB + h264bsdNeighbour4x4BlockC + h264bsdNeighbour4x4BlockD + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. 
Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_neighbour.h" +#include "h264bsd_util.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/* Following four tables indicate neighbours of each block of a macroblock. + * First 16 values are for luma blocks, next 4 values for Cb and last 4 + * values for Cr. Elements of the table indicate to which macroblock the + * neighbour block belongs and the index of the neighbour block in question. + * Indexing of the blocks goes as follows + * + * Y Cb Cr + * 0 1 4 5 16 17 20 21 + * 2 3 6 7 18 19 22 23 + * 8 9 12 13 + * 10 11 14 15 + */ + +/* left neighbour for each block; entry [blockIndex] is what h264bsdNeighbour4x4BlockA() returns */ +static const neighbour_t N_A_4x4B[24] = { + {MB_A,5}, {MB_CURR,0}, {MB_A,7}, {MB_CURR,2}, + {MB_CURR,1}, {MB_CURR,4}, {MB_CURR,3}, {MB_CURR,6}, + {MB_A,13}, {MB_CURR,8}, {MB_A,15}, {MB_CURR,10}, + {MB_CURR,9}, {MB_CURR,12},{MB_CURR,11},{MB_CURR,14}, + {MB_A,17}, {MB_CURR,16},{MB_A,19}, {MB_CURR,18}, + {MB_A,21}, {MB_CURR,20},{MB_A,23}, {MB_CURR,22} }; + +/* above neighbour for each block; entry [blockIndex] is what h264bsdNeighbour4x4BlockB() returns */ +static const neighbour_t N_B_4x4B[24] = { + {MB_B,10}, {MB_B,11}, {MB_CURR,0}, {MB_CURR,1}, + {MB_B,14}, {MB_B,15}, {MB_CURR,4}, {MB_CURR,5}, + {MB_CURR,2}, {MB_CURR,3}, {MB_CURR,8}, {MB_CURR,9}, + {MB_CURR,6}, {MB_CURR,7}, {MB_CURR,12},{MB_CURR,13}, + {MB_B,18}, {MB_B,19}, {MB_CURR,16},{MB_CURR,17}, + {MB_B,22}, {MB_B,23}, {MB_CURR,20},{MB_CURR,21} }; + +/* above-right neighbour for each block; entry [blockIndex] is what h264bsdNeighbour4x4BlockC() returns. Entries tagged MB_NA resolve to NULL in h264bsdGetNeighbourMb(), i.e. they are treated as not available */ +static const neighbour_t N_C_4x4B[24] = { + {MB_B,11}, {MB_B,14}, {MB_CURR,1}, {MB_NA,4}, + {MB_B,15}, {MB_C,10}, {MB_CURR,5}, {MB_NA,0}, + {MB_CURR,3}, {MB_CURR,6}, 
{MB_CURR,9}, {MB_NA,12}, + {MB_CURR,7}, {MB_NA,2}, {MB_CURR,13},{MB_NA,8}, + {MB_B,19}, {MB_C,18}, {MB_CURR,17},{MB_NA,16}, + {MB_B,23}, {MB_C,22}, {MB_CURR,21},{MB_NA,20} }; + +/* above-left neighbour for each block; entry [blockIndex] is what h264bsdNeighbour4x4BlockD() returns */ +static const neighbour_t N_D_4x4B[24] = { + {MB_D,15}, {MB_B,10}, {MB_A,5}, {MB_CURR,0}, + {MB_B,11}, {MB_B,14}, {MB_CURR,1}, {MB_CURR,4}, + {MB_A,7}, {MB_CURR,2}, {MB_A,13}, {MB_CURR,8}, + {MB_CURR,3}, {MB_CURR,6}, {MB_CURR,9}, {MB_CURR,12}, + {MB_D,19}, {MB_B,18}, {MB_A,17}, {MB_CURR,16}, + {MB_D,23}, {MB_B,22}, {MB_A,21}, {MB_CURR,20} }; + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + + Function: h264bsdInitMbNeighbours + + Functional description: + Initialize macroblock neighbours. Function sets neighbour + macroblock pointers in macroblock structures to point to + macroblocks on the left, above, above-right and above-left. + Pointers are set NULL if the neighbour does not fit into the + picture. 
+ + Inputs: + picWidth width of the picture in macroblocks + picSizeInMbs no need to clarify + + Outputs: + pMbStorage neighbour pointers of each mbStorage structure + stored here + + Returns: + none + +------------------------------------------------------------------------------*/ + +void h264bsdInitMbNeighbours(mbStorage_t *pMbStorage, u32 picWidth, + u32 picSizeInMbs) +{ + +/* Variables */ + + u32 i, row, col; + +/* Code */ + + ASSERT(pMbStorage); + ASSERT(picWidth); + ASSERT(picWidth <= picSizeInMbs); + ASSERT(((picSizeInMbs / picWidth) * picWidth) == picSizeInMbs); + + row = col = 0; + + for (i = 0; i < picSizeInMbs; i++) + { + + if (col) + pMbStorage[i].mbA = pMbStorage + i - 1; + else + pMbStorage[i].mbA = NULL; + + if (row) + pMbStorage[i].mbB = pMbStorage + i - picWidth; + else + pMbStorage[i].mbB = NULL; + + if (row && (col < picWidth - 1)) + pMbStorage[i].mbC = pMbStorage + i - (picWidth - 1); + else + pMbStorage[i].mbC = NULL; + + if (row && col) + pMbStorage[i].mbD = pMbStorage + i - (picWidth + 1); + else + pMbStorage[i].mbD = NULL; + + col++; + if (col == picWidth) + { + col = 0; + row++; + } + } + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdGetNeighbourMb + + Functional description: + Get pointer to neighbour macroblock. 
+ + Inputs: + pMb pointer to macroblock structure of the macroblock + whose neighbour is wanted + neighbour indicates which neighbour is wanted + + Outputs: + none + + Returns: + pointer to neighbour macroblock + NULL if not available + +------------------------------------------------------------------------------*/ + +mbStorage_t* h264bsdGetNeighbourMb(mbStorage_t *pMb, neighbourMb_e neighbour) +{ + +/* Variables */ + + +/* Code */ + + ASSERT((neighbour <= MB_CURR) || (neighbour == MB_NA)); + + if (neighbour == MB_A) + return(pMb->mbA); + else if (neighbour == MB_B) + return(pMb->mbB); + else if (neighbour == MB_C) + return(pMb->mbC); + else if (neighbour == MB_D) + return(pMb->mbD); + else if (neighbour == MB_CURR) + return(pMb); + else + return(NULL); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdNeighbour4x4BlockA + + Functional description: + Get left neighbour of the block. Function returns pointer to + the table defined in the beginning of the file. + + Inputs: + blockIndex indicates the block whose neighbours are wanted + + Outputs: + + Returns: + pointer to neighbour structure + +------------------------------------------------------------------------------*/ + +const neighbour_t* h264bsdNeighbour4x4BlockA(u32 blockIndex) +{ + +/* Variables */ + +/* Code */ + + ASSERT(blockIndex < 24); + + return(N_A_4x4B+blockIndex); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdNeighbour4x4BlockB + + Functional description: + Get above neighbour of the block. Function returns pointer to + the table defined in the beginning of the file. 
+ + Inputs: + blockIndex indicates the block whose neighbours are wanted + + Outputs: + + Returns: + pointer to neighbour structure + +------------------------------------------------------------------------------*/ + +const neighbour_t* h264bsdNeighbour4x4BlockB(u32 blockIndex) +{ + +/* Variables */ + +/* Code */ + + ASSERT(blockIndex < 24); + + return(N_B_4x4B+blockIndex); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdNeighbour4x4BlockC + + Functional description: + Get above-right neighbour of the block. Function returns pointer + to the table defined in the beginning of the file. + + Inputs: + blockIndex indicates the block whose neighbours are wanted + + Outputs: + + Returns: + pointer to neighbour structure + +------------------------------------------------------------------------------*/ + +const neighbour_t* h264bsdNeighbour4x4BlockC(u32 blockIndex) +{ + +/* Variables */ + +/* Code */ + + ASSERT(blockIndex < 24); + + return(N_C_4x4B+blockIndex); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdNeighbour4x4BlockD + + Functional description: + Get above-left neighbour of the block. Function returns pointer to + the table defined in the beginning of the file. + + Inputs: + blockIndex indicates the block whose neighbours are wanted + + Outputs: + + Returns: + pointer to neighbour structure + +------------------------------------------------------------------------------*/ + +const neighbour_t* h264bsdNeighbour4x4BlockD(u32 blockIndex) +{ + +/* Variables */ + +/* Code */ + + ASSERT(blockIndex < 24); + + return(N_D_4x4B+blockIndex); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdIsNeighbourAvailable + + Functional description: + Check if neighbour macroblock is available. 
Neighbour macroblock + is considered available if it is within the picture and belongs + to the same slice as the current macroblock. + + Inputs: + pMb pointer to the current macroblock + pNeighbour pointer to the neighbour macroblock + + Outputs: + none + + Returns: + TRUE neighbour is available + FALSE neighbour is not available + +------------------------------------------------------------------------------*/ + +u32 h264bsdIsNeighbourAvailable(mbStorage_t *pMb, mbStorage_t *pNeighbour) +{ + +/* Variables */ + +/* Code */ + + if ( (pNeighbour == NULL) || (pMb->sliceId != pNeighbour->sliceId) ) + return(HANTRO_FALSE); + else + return(HANTRO_TRUE); + +} + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.h new file mode 100755 index 0000000000000000000000000000000000000000..fce0ad19ef06d4d1a9ac3298160656a962eb14e1 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.h @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. 
Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_NEIGHBOUR_H +#define H264SWDEC_NEIGHBOUR_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_macroblock_layer.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +/* Identifies the macroblock in which a neighbouring block lives. h264bsdGetNeighbourMb() maps MB_A..MB_D to the mbA..mbD pointers of a macroblock, MB_CURR to the macroblock itself and any other value to NULL. */ +typedef enum { + MB_A = 0, /* macroblock on the left */ + MB_B, /* macroblock above */ + MB_C, /* macroblock above-right */ + MB_D, /* macroblock above-left */ + MB_CURR, /* the current macroblock itself */ + MB_NA = 0xFF /* no neighbour, resolves to NULL */ +} neighbourMb_e; + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + +/* One entry of the neighbour lookup tables: the macroblock that holds the neighbouring block and the index of that block inside it. */ +typedef struct +{ + neighbourMb_e mb; /* which macroblock the neighbour block belongs to */ + u8 index; /* block index inside that macroblock */ +} neighbour_t; + +/*------------------------------------------------------------------------------ + 4. 
Function prototypes +------------------------------------------------------------------------------*/ + +void h264bsdInitMbNeighbours(mbStorage_t *pMbStorage, u32 picWidth, + u32 picSizeInMbs); + +mbStorage_t* h264bsdGetNeighbourMb(mbStorage_t *pMb, neighbourMb_e neighbour); + +u32 h264bsdIsNeighbourAvailable(mbStorage_t *pMb, mbStorage_t *pNeighbour); + +const neighbour_t* h264bsdNeighbour4x4BlockA(u32 blockIndex); +const neighbour_t* h264bsdNeighbour4x4BlockB(u32 blockIndex); +const neighbour_t* h264bsdNeighbour4x4BlockC(u32 blockIndex); +const neighbour_t* h264bsdNeighbour4x4BlockD(u32 blockIndex); + +#endif /* #ifdef H264SWDEC_NEIGHBOUR_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.c new file mode 100755 index 0000000000000000000000000000000000000000..fb23352d2b2abb47deac63850d7ff11aa4848437 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.c @@ -0,0 +1,347 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. 
Functions + h264bsdDecodePicOrderCnt + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_util.h" +#include "h264bsd_pic_order_cnt.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + + Function: h264bsdDecodePicOrderCnt + + Functional description: + Compute picture order count for a picture. Function implements + computation of all POC types (0, 1 and 2), type is obtained from + sps. See standard for description of the POC types and how POC is + computed for each type. + + Function returns the minimum of top field and bottom field pic + order counts. 
+ + Inputs: + poc pointer to previous results + sps pointer to sequence parameter set + slicHeader pointer to current slice header, frame number and + other params needed for POC computation + pNalUnit pointer to current NAL unit structrue, function needs + to know if this is an IDR picture and also if this is + a reference picture + + Outputs: + poc results stored here for computation of next POC + + Returns: + picture order count + +------------------------------------------------------------------------------*/ + +i32 h264bsdDecodePicOrderCnt(pocStorage_t *poc, seqParamSet_t *sps, + sliceHeader_t *pSliceHeader, nalUnit_t *pNalUnit) +{ + +/* Variables */ + + u32 i; + i32 picOrderCnt; + u32 frameNumOffset, absFrameNum, picOrderCntCycleCnt; + u32 frameNumInPicOrderCntCycle; + i32 expectedDeltaPicOrderCntCycle; + u32 containsMmco5; + +/* Code */ + + ASSERT(poc); + ASSERT(sps); + ASSERT(pSliceHeader); + ASSERT(pNalUnit); + ASSERT(sps->picOrderCntType <= 2); + +#if 0 + /* JanSa: I don't think this is necessary, don't see any reason to + * increment prevFrameNum one by one instead of one big increment. + * However, standard specifies that this should be done -> if someone + * figures out any case when the outcome would be different for step by + * step increment, this part of the code should be enabled */ + + /* if there was a gap in frame numbering and picOrderCntType is 1 or 2 -> + * "compute" pic order counts for non-existing frames. 
These are not + * actually computed, but process needs to be done to update the + * prevFrameNum and prevFrameNumOffset */ + if ( sps->picOrderCntType > 0 && + pSliceHeader->frameNum != poc->prevFrameNum && + pSliceHeader->frameNum != ((poc->prevFrameNum + 1) % sps->maxFrameNum)) + { + + /* use variable i for unUsedShortTermFrameNum */ + i = (poc->prevFrameNum + 1) % sps->maxFrameNum; + + do + { + if (poc->prevFrameNum > i) + frameNumOffset = poc->prevFrameNumOffset + sps->maxFrameNum; + else + frameNumOffset = poc->prevFrameNumOffset; + + poc->prevFrameNumOffset = frameNumOffset; + poc->prevFrameNum = i; + + i = (i + 1) % sps->maxFrameNum; + + } while (i != pSliceHeader->frameNum); + } +#endif + + /* check if current slice includes mmco equal to 5 */ + containsMmco5 = HANTRO_FALSE; + if (pSliceHeader->decRefPicMarking.adaptiveRefPicMarkingModeFlag) + { + i = 0; + while (pSliceHeader->decRefPicMarking.operation[i]. + memoryManagementControlOperation) + { + if (pSliceHeader->decRefPicMarking.operation[i]. 
+ memoryManagementControlOperation == 5) + { + containsMmco5 = HANTRO_TRUE; + break; + } + i++; + } + } + switch (sps->picOrderCntType) + { + + case 0: + /* set prevPicOrderCnt values for IDR frame */ + if (IS_IDR_NAL_UNIT(pNalUnit)) + { + poc->prevPicOrderCntMsb = 0; + poc->prevPicOrderCntLsb = 0; + } + + /* compute picOrderCntMsb (stored in picOrderCnt variable) */ + if ( (pSliceHeader->picOrderCntLsb < poc->prevPicOrderCntLsb) && + ((poc->prevPicOrderCntLsb - pSliceHeader->picOrderCntLsb) >= + sps->maxPicOrderCntLsb/2) ) + { + picOrderCnt = poc->prevPicOrderCntMsb + + (i32)sps->maxPicOrderCntLsb; + } + else if ((pSliceHeader->picOrderCntLsb > poc->prevPicOrderCntLsb) && + ((pSliceHeader->picOrderCntLsb - poc->prevPicOrderCntLsb) > + sps->maxPicOrderCntLsb/2) ) + { + picOrderCnt = poc->prevPicOrderCntMsb - + (i32)sps->maxPicOrderCntLsb; + } + else + picOrderCnt = poc->prevPicOrderCntMsb; + + /* standard specifies that prevPicOrderCntMsb is from previous + * rererence frame -> replace old value only if current frame is + * rererence frame */ + if (pNalUnit->nalRefIdc) + poc->prevPicOrderCntMsb = picOrderCnt; + + /* compute top field order cnt (stored in picOrderCnt) */ + picOrderCnt += (i32)pSliceHeader->picOrderCntLsb; + + /* if delta for bottom field is negative -> bottom will be the + * minimum pic order count */ + if (pSliceHeader->deltaPicOrderCntBottom < 0) + picOrderCnt += pSliceHeader->deltaPicOrderCntBottom; + + /* standard specifies that prevPicOrderCntLsb is from previous + * rererence frame -> replace old value only if current frame is + * rererence frame */ + if (pNalUnit->nalRefIdc) + { + /* if current frame contains mmco5 -> modify values to be + * stored */ + if (containsMmco5) + { + poc->prevPicOrderCntMsb = 0; + /* prevPicOrderCntLsb should be the top field picOrderCnt + * if previous frame included mmco5. 
Top field picOrderCnt + * for frames containing mmco5 is obtained by subtracting + * the picOrderCnt from original top field order count -> + * value is zero if top field was the minimum, i.e. delta + * for bottom was positive, otherwise value is + * -deltaPicOrderCntBottom */ + if (pSliceHeader->deltaPicOrderCntBottom < 0) + poc->prevPicOrderCntLsb = + (u32)(-pSliceHeader->deltaPicOrderCntBottom); + else + poc->prevPicOrderCntLsb = 0; + picOrderCnt = 0; + } + else + { + poc->prevPicOrderCntLsb = pSliceHeader->picOrderCntLsb; + } + } + + break; + + case 1: + + /* step 1 (in the description in the standard) */ + if (IS_IDR_NAL_UNIT(pNalUnit)) + frameNumOffset = 0; + else if (poc->prevFrameNum > pSliceHeader->frameNum) + frameNumOffset = poc->prevFrameNumOffset + sps->maxFrameNum; + else + frameNumOffset = poc->prevFrameNumOffset; + + /* step 2 */ + if (sps->numRefFramesInPicOrderCntCycle) + absFrameNum = frameNumOffset + pSliceHeader->frameNum; + else + absFrameNum = 0; + + if (pNalUnit->nalRefIdc == 0 && absFrameNum > 0) + absFrameNum -= 1; + + /* step 3 */ + if (absFrameNum > 0) + { + picOrderCntCycleCnt = + (absFrameNum - 1)/sps->numRefFramesInPicOrderCntCycle; + frameNumInPicOrderCntCycle = + (absFrameNum - 1)%sps->numRefFramesInPicOrderCntCycle; + } + + /* step 4 */ + expectedDeltaPicOrderCntCycle = 0; + for (i = 0; i < sps->numRefFramesInPicOrderCntCycle; i++) + expectedDeltaPicOrderCntCycle += sps->offsetForRefFrame[i]; + + /* step 5 (picOrderCnt used to store expectedPicOrderCnt) */ + /*lint -esym(644,picOrderCntCycleCnt) always initialized */ + /*lint -esym(644,frameNumInPicOrderCntCycle) always initialized */ + if (absFrameNum > 0) + { + picOrderCnt = + (i32)picOrderCntCycleCnt * expectedDeltaPicOrderCntCycle; + for (i = 0; i <= frameNumInPicOrderCntCycle; i++) + picOrderCnt += sps->offsetForRefFrame[i]; + } + else + picOrderCnt = 0; + + if (pNalUnit->nalRefIdc == 0) + picOrderCnt += sps->offsetForNonRefPic; + + /* step 6 (picOrderCnt is top field order 
cnt if delta for bottom + * is positive, otherwise it is bottom field order cnt) */ + picOrderCnt += pSliceHeader->deltaPicOrderCnt[0]; + + if ( (sps->offsetForTopToBottomField + + pSliceHeader->deltaPicOrderCnt[1]) < 0 ) + { + picOrderCnt += sps->offsetForTopToBottomField + + pSliceHeader->deltaPicOrderCnt[1]; + } + + /* if current picture contains mmco5 -> set prevFrameNumOffset and + * prevFrameNum to 0 for computation of picOrderCnt of next + * frame, otherwise store frameNum and frameNumOffset to poc + * structure */ + if (!containsMmco5) + { + poc->prevFrameNumOffset = frameNumOffset; + poc->prevFrameNum = pSliceHeader->frameNum; + } + else + { + poc->prevFrameNumOffset = 0; + poc->prevFrameNum = 0; + picOrderCnt = 0; + } + break; + + default: /* case 2 */ + /* derive frameNumOffset */ + if (IS_IDR_NAL_UNIT(pNalUnit)) + frameNumOffset = 0; + else if (poc->prevFrameNum > pSliceHeader->frameNum) + frameNumOffset = poc->prevFrameNumOffset + sps->maxFrameNum; + else + frameNumOffset = poc->prevFrameNumOffset; + + /* derive picOrderCnt (type 2 has same value for top and bottom + * field order cnts) */ + if (IS_IDR_NAL_UNIT(pNalUnit)) + picOrderCnt = 0; + else if (pNalUnit->nalRefIdc == 0) + picOrderCnt = + 2 * (i32)(frameNumOffset + pSliceHeader->frameNum) - 1; + else + picOrderCnt = + 2 * (i32)(frameNumOffset + pSliceHeader->frameNum); + + /* if current picture contains mmco5 -> set prevFrameNumOffset and + * prevFrameNum to 0 for computation of picOrderCnt of next + * frame, otherwise store frameNum and frameNumOffset to poc + * structure */ + if (!containsMmco5) + { + poc->prevFrameNumOffset = frameNumOffset; + poc->prevFrameNum = pSliceHeader->frameNum; + } + else + { + poc->prevFrameNumOffset = 0; + poc->prevFrameNum = 0; + picOrderCnt = 0; + } + break; + + } + + /*lint -esym(644,picOrderCnt) always initialized */ + return(picOrderCnt); + +} + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.h 
b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.h new file mode 100755 index 0000000000000000000000000000000000000000..19741eb3161fa56da0440afe35a73391a59d54ff --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_PIC_ORDER_CNT_H +#define H264SWDEC_PIC_ORDER_CNT_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_seq_param_set.h" +#include "h264bsd_slice_header.h" +#include "h264bsd_nal_unit.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + + +/*------------------------------------------------------------------------------ + 3. 
Data types +------------------------------------------------------------------------------*/ + +/* structure to store information computed for previous picture, needed for + * POC computation of a picture. Two first fields for POC type 0, last two + * for types 1 and 2. h264bsdDecodePicOrderCnt() both reads and updates + * this state. */ +typedef struct +{ + u32 prevPicOrderCntLsb; /* type 0: cleared on IDR, updated only by reference pictures */ + i32 prevPicOrderCntMsb; /* type 0: cleared on IDR, updated only by reference pictures */ + u32 prevFrameNum; /* types 1 and 2: frame number of the previous picture, 0 after a picture containing mmco 5 */ + u32 prevFrameNumOffset; /* types 1 and 2: frame number offset of the previous picture, 0 after a picture containing mmco 5 */ +} pocStorage_t; + +/*------------------------------------------------------------------------------ + 4. Function prototypes +------------------------------------------------------------------------------*/ + +i32 h264bsdDecodePicOrderCnt(pocStorage_t *poc, seqParamSet_t *sps, + sliceHeader_t *sliceHeader, nalUnit_t *pNalUnit); + +#endif /* #ifndef H264SWDEC_PIC_ORDER_CNT_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.c new file mode 100755 index 0000000000000000000000000000000000000000..e04dea4b4ea957b220db681040ed09f13453b4cb --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.c @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. 
Functions + h264bsdDecodePicParamSet + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_pic_param_set.h" +#include "h264bsd_util.h" +#include "h264bsd_vlc.h" +#include "h264bsd_cfg.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/* lookup table for ceil(log2(numSliceGroups)), i.e. number of bits needed to + * represent range [0, numSliceGroups) + * + * The table is indexed with numSliceGroups - 1. Entry 0 (a single slice + * group) holds 1 rather than ceil(log2(1)) = 0; h264bsdDecodePicParamSet + * only consults the table when numSliceGroups is greater than 1. + * + * NOTE: if MAX_NUM_SLICE_GROUPS is higher than 8 this table has to be resized + * accordingly */ +static const u32 CeilLog2NumSliceGroups[8] = {1, 1, 2, 2, 3, 3, 3, 3}; + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + + Function name: h264bsdDecodePicParamSet + + Functional description: + Decode picture parameter set information from the stream. + + Function allocates memory for + - run lengths if slice group map type is 0 + - top-left and bottom-right arrays if map type is 2 + - for slice group ids if map type is 6 + + Validity of some of the slice group mapping information depends + on the image dimensions which are not known here. Therefore the + validity has to be checked afterwards, currently in the parameter + set activation phase. 
+ + Inputs: + pStrmData pointer to stream data structure + + Outputs: + pPicParamSet decoded information is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK failure, invalid information or end of stream + MEMORY_ALLOCATION_ERROR for memory allocation failure + + +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodePicParamSet(strmData_t *pStrmData, picParamSet_t *pPicParamSet) +{ + +/* Variables */ + + u32 tmp, i, value; + i32 itmp; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pPicParamSet); + + + H264SwDecMemset(pPicParamSet, 0, sizeof(picParamSet_t)); + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pPicParamSet->picParameterSetId); + if (tmp != HANTRO_OK) + return(tmp); + if (pPicParamSet->picParameterSetId >= MAX_NUM_PIC_PARAM_SETS) + { + EPRINT("pic_parameter_set_id"); + return(HANTRO_NOK); + } + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pPicParamSet->seqParameterSetId); + if (tmp != HANTRO_OK) + return(tmp); + if (pPicParamSet->seqParameterSetId >= MAX_NUM_SEQ_PARAM_SETS) + { + EPRINT("seq_param_set_id"); + return(HANTRO_NOK); + } + + /* entropy_coding_mode_flag, shall be 0 for baseline profile */ + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp) + { + EPRINT("entropy_coding_mode_flag"); + return(HANTRO_NOK); + } + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicParamSet->picOrderPresentFlag = (tmp == 1) ? 
HANTRO_TRUE : HANTRO_FALSE; + + /* num_slice_groups_minus1 */ + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pPicParamSet->numSliceGroups = value + 1; + if (pPicParamSet->numSliceGroups > MAX_NUM_SLICE_GROUPS) + { + EPRINT("num_slice_groups_minus1"); + return(HANTRO_NOK); + } + + /* decode slice group mapping information if more than one slice groups */ + if (pPicParamSet->numSliceGroups > 1) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pPicParamSet->sliceGroupMapType); + if (tmp != HANTRO_OK) + return(tmp); + if (pPicParamSet->sliceGroupMapType > 6) + { + EPRINT("slice_group_map_type"); + return(HANTRO_NOK); + } + + if (pPicParamSet->sliceGroupMapType == 0) + { + ALLOCATE(pPicParamSet->runLength, + pPicParamSet->numSliceGroups, u32); + if (pPicParamSet->runLength == NULL) + return(MEMORY_ALLOCATION_ERROR); + for (i = 0; i < pPicParamSet->numSliceGroups; i++) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pPicParamSet->runLength[i] = value+1; + /* param values checked in CheckPps() */ + } + } + else if (pPicParamSet->sliceGroupMapType == 2) + { + ALLOCATE(pPicParamSet->topLeft, + pPicParamSet->numSliceGroups - 1, u32); + ALLOCATE(pPicParamSet->bottomRight, + pPicParamSet->numSliceGroups - 1, u32); + if (pPicParamSet->topLeft == NULL || + pPicParamSet->bottomRight == NULL) + return(MEMORY_ALLOCATION_ERROR); + for (i = 0; i < pPicParamSet->numSliceGroups - 1; i++) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pPicParamSet->topLeft[i] = value; + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pPicParamSet->bottomRight[i] = value; + /* param values checked in CheckPps() */ + } + } + else if ( (pPicParamSet->sliceGroupMapType == 3) || + (pPicParamSet->sliceGroupMapType == 4) || + (pPicParamSet->sliceGroupMapType == 5) ) + { + tmp = 
h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicParamSet->sliceGroupChangeDirectionFlag = + (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE; + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pPicParamSet->sliceGroupChangeRate = value + 1; + /* param value checked in CheckPps() */ + } + else if (pPicParamSet->sliceGroupMapType == 6) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pPicParamSet->picSizeInMapUnits = value + 1; + + ALLOCATE(pPicParamSet->sliceGroupId, + pPicParamSet->picSizeInMapUnits, u32); + if (pPicParamSet->sliceGroupId == NULL) + return(MEMORY_ALLOCATION_ERROR); + + /* determine number of bits needed to represent range + * [0, numSliceGroups) */ + tmp = CeilLog2NumSliceGroups[pPicParamSet->numSliceGroups-1]; + + for (i = 0; i < pPicParamSet->picSizeInMapUnits; i++) + { + pPicParamSet->sliceGroupId[i] = h264bsdGetBits(pStrmData, tmp); + if ( pPicParamSet->sliceGroupId[i] >= + pPicParamSet->numSliceGroups ) + { + EPRINT("slice_group_id"); + return(HANTRO_NOK); + } + } + } + } + + /* num_ref_idx_l0_active_minus1 */ + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + if (value > 31) + { + EPRINT("num_ref_idx_l0_active_minus1"); + return(HANTRO_NOK); + } + pPicParamSet->numRefIdxL0Active = value + 1; + + /* num_ref_idx_l1_active_minus1 */ + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + if (value > 31) + { + EPRINT("num_ref_idx_l1_active_minus1"); + return(HANTRO_NOK); + } + + /* weighted_pred_flag, this shall be 0 for baseline profile */ + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp) + { + EPRINT("weighted_pred_flag"); + return(HANTRO_NOK); + } + + /* weighted_bipred_idc */ + tmp = h264bsdGetBits(pStrmData, 2); + if (tmp > 2) + { + EPRINT("weighted_bipred_idc"); + return(HANTRO_NOK); + } + + /* pic_init_qp_minus26 */ + 
tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp); + if (tmp != HANTRO_OK) + return(tmp); + if ((itmp < -26) || (itmp > 25)) + { + EPRINT("pic_init_qp_minus26"); + return(HANTRO_NOK); + } + pPicParamSet->picInitQp = (u32)(itmp + 26); + + /* pic_init_qs_minus26 */ + tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp); + if (tmp != HANTRO_OK) + return(tmp); + if ((itmp < -26) || (itmp > 25)) + { + EPRINT("pic_init_qs_minus26"); + return(HANTRO_NOK); + } + + tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp); + if (tmp != HANTRO_OK) + return(tmp); + if ((itmp < -12) || (itmp > 12)) + { + EPRINT("chroma_qp_index_offset"); + return(HANTRO_NOK); + } + pPicParamSet->chromaQpIndexOffset = itmp; + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicParamSet->deblockingFilterControlPresentFlag = + (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE; + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicParamSet->constrainedIntraPredFlag = (tmp == 1) ? + HANTRO_TRUE : HANTRO_FALSE; + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicParamSet->redundantPicCntPresentFlag = (tmp == 1) ? + HANTRO_TRUE : HANTRO_FALSE; + + tmp = h264bsdRbspTrailingBits(pStrmData); + + /* ignore possible errors in trailing bits of parameters sets */ + return(HANTRO_OK); + +} + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.h new file mode 100755 index 0000000000000000000000000000000000000000..63286380221752110a1b0c1445869b133ad34124 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_PIC_PARAM_SET_H +#define H264SWDEC_PIC_PARAM_SET_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_stream.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 3. 
Data types
+------------------------------------------------------------------------------*/
+
+/* Data structure to store PPS (picture parameter set) information decoded
+ * from the stream. It is filled in by h264bsdDecodePicParamSet(); the value
+ * ranges noted below are the ones that function enforces before storing. */
+typedef struct
+{
+    u32 picParameterSetId;
+    u32 seqParameterSetId;
+    u32 picOrderPresentFlag;
+    u32 numSliceGroups;     /* exclusive upper bound for sliceGroupId[] entries */
+    u32 sliceGroupMapType;  /* value 6 makes the decoder read sliceGroupId[] */
+    u32 *runLength;         /* NOTE(review): runLength/topLeft/bottomRight are
+                             * decoded outside the visible part of the decoder;
+                             * presumably per-slice-group arrays - confirm */
+    u32 *topLeft;
+    u32 *bottomRight;
+    u32 sliceGroupChangeDirectionFlag;  /* HANTRO_TRUE / HANTRO_FALSE */
+    u32 sliceGroupChangeRate;   /* decoded Exp-Golomb value + 1 */
+    u32 picSizeInMapUnits;      /* decoded value + 1; length of sliceGroupId[] */
+    u32 *sliceGroupId;          /* allocated by the decoder for map type 6;
+                                 * every entry is < numSliceGroups */
+    u32 numRefIdxL0Active;      /* num_ref_idx_l0_active_minus1 + 1, i.e. 1..32 */
+    u32 picInitQp;              /* pic_init_qp_minus26 + 26, i.e. 0..51 */
+    i32 chromaQpIndexOffset;    /* accepted range is -12..12 */
+    u32 deblockingFilterControlPresentFlag; /* HANTRO_TRUE / HANTRO_FALSE */
+    u32 constrainedIntraPredFlag;           /* HANTRO_TRUE / HANTRO_FALSE */
+    u32 redundantPicCntPresentFlag;         /* HANTRO_TRUE / HANTRO_FALSE */
+} picParamSet_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+/* Decode one picture parameter set from pStrmData into pPicParamSet.
+ * Returns HANTRO_OK on success. On failure it returns HANTRO_NOK (value out
+ * of range or stream exhausted), MEMORY_ALLOCATION_ERROR (sliceGroupId could
+ * not be allocated) or the error code of the Exp-Golomb decoder. Errors in
+ * the RBSP trailing bits are ignored.
+ * NOTE(review): who releases the arrays allocated here is not visible in
+ * this header - confirm against the storage code. */
+u32 h264bsdDecodePicParamSet(strmData_t *pStrmData,
+    picParamSet_t *pPicParamSet);
+
+#endif /* #ifndef H264SWDEC_PIC_PARAM_SET_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.c
new file mode 100755
index 0000000000000000000000000000000000000000..c94877623ddfe8addd4680b6f245d75178f1f083
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.c
@@ -0,0 +1,2315 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_reconstruct.h" +#include "h264bsd_macroblock_layer.h" +#include "h264bsd_image.h" +#include "h264bsd_util.h" + +#ifdef H264DEC_OMXDL +#include "omxtypes.h" +#include "omxVC.h" +#include "armVC.h" +#endif /* H264DEC_OMXDL */ + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/* Switch off the following Lint messages for this file: + * Info 701: Shift left of signed quantity (int) + * Info 702: Shift right of signed quantity (int) + */ +/*lint -e701 -e702 */ + +/* Luma fractional-sample positions + * + * G a b c H + * d e f g + * h i j k m + * n p q r + * M s N + * + * G, H, M and N are integer sample positions + * a-s are fractional samples that need to be interpolated. + */ +#ifndef H264DEC_OMXDL +static const u32 lumaFracPos[4][4] = { + /* G d h n a e i p b f j q c g k r */ + {0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}, {12, 13, 14, 15}}; +#endif /* H264DEC_OMXDL */ + +/* clipping table, defined in h264bsd_intra_prediction.c */ +extern const u8 h264bsdClip[]; + +/*------------------------------------------------------------------------------ + 4. 
Local function prototypes
+------------------------------------------------------------------------------*/
+
+#ifndef H264DEC_OMXDL
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterpolateChromaHor
+
+        Functional description:
+          This function performs chroma interpolation in horizontal direction.
+          Overfilling is done only if needed. Reference image (pRef) is
+          read at correct position and the predicted part is written to
+          macroblock's chrominance (predPartChroma)
+        Inputs:
+          pRef              pointer to reference frame Cb top-left corner
+          x0                integer x-coordinate for prediction
+          y0                integer y-coordinate for prediction
+          width             width of the reference frame chrominance in pixels
+          height            height of the reference frame chrominance in pixels
+          xFrac             horizontal fraction for prediction in 1/8 pixels
+          chromaPartWidth   width of the predicted part in pixels
+          chromaPartHeight  height of the predicted part in pixels
+        Outputs:
+          predPartChroma    pointer where predicted part is written
+        Notes:
+          Two planes are processed per call: the second plane is read
+          width*height bytes after pRef and written 8*8 bytes after
+          predPartChroma. Destination rows are 8 bytes apart.
+          Output is produced in 2x2 groups, so an odd trailing row or
+          column of the part would not be written.
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_ARM11
+void h264bsdInterpolateChromaHor(
+  u8 *pRef,
+  u8 *predPartChroma,
+  i32 x0,
+  i32 y0,
+  u32 width,
+  u32 height,
+  u32 xFrac,
+  u32 chromaPartWidth,
+  u32 chromaPartHeight)
+{
+
+/* Variables */
+
+    u32 x, y, tmp1, tmp2, tmp3, tmp4, c, val;
+    u8 *ptrA, *cbr;
+    u32 comp;
+    u8 block[9*8*2];    /* overfill scratch: two planes of
+                         * (chromaPartWidth+1) x chromaPartHeight samples.
+                         * NOTE(review): only large enough for parts up to
+                         * 8x8 - confirm callers never pass more */
+
+/* Code */
+
+    ASSERT(predPartChroma);
+    ASSERT(chromaPartWidth);
+    ASSERT(chromaPartHeight);
+    ASSERT(xFrac < 8);
+    ASSERT(pRef);
+
+    /* the filter reads one sample to the right of each output sample, hence
+     * the +1 on the horizontal bound */
+    if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) ||
+        (y0 < 0) || ((u32)y0+chromaPartHeight > height))
+    {
+        h264bsdFillBlock(pRef, block, x0, y0, width, height,
+            chromaPartWidth + 1, chromaPartHeight, chromaPartWidth + 1);
+        pRef += width * height;     /* move on to the second plane */
+        h264bsdFillBlock(pRef, block + (chromaPartWidth+1)*chromaPartHeight,
+            x0, y0, width, height, chromaPartWidth + 1,
+            chromaPartHeight, chromaPartWidth + 1);
+
+        /* from here on read from the scratch copy instead of the frame */
+        pRef = block;
+        x0 = 0;
+        y0 = 0;
+        width = chromaPartWidth+1;
+        height = chromaPartHeight;
+    }
+
+    val = 8 - xFrac;    /* weight of the left sample; xFrac weights the right */
+
+    for (comp = 0; comp <= 1; comp++)
+    {
+        /* comp selects the plane: source planes are height rows apart,
+         * destination planes 8*8 bytes apart */
+        ptrA = pRef + (comp * height + (u32)y0) * width + x0;
+        cbr = predPartChroma + comp * 8 * 8;
+
+        /* 2x2 pels per iteration
+         * bilinear horizontal interpolation
+         * ((8*s + 32) >> 6 equals (s + 4) >> 3, i.e. the weighted sum s of
+         * two neighbours divided by 8 with rounding) */
+        for (y = (chromaPartHeight >> 1); y; y--)
+        {
+            for (x = (chromaPartWidth >> 1); x; x--)
+            {
+                tmp1 = ptrA[width];     /* lower row, left column */
+                tmp2 = *ptrA++;         /* upper row, left column */
+                tmp3 = ptrA[width];     /* lower row, middle column */
+                tmp4 = *ptrA++;         /* upper row, middle column */
+                c = ((val * tmp1 + xFrac * tmp3) << 3) + 32;
+                c >>= 6;
+                cbr[8] = (u8)c;         /* cbr[8] is the row below *cbr */
+                c = ((val * tmp2 + xFrac * tmp4) << 3) + 32;
+                c >>= 6;
+                *cbr++ = (u8)c;
+                tmp1 = ptrA[width];     /* lower row, right column */
+                tmp2 = *ptrA;           /* upper row, right column; ptrA is not
+                                         * advanced because this column is the
+                                         * left column of the next iteration */
+                c = ((val * tmp3 + xFrac * tmp1) << 3) + 32;
+                c >>= 6;
+                cbr[8] = (u8)c;
+                c = ((val * tmp4 + xFrac * tmp2) << 3) + 32;
+                c >>= 6;
+                *cbr++ = (u8)c;
+            }
+            cbr += 2*8 - chromaPartWidth;       /* two destination rows down */
+            ptrA += 2*width - chromaPartWidth;  /* two source rows down */
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterpolateChromaVer
+
+        Functional description:
+          This function performs chroma interpolation in vertical direction.
+          Overfilling is done only if needed.
Reference image (pRef) is
+          read at correct position and the predicted part is written to
+          macroblock's chrominance (predPartChroma)
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateChromaVer(
+  u8 *pRef,
+  u8 *predPartChroma,
+  i32 x0,
+  i32 y0,
+  u32 width,
+  u32 height,
+  u32 yFrac,
+  u32 chromaPartWidth,
+  u32 chromaPartHeight)
+{   /* Parameters: pRef/width/height describe the first chroma plane of the
+     * reference (the second plane is read width*height bytes later), x0/y0
+     * is the integer position, yFrac the vertical fraction (asserted < 8)
+     * and predPartChroma the destination (rows 8 bytes apart, second plane
+     * 8*8 bytes after the first). Output is produced in 2x2 groups. */
+
+/* Variables */
+
+    u32 x, y, tmp1, tmp2, tmp3, c, val;
+    u8 *ptrA, *cbr;
+    u32 comp;
+    u8 block[9*8*2];    /* overfill scratch: two planes of
+                         * chromaPartWidth x (chromaPartHeight+1) samples.
+                         * NOTE(review): only large enough for parts up to
+                         * 8x8 - confirm callers never pass more */
+
+/* Code */
+
+    ASSERT(predPartChroma);
+    ASSERT(chromaPartWidth);
+    ASSERT(chromaPartHeight);
+    ASSERT(yFrac < 8);
+    ASSERT(pRef);
+
+    /* the filter reads one row below each output row, hence the +1 on the
+     * vertical bound */
+    if ((x0 < 0) || ((u32)x0+chromaPartWidth > width) ||
+        (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height))
+    {
+        h264bsdFillBlock(pRef, block, x0, y0, width, height, chromaPartWidth,
+            chromaPartHeight + 1, chromaPartWidth);
+        pRef += width * height;     /* move on to the second plane */
+        h264bsdFillBlock(pRef, block + chromaPartWidth*(chromaPartHeight+1),
+            x0, y0, width, height, chromaPartWidth,
+            chromaPartHeight + 1, chromaPartWidth);
+
+        /* from here on read from the scratch copy instead of the frame */
+        pRef = block;
+        x0 = 0;
+        y0 = 0;
+        width = chromaPartWidth;
+        height = chromaPartHeight+1;
+    }
+
+    val = 8 - yFrac;    /* weight of the upper sample; yFrac weights the lower */
+
+    for (comp = 0; comp <= 1; comp++)
+    {
+        /* comp selects the plane: source planes are height rows apart,
+         * destination planes 8*8 bytes apart */
+        ptrA = pRef + (comp * height + (u32)y0) * width + x0;
+        cbr = predPartChroma + comp * 8 * 8;
+
+        /* 2x2 pels per iteration
+         * bilinear vertical interpolation
+         * ((8*s + 32) >> 6 equals (s + 4) >> 3, i.e. the weighted sum s of
+         * two neighbours divided by 8 with rounding) */
+        for (y = (chromaPartHeight >> 1); y; y--)
+        {
+            for (x = (chromaPartWidth >> 1); x; x--)
+            {
+                /* first column of the pair: three vertically adjacent samples
+                 * give two output rows */
+                tmp3 = ptrA[width*2];
+                tmp2 = ptrA[width];
+                tmp1 = *ptrA++;
+                c = ((val * tmp2 + yFrac * tmp3) << 3) + 32;
+                c >>= 6;
+                cbr[8] = (u8)c;         /* cbr[8] is the row below *cbr */
+                c = ((val * tmp1 + yFrac * tmp2) << 3) + 32;
+                c >>= 6;
+                *cbr++ = (u8)c;
+                /* second column of the pair, same computation */
+                tmp3 = ptrA[width*2];
+                tmp2 = ptrA[width];
+                tmp1 = *ptrA++;
+                c = ((val * tmp2 + yFrac * tmp3) << 3) + 32;
+                c >>= 6;
+                cbr[8] = (u8)c;
+                c = ((val * tmp1 + yFrac * tmp2) << 3) + 32;
+                c >>= 6;
+                *cbr++ = (u8)c;
+            }
+            cbr += 2*8 - chromaPartWidth;       /* two destination rows down */
+            ptrA += 2*width - chromaPartWidth;  /* two source rows down */
+        }
+    }
+
+}
+#endif /* H264DEC_ARM11 */
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterpolateChromaHorVer
+
+        Functional description:
+          This function performs chroma interpolation in horizontal and
+          vertical direction. Overfilling is done only if needed. Reference
+          image (ref) is read at correct position and the predicted part
+          is written to macroblock's chrominance (predPartChroma)
+        Inputs:
+          ref               pointer to the first chroma plane of the reference;
+                            the second plane is read width*height bytes later
+          x0, y0            integer coordinates for prediction
+          width, height     dimensions of one reference chroma plane
+          xFrac, yFrac      horizontal / vertical fraction, asserted < 8
+          chromaPartWidth   width of the predicted part in pixels
+          chromaPartHeight  height of the predicted part in pixels
+        Outputs:
+          predPartChroma    destination; rows are 8 bytes apart and the
+                            second plane starts 8*8 bytes after the first
+        Notes:
+          Output is produced in 2x2 groups, so an odd trailing row or
+          column of the part would not be written.
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateChromaHorVer(
+  u8 *ref,
+  u8 *predPartChroma,
+  i32 x0,
+  i32 y0,
+  u32 width,
+  u32 height,
+  u32 xFrac,
+  u32 yFrac,
+  u32 chromaPartWidth,
+  u32 chromaPartHeight)
+{
+    u8 block[9*9*2];    /* overfill scratch: two planes of (chromaPartWidth+1)
+                         * x (chromaPartHeight+1) samples. NOTE(review): only
+                         * large enough for parts up to 8x8 - confirm */
+    u32 x, y, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, valX, valY, plus32 = 32;
+    u32 comp;
+    u8 *ptrA, *cbr;
+
+/* Code */
+
+    ASSERT(predPartChroma);
+    ASSERT(chromaPartWidth);
+    ASSERT(chromaPartHeight);
+    ASSERT(xFrac < 8);
+    ASSERT(yFrac < 8);
+    ASSERT(ref);
+
+    /* one extra column and one extra row are read, hence the +1 on both
+     * bounds */
+    if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) ||
+        (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height))
+    {
+        h264bsdFillBlock(ref, block, x0, y0, width, height,
+            chromaPartWidth + 1, chromaPartHeight + 1, chromaPartWidth + 1);
+        ref += width * height;      /* move on to the second plane */
+        h264bsdFillBlock(ref, block + (chromaPartWidth+1)*(chromaPartHeight+1),
+            x0, y0, width, height, chromaPartWidth + 1,
+            chromaPartHeight + 1, chromaPartWidth + 1);
+
+        /* from here on read from the scratch copy instead of the frame */
+        ref = block;
+        x0 = 0;
+        y0 = 0;
+        width = chromaPartWidth+1;
+        height = chromaPartHeight+1;
+    }
+
+    valX = 8 - xFrac;   /* weight of the left column */
+    valY = 8 - yFrac;   /* weight of the upper row */
+
+    for (comp = 0; comp <= 1; comp++)
+    {
+        /* comp selects the plane: source planes are height rows apart,
+         * destination planes 8*8 bytes apart */
+        ptrA = ref + (comp * height + (u32)y0) * width + x0;
+        cbr = predPartChroma + comp * 8 * 8;
+
+        /* 2x2 pels per iteration
+         * bilinear vertical and horizontal interpolation
+         * Each column is first blended vertically (weights valY / yFrac),
+         * then two neighbouring column blends are mixed horizontally
+         * (weights valX / xFrac), 32 is added and the sum shifted by 6. */
+        for (y = (chromaPartHeight >> 1); y; y--)
+        {
+            /* vertical blends of the leftmost column for the two output rows */
+            tmp1 = *ptrA;
+            tmp3 = ptrA[width];
+            tmp5 = ptrA[width*2];
+            tmp1 *= valY;
+            tmp1 += tmp3 * yFrac;
+            tmp3 *= valY;
+            tmp3 += tmp5 * yFrac;
+            for (x = (chromaPartWidth >> 1); x; x--)
+            {
+                /* vertical blends of the next column (tmp2 upper, tmp4 lower) */
+                tmp2 = *++ptrA;
+                tmp4 = ptrA[width];
+                tmp6 = ptrA[width*2];
+                tmp2 *= valY;
+                tmp2 += tmp4 * yFrac;
+                tmp4 *= valY;
+                tmp4 += tmp6 * yFrac;
+                tmp1 = tmp1 * valX + plus32;
+                tmp3 = tmp3 * valX + plus32;
+                tmp1 += tmp2 * xFrac;
+                tmp1 >>= 6;
+                tmp3 += tmp4 * xFrac;
+                tmp3 >>= 6;
+                cbr[8] = (u8)tmp3;      /* cbr[8] is the row below *cbr */
+                *cbr++ = (u8)tmp1;
+
+                /* vertical blends of the column after that; tmp1/tmp3 are
+                 * reused and carry over as the left column of the next
+                 * iteration */
+                tmp1 = *++ptrA;
+                tmp3 = ptrA[width];
+                tmp5 = ptrA[width*2];
+                tmp1 *= valY;
+                tmp1 += tmp3 * yFrac;
+                tmp3 *= valY;
+                tmp3 += tmp5 * yFrac;
+                tmp2 = tmp2 * valX + plus32;
+                tmp4 = tmp4 * valX + plus32;
+                tmp2 += tmp1 * xFrac;
+                tmp2 >>= 6;
+                tmp4 += tmp3 * xFrac;
+                tmp4 >>= 6;
+                cbr[8] = (u8)tmp4;
+                *cbr++ = (u8)tmp2;
+            }
+            cbr += 2*8 - chromaPartWidth;       /* two destination rows down */
+            ptrA += 2*width - chromaPartWidth;  /* two source rows down */
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: PredictChroma
+
+        Functional description:
+          Top level chroma prediction function that calls the appropriate
+          interpolation function. The output is written to macroblock array.
+
+------------------------------------------------------------------------------*/
+
+static void PredictChroma(
+  u8 *mbPartChroma,
+  u32 xAL,
+  u32 yAL,
+  u32 partWidth,
+  u32 partHeight,
+  mv_t *mv,
+  image_t *refPic)
+{   /* Parameters: mbPartChroma is the destination (second plane 8*8 bytes
+     * after the first), xAL/yAL and partWidth/partHeight are halved before
+     * use, mv supplies the motion vector and refPic the reference picture.
+     * The function only selects the interpolation routine from the two
+     * fractional parts of the vector. */
+
+/* Variables */
+
+    u32 xFrac, yFrac, width, height, chromaPartWidth, chromaPartHeight;
+    i32 xInt, yInt;
+    u8 *ref;
+
+/* Code */
+
+    ASSERT(mv);
+    ASSERT(refPic);
+    ASSERT(refPic->data);
+    ASSERT(refPic->width);
+    ASSERT(refPic->height);
+
+    /* NOTE(review): the factor 8 (and 256 below) suggests refPic->width and
+     * refPic->height count 16x16 macroblocks - confirm against image_t */
+    width = 8 * refPic->width;
+    height = 8 * refPic->height;
+
+    /* the vector is split into a whole-sample part (>> 3) and a fraction in
+     * eighths (& 0x7). NOTE(review): for negative components this relies on
+     * the right shift of a signed value being arithmetic - confirm mv_t */
+    xInt = (xAL >> 1) + (mv->hor >> 3);
+    yInt = (yAL >> 1) + (mv->ver >> 3);
+    xFrac = mv->hor & 0x7;
+    yFrac = mv->ver & 0x7;
+
+    chromaPartWidth = partWidth >> 1;
+    chromaPartHeight = partHeight >> 1;
+    /* skip 256 bytes per width*height unit; presumably this is the luma
+     * plane, leaving ref at the first chroma plane - confirm */
+    ref = refPic->data + 256 * refPic->width * refPic->height;
+
+    if (xFrac && yFrac)
+    {
+        h264bsdInterpolateChromaHorVer(ref, mbPartChroma, xInt, yInt, width,
+                height, xFrac, yFrac, chromaPartWidth, chromaPartHeight);
+    }
+    else if (xFrac)
+    {
+        h264bsdInterpolateChromaHor(ref, mbPartChroma, xInt, yInt, width,
+                height, xFrac, chromaPartWidth, chromaPartHeight);
+    }
+    else if (yFrac)
+    {
+        h264bsdInterpolateChromaVer(ref, mbPartChroma, xInt, yInt, width,
+                height, yFrac, chromaPartWidth, chromaPartHeight);
+    }
+    else
+    {
+        /* whole-sample vector: no filtering, each plane is fetched with
+         * h264bsdFillBlock; the final argument 8 is presumably the
+         * destination row stride - confirm */
+        h264bsdFillBlock(ref, mbPartChroma, xInt, yInt, width, height,
+            chromaPartWidth, chromaPartHeight, 8);
+        ref += width * height;      /* move on to the second plane */
+        h264bsdFillBlock(ref, mbPartChroma + 8*8, xInt, yInt, width, height,
+            chromaPartWidth, chromaPartHeight, 8);
+    }
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterpolateVerHalf
+
+        Functional description:
+          Function to perform vertical interpolation of pixel position 'h'
+          for a block. Overfilling is done only if needed.
Reference + image (ref) is read at correct position and the predicted part + is written to macroblock array (mb) + +------------------------------------------------------------------------------*/ +#ifndef H264DEC_ARM11 +void h264bsdInterpolateVerHalf( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight) +{ + u32 p1[21*21/4+1]; + u32 i, j; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + u8 *ptrC, *ptrV; + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + + if ((x0 < 0) || ((u32)x0+partWidth > width) || + (y0 < 0) || ((u32)y0+partHeight+5 > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth, partHeight+5, partWidth); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth; + } + + ref += (u32)y0 * width + (u32)x0; + + ptrC = ref + width; + ptrV = ptrC + 5*width; + + /* 4 pixels per iteration, interpolate using 5 vertical samples */ + for (i = (partHeight >> 2); i; i--) + { + /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */ + for (j = partWidth; j; j--) + { + tmp4 = ptrV[-(i32)width*2]; + tmp5 = ptrV[-(i32)width]; + tmp1 = ptrV[width]; + tmp2 = ptrV[width*2]; + tmp6 = *ptrV++; + + tmp7 = tmp4 + tmp1; + tmp2 -= (tmp7 << 2); + tmp2 -= tmp7; + tmp2 += 16; + tmp7 = tmp5 + tmp6; + tmp3 = ptrC[width*2]; + tmp2 += (tmp7 << 4); + tmp2 += (tmp7 << 2); + tmp2 += tmp3; + tmp2 = clp[tmp2>>5]; + tmp1 += 16; + mb[48] = (u8)tmp2; + + tmp7 = tmp3 + tmp6; + tmp1 -= (tmp7 << 2); + tmp1 -= tmp7; + tmp7 = tmp4 + tmp5; + tmp2 = ptrC[width]; + tmp1 += (tmp7 << 4); + tmp1 += (tmp7 << 2); + tmp1 += tmp2; + tmp1 = clp[tmp1>>5]; + tmp6 += 16; + mb[32] = (u8)tmp1; + + tmp7 = tmp2 + tmp5; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp7 = tmp4 + tmp3; + tmp1 = *ptrC; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp6 += tmp1; + tmp6 = clp[tmp6>>5]; + tmp5 += 16; + mb[16] = (u8)tmp6; + + tmp1 += tmp4; + tmp5 -= (tmp1 << 2); + tmp5 -= tmp1; + tmp3 += tmp2; + tmp6 = 
ptrC[-(i32)width]; + tmp5 += (tmp3 << 4); + tmp5 += (tmp3 << 2); + tmp5 += tmp6; + tmp5 = clp[tmp5>>5]; + *mb++ = (u8)tmp5; + ptrC++; + } + ptrC += 4*width - partWidth; + ptrV += 4*width - partWidth; + mb += 4*16 - partWidth; + } + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateVerQuarter + + Functional description: + Function to perform vertical interpolation of pixel position 'd' + or 'n' for a block. Overfilling is done only if needed. Reference + image (ref) is read at correct position and the predicted part + is written to macroblock array (mb) + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateVerQuarter( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight, + u32 verOffset) /* 0 for pixel d, 1 for pixel n */ +{ + u32 p1[21*21/4+1]; + u32 i, j; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + u8 *ptrC, *ptrV, *ptrInt; + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + + if ((x0 < 0) || ((u32)x0+partWidth > width) || + (y0 < 0) || ((u32)y0+partHeight+5 > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth, partHeight+5, partWidth); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth; + } + + ref += (u32)y0 * width + (u32)x0; + + ptrC = ref + width; + ptrV = ptrC + 5*width; + + /* Pointer to integer sample position, either M or R */ + ptrInt = ptrC + (2+verOffset)*width; + + /* 4 pixels per iteration + * interpolate using 5 vertical samples and average between + * interpolated value and integer sample value */ + for (i = (partHeight >> 2); i; i--) + { + /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */ + for (j = partWidth; j; j--) + { + tmp4 = ptrV[-(i32)width*2]; + tmp5 = ptrV[-(i32)width]; + tmp1 = ptrV[width]; + tmp2 = ptrV[width*2]; + tmp6 = *ptrV++; + + tmp7 = tmp4 + tmp1; + tmp2 -= (tmp7 << 
2); + tmp2 -= tmp7; + tmp2 += 16; + tmp7 = tmp5 + tmp6; + tmp3 = ptrC[width*2]; + tmp2 += (tmp7 << 4); + tmp2 += (tmp7 << 2); + tmp2 += tmp3; + tmp2 = clp[tmp2>>5]; + tmp7 = ptrInt[width*2]; + tmp1 += 16; + tmp2++; + mb[48] = (u8)((tmp2 + tmp7) >> 1); + + tmp7 = tmp3 + tmp6; + tmp1 -= (tmp7 << 2); + tmp1 -= tmp7; + tmp7 = tmp4 + tmp5; + tmp2 = ptrC[width]; + tmp1 += (tmp7 << 4); + tmp1 += (tmp7 << 2); + tmp1 += tmp2; + tmp1 = clp[tmp1>>5]; + tmp7 = ptrInt[width]; + tmp6 += 16; + tmp1++; + mb[32] = (u8)((tmp1 + tmp7) >> 1); + + tmp7 = tmp2 + tmp5; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp7 = tmp4 + tmp3; + tmp1 = *ptrC; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp6 += tmp1; + tmp6 = clp[tmp6>>5]; + tmp7 = *ptrInt; + tmp5 += 16; + tmp6++; + mb[16] = (u8)((tmp6 + tmp7) >> 1); + + tmp1 += tmp4; + tmp5 -= (tmp1 << 2); + tmp5 -= tmp1; + tmp3 += tmp2; + tmp6 = ptrC[-(i32)width]; + tmp5 += (tmp3 << 4); + tmp5 += (tmp3 << 2); + tmp5 += tmp6; + tmp5 = clp[tmp5>>5]; + tmp7 = ptrInt[-(i32)width]; + tmp5++; + *mb++ = (u8)((tmp5 + tmp7) >> 1); + ptrC++; + ptrInt++; + } + ptrC += 4*width - partWidth; + ptrV += 4*width - partWidth; + ptrInt += 4*width - partWidth; + mb += 4*16 - partWidth; + } + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateHorHalf + + Functional description: + Function to perform horizontal interpolation of pixel position 'b' + for a block. Overfilling is done only if needed. 
Reference + image (ref) is read at correct position and the predicted part + is written to macroblock array (mb) + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateHorHalf( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight) +{ + u32 p1[21*21/4+1]; + u8 *ptrJ; + u32 x, y; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + ASSERT((partWidth&0x3) == 0); + ASSERT((partHeight&0x3) == 0); + + if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || + (y0 < 0) || ((u32)y0+partHeight > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth+5, partHeight, partWidth+5); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth + 5; + } + + ref += (u32)y0 * width + (u32)x0; + + ptrJ = ref + 5; + + for (y = partHeight; y; y--) + { + tmp6 = *(ptrJ - 5); + tmp5 = *(ptrJ - 4); + tmp4 = *(ptrJ - 3); + tmp3 = *(ptrJ - 2); + tmp2 = *(ptrJ - 1); + + /* calculate 4 pels per iteration */ + for (x = (partWidth >> 2); x; x--) + { + /* First pixel */ + tmp6 += 16; + tmp7 = tmp3 + tmp4; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp7 = tmp2 + tmp5; + tmp1 = *ptrJ++; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp6 += tmp1; + tmp6 = clp[tmp6>>5]; + /* Second pixel */ + tmp5 += 16; + tmp7 = tmp2 + tmp3; + *mb++ = (u8)tmp6; + tmp5 += (tmp7 << 4); + tmp5 += (tmp7 << 2); + tmp7 = tmp1 + tmp4; + tmp6 = *ptrJ++; + tmp5 -= (tmp7 << 2); + tmp5 -= tmp7; + tmp5 += tmp6; + tmp5 = clp[tmp5>>5]; + /* Third pixel */ + tmp4 += 16; + tmp7 = tmp1 + tmp2; + *mb++ = (u8)tmp5; + tmp4 += (tmp7 << 4); + tmp4 += (tmp7 << 2); + tmp7 = tmp6 + tmp3; + tmp5 = *ptrJ++; + tmp4 -= (tmp7 << 2); + tmp4 -= tmp7; + tmp4 += tmp5; + tmp4 = clp[tmp4>>5]; + /* Fourth pixel */ + tmp3 += 16; + tmp7 = tmp6 + tmp1; + *mb++ = (u8)tmp4; + tmp3 += (tmp7 << 4); + tmp3 += (tmp7 << 2); + tmp7 = tmp5 + tmp2; + tmp4 = *ptrJ++; + 
tmp3 -= (tmp7 << 2); + tmp3 -= tmp7; + tmp3 += tmp4; + tmp3 = clp[tmp3>>5]; + tmp7 = tmp4; + tmp4 = tmp6; + tmp6 = tmp2; + tmp2 = tmp7; + *mb++ = (u8)tmp3; + tmp3 = tmp5; + tmp5 = tmp1; + } + ptrJ += width - partWidth; + mb += 16 - partWidth; + } + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateHorQuarter + + Functional description: + Function to perform horizontal interpolation of pixel position 'a' + or 'c' for a block. Overfilling is done only if needed. Reference + image (ref) is read at correct position and the predicted part + is written to macroblock array (mb) + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateHorQuarter( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight, + u32 horOffset) /* 0 for pixel a, 1 for pixel c */ +{ + u32 p1[21*21/4+1]; + u8 *ptrJ; + u32 x, y; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + + if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || + (y0 < 0) || ((u32)y0+partHeight > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth+5, partHeight, partWidth+5); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth + 5; + } + + ref += (u32)y0 * width + (u32)x0; + + ptrJ = ref + 5; + + for (y = partHeight; y; y--) + { + tmp6 = *(ptrJ - 5); + tmp5 = *(ptrJ - 4); + tmp4 = *(ptrJ - 3); + tmp3 = *(ptrJ - 2); + tmp2 = *(ptrJ - 1); + + /* calculate 4 pels per iteration */ + for (x = (partWidth >> 2); x; x--) + { + /* First pixel */ + tmp6 += 16; + tmp7 = tmp3 + tmp4; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp7 = tmp2 + tmp5; + tmp1 = *ptrJ++; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp6 += tmp1; + tmp6 = clp[tmp6>>5]; + tmp5 += 16; + if (!horOffset) + tmp6 += tmp4; + else + tmp6 += tmp3; + *mb++ = (u8)((tmp6 + 1) >> 1); + /* Second pixel 
*/ + tmp7 = tmp2 + tmp3; + tmp5 += (tmp7 << 4); + tmp5 += (tmp7 << 2); + tmp7 = tmp1 + tmp4; + tmp6 = *ptrJ++; + tmp5 -= (tmp7 << 2); + tmp5 -= tmp7; + tmp5 += tmp6; + tmp5 = clp[tmp5>>5]; + tmp4 += 16; + if (!horOffset) + tmp5 += tmp3; + else + tmp5 += tmp2; + *mb++ = (u8)((tmp5 + 1) >> 1); + /* Third pixel */ + tmp7 = tmp1 + tmp2; + tmp4 += (tmp7 << 4); + tmp4 += (tmp7 << 2); + tmp7 = tmp6 + tmp3; + tmp5 = *ptrJ++; + tmp4 -= (tmp7 << 2); + tmp4 -= tmp7; + tmp4 += tmp5; + tmp4 = clp[tmp4>>5]; + tmp3 += 16; + if (!horOffset) + tmp4 += tmp2; + else + tmp4 += tmp1; + *mb++ = (u8)((tmp4 + 1) >> 1); + /* Fourth pixel */ + tmp7 = tmp6 + tmp1; + tmp3 += (tmp7 << 4); + tmp3 += (tmp7 << 2); + tmp7 = tmp5 + tmp2; + tmp4 = *ptrJ++; + tmp3 -= (tmp7 << 2); + tmp3 -= tmp7; + tmp3 += tmp4; + tmp3 = clp[tmp3>>5]; + if (!horOffset) + tmp3 += tmp1; + else + tmp3 += tmp6; + *mb++ = (u8)((tmp3 + 1) >> 1); + tmp3 = tmp5; + tmp5 = tmp1; + tmp7 = tmp4; + tmp4 = tmp6; + tmp6 = tmp2; + tmp2 = tmp7; + } + ptrJ += width - partWidth; + mb += 16 - partWidth; + } + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateHorVerQuarter + + Functional description: + Function to perform horizontal and vertical interpolation of pixel + position 'e', 'g', 'p' or 'r' for a block. Overfilling is done only + if needed. 
Reference image (ref) is read at correct position and + the predicted part is written to macroblock array (mb) + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateHorVerQuarter( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight, + u32 horVerOffset) /* 0 for pixel e, 1 for pixel g, + 2 for pixel p, 3 for pixel r */ +{ + u32 p1[21*21/4+1]; + u8 *ptrC, *ptrJ, *ptrV; + u32 x, y; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + + if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || + (y0 < 0) || ((u32)y0+partHeight+5 > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth+5, partHeight+5, partWidth+5); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth+5; + } + + /* Ref points to G + (-2, -2) */ + ref += (u32)y0 * width + (u32)x0; + + /* ptrJ points to either J or Q, depending on vertical offset */ + ptrJ = ref + (((horVerOffset & 0x2) >> 1) + 2) * width + 5; + + /* ptrC points to either C or D, depending on horizontal offset */ + ptrC = ref + width + 2 + (horVerOffset & 0x1); + + for (y = partHeight; y; y--) + { + tmp6 = *(ptrJ - 5); + tmp5 = *(ptrJ - 4); + tmp4 = *(ptrJ - 3); + tmp3 = *(ptrJ - 2); + tmp2 = *(ptrJ - 1); + + /* Horizontal interpolation, calculate 4 pels per iteration */ + for (x = (partWidth >> 2); x; x--) + { + /* First pixel */ + tmp6 += 16; + tmp7 = tmp3 + tmp4; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp7 = tmp2 + tmp5; + tmp1 = *ptrJ++; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp6 += tmp1; + tmp6 = clp[tmp6>>5]; + /* Second pixel */ + tmp5 += 16; + tmp7 = tmp2 + tmp3; + *mb++ = (u8)tmp6; + tmp5 += (tmp7 << 4); + tmp5 += (tmp7 << 2); + tmp7 = tmp1 + tmp4; + tmp6 = *ptrJ++; + tmp5 -= (tmp7 << 2); + tmp5 -= tmp7; + tmp5 += tmp6; + tmp5 = clp[tmp5>>5]; + /* Third pixel */ + tmp4 += 16; + tmp7 = tmp1 + tmp2; + *mb++ = 
(u8)tmp5; + tmp4 += (tmp7 << 4); + tmp4 += (tmp7 << 2); + tmp7 = tmp6 + tmp3; + tmp5 = *ptrJ++; + tmp4 -= (tmp7 << 2); + tmp4 -= tmp7; + tmp4 += tmp5; + tmp4 = clp[tmp4>>5]; + /* Fourth pixel */ + tmp3 += 16; + tmp7 = tmp6 + tmp1; + *mb++ = (u8)tmp4; + tmp3 += (tmp7 << 4); + tmp3 += (tmp7 << 2); + tmp7 = tmp5 + tmp2; + tmp4 = *ptrJ++; + tmp3 -= (tmp7 << 2); + tmp3 -= tmp7; + tmp3 += tmp4; + tmp3 = clp[tmp3>>5]; + tmp7 = tmp4; + tmp4 = tmp6; + tmp6 = tmp2; + tmp2 = tmp7; + *mb++ = (u8)tmp3; + tmp3 = tmp5; + tmp5 = tmp1; + } + ptrJ += width - partWidth; + mb += 16 - partWidth; + } + + mb -= 16*partHeight; + ptrV = ptrC + 5*width; + + for (y = (partHeight >> 2); y; y--) + { + /* Vertical interpolation and averaging, 4 pels per iteration */ + for (x = partWidth; x; x--) + { + tmp4 = ptrV[-(i32)width*2]; + tmp5 = ptrV[-(i32)width]; + tmp1 = ptrV[width]; + tmp2 = ptrV[width*2]; + tmp6 = *ptrV++; + + tmp7 = tmp4 + tmp1; + tmp2 -= (tmp7 << 2); + tmp2 -= tmp7; + tmp2 += 16; + tmp7 = tmp5 + tmp6; + tmp3 = ptrC[width*2]; + tmp2 += (tmp7 << 4); + tmp2 += (tmp7 << 2); + tmp2 += tmp3; + tmp7 = clp[tmp2>>5]; + tmp2 = mb[48]; + tmp1 += 16; + tmp7++; + mb[48] = (u8)((tmp2 + tmp7) >> 1); + + tmp7 = tmp3 + tmp6; + tmp1 -= (tmp7 << 2); + tmp1 -= tmp7; + tmp7 = tmp4 + tmp5; + tmp2 = ptrC[width]; + tmp1 += (tmp7 << 4); + tmp1 += (tmp7 << 2); + tmp1 += tmp2; + tmp7 = clp[tmp1>>5]; + tmp1 = mb[32]; + tmp6 += 16; + tmp7++; + mb[32] = (u8)((tmp1 + tmp7) >> 1); + + tmp1 = *ptrC; + tmp7 = tmp2 + tmp5; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp7 = tmp4 + tmp3; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp6 += tmp1; + tmp7 = clp[tmp6>>5]; + tmp6 = mb[16]; + tmp5 += 16; + tmp7++; + mb[16] = (u8)((tmp6 + tmp7) >> 1); + + tmp6 = ptrC[-(i32)width]; + tmp1 += tmp4; + tmp5 -= (tmp1 << 2); + tmp5 -= tmp1; + tmp3 += tmp2; + tmp5 += (tmp3 << 4); + tmp5 += (tmp3 << 2); + tmp5 += tmp6; + tmp7 = clp[tmp5>>5]; + tmp5 = *mb; + tmp7++; + *mb++ = (u8)((tmp5 + tmp7) >> 1); + ptrC++; + + } + ptrC += 
4*width - partWidth; + ptrV += 4*width - partWidth; + mb += 4*16 - partWidth; + } + +} +#endif + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateMidHalf + + Functional description: + Function to perform horizontal and vertical interpolation of pixel + position 'j' for a block. Overfilling is done only if needed. + Reference image (ref) is read at correct position and the predicted + part is written to macroblock array (mb) + + Inputs: + ref pointer to reference picture samples + x0, y0 top-left corner of the (partWidth+5) x (partHeight+5) + window of reference samples read by the filter + width width of reference picture + height height of reference picture + partWidth width of predicted block (processed 4 pels at a time) + partHeight height of predicted block (processed 4 rows at a time) + Outputs: + mb predicted block, written with a row stride of 16 + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateMidHalf( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight) +{ + u32 p1[21*21/4+1]; /* scratch for the overfilled reference window */ + u32 x, y; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + i32 *ptrC, *ptrV, *b1; + u8 *ptrJ; + i32 table[21*16]; /* unrounded horizontal filter sums */ + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + + /* Window crosses a picture edge: build an edge-replicated copy in p1 + * and read from that copy instead */ + if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || + (y0 < 0) || ((u32)y0+partHeight+5 > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth+5, partHeight+5, partWidth+5); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth+5; + } + + ref += (u32)y0 * width + (u32)x0; + + b1 = table; + ptrJ = ref + 5; + + /* First step: calculate intermediate values for + * horizontal interpolation (6-tap sums with weights 1,-5,20,20,-5,1, + * stored unrounded in table, partWidth entries per row) */ + for (y = partHeight + 5; y; y--) + { + tmp6 = *(ptrJ - 5); + tmp5 = *(ptrJ - 4); + tmp4 = *(ptrJ - 3); + tmp3 = *(ptrJ - 2); + tmp2 = *(ptrJ - 1); + + /* 4 pels per iteration */ + for (x = (partWidth >> 2); x; x--) + { + /* First pixel */ + tmp7 = tmp3 + tmp4; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp7 = tmp2 + tmp5; + tmp1 = *ptrJ++; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp6 += tmp1; + *b1++ = tmp6; + /* Second pixel */ + tmp7 = tmp2 + tmp3; + tmp5 += (tmp7 << 4); + tmp5 += (tmp7 << 2); + tmp7 = tmp1 + tmp4; + tmp6 = *ptrJ++; + tmp5 -= (tmp7 << 2); + tmp5 -= tmp7; + tmp5 += tmp6; + *b1++ = tmp5; + /* Third 
pixel */ + tmp7 = tmp1 + tmp2; + tmp4 += (tmp7 << 4); + tmp4 += (tmp7 << 2); + tmp7 = tmp6 + tmp3; + tmp5 = *ptrJ++; + tmp4 -= (tmp7 << 2); + tmp4 -= tmp7; + tmp4 += tmp5; + *b1++ = tmp4; + /* Fourth pixel */ + tmp7 = tmp6 + tmp1; + tmp3 += (tmp7 << 4); + tmp3 += (tmp7 << 2); + tmp7 = tmp5 + tmp2; + tmp4 = *ptrJ++; + tmp3 -= (tmp7 << 2); + tmp3 -= tmp7; + tmp3 += tmp4; + *b1++ = tmp3; + /* Re-map the temporaries so tmp2..tmp6 again hold the five pels + * preceding ptrJ for the next group of four */ + tmp7 = tmp4; + tmp4 = tmp6; + tmp6 = tmp2; + tmp2 = tmp7; + tmp3 = tmp5; + tmp5 = tmp1; + } + ptrJ += width - partWidth; + } + + /* Second step: calculate vertical interpolation of the stored sums; + * adding 512 and shifting right by 10 rounds the twice-filtered value + * before the clip-table lookup */ + ptrC = table + partWidth; + ptrV = ptrC + 5*partWidth; + for (y = (partHeight >> 2); y; y--) + { + /* 4 pels per iteration */ + for (x = partWidth; x; x--) + { + tmp4 = ptrV[-(i32)partWidth*2]; + tmp5 = ptrV[-(i32)partWidth]; + tmp1 = ptrV[partWidth]; + tmp2 = ptrV[partWidth*2]; + tmp6 = *ptrV++; + + tmp7 = tmp4 + tmp1; + tmp2 -= (tmp7 << 2); + tmp2 -= tmp7; + tmp2 += 512; + tmp7 = tmp5 + tmp6; + tmp3 = ptrC[partWidth*2]; + tmp2 += (tmp7 << 4); + tmp2 += (tmp7 << 2); + tmp2 += tmp3; + tmp7 = clp[tmp2>>10]; + tmp1 += 512; + mb[48] = (u8)tmp7; + + tmp7 = tmp3 + tmp6; + tmp1 -= (tmp7 << 2); + tmp1 -= tmp7; + tmp7 = tmp4 + tmp5; + tmp2 = ptrC[partWidth]; + tmp1 += (tmp7 << 4); + tmp1 += (tmp7 << 2); + tmp1 += tmp2; + tmp7 = clp[tmp1>>10]; + tmp6 += 512; + mb[32] = (u8)tmp7; + + tmp1 = *ptrC; + tmp7 = tmp2 + tmp5; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp7 = tmp4 + tmp3; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp6 += tmp1; + tmp7 = clp[tmp6>>10]; + tmp5 += 512; + mb[16] = (u8)tmp7; + + tmp6 = ptrC[-(i32)partWidth]; + tmp1 += tmp4; + tmp5 -= (tmp1 << 2); + tmp5 -= tmp1; + tmp3 += tmp2; + tmp5 += (tmp3 << 4); + tmp5 += (tmp3 << 2); + tmp5 += tmp6; + tmp7 = clp[tmp5>>10]; + *mb++ = (u8)tmp7; + ptrC++; + } + /* The x loop advanced one row; skip the other three rows of the group */ + mb += 4*16 - partWidth; + ptrC += 3*partWidth; + ptrV += 3*partWidth; + } + +} + + +/*------------------------------------------------------------------------------ + + Function: 
h264bsdInterpolateMidVerQuarter + + Functional description: + Function to perform horizontal and vertical interpolation of pixel + position 'f' or 'q' for a block. Overfilling is done only if needed. + Reference image (ref) is read at correct position and the predicted + part is written to macroblock array (mb) + + Each output pel is the rounded average of the 'j' sample and the + horizontally filtered sample of the table row selected by verOffset. + The output is written with a row stride of 16. + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateMidVerQuarter( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight, + u32 verOffset) /* 0 for pixel f, 1 for pixel q */ +{ + u32 p1[21*21/4+1]; /* scratch for the overfilled reference window */ + u32 x, y; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + i32 *ptrC, *ptrV, *ptrInt, *b1; + u8 *ptrJ; + i32 table[21*16]; /* unrounded horizontal filter sums */ + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + + /* Window crosses a picture edge: build an edge-replicated copy in p1 + * and read from that copy instead */ + if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || + (y0 < 0) || ((u32)y0+partHeight+5 > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth+5, partHeight+5, partWidth+5); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth+5; + } + + ref += (u32)y0 * width + (u32)x0; + + b1 = table; + ptrJ = ref + 5; + + /* First step: calculate intermediate values for + * horizontal interpolation */ + for (y = partHeight + 5; y; y--) + { + tmp6 = *(ptrJ - 5); + tmp5 = *(ptrJ - 4); + tmp4 = *(ptrJ - 3); + tmp3 = *(ptrJ - 2); + tmp2 = *(ptrJ - 1); + for (x = (partWidth >> 2); x; x--) + { + /* First pixel */ + tmp7 = tmp3 + tmp4; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp7 = tmp2 + tmp5; + tmp1 = *ptrJ++; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp6 += tmp1; + *b1++ = tmp6; + /* Second pixel */ + tmp7 = tmp2 + tmp3; + tmp5 += (tmp7 << 4); + tmp5 += (tmp7 << 2); + tmp7 = tmp1 + tmp4; + tmp6 = *ptrJ++; + tmp5 -= (tmp7 << 2); + tmp5 -= tmp7; + tmp5 += tmp6; + *b1++ = tmp5; + /* Third pixel */ + tmp7 = tmp1 + tmp2; + tmp4 += (tmp7 << 4); + tmp4 += (tmp7 << 2); + tmp7 = tmp6 + tmp3; + tmp5 = *ptrJ++; + tmp4 -= (tmp7 << 2); + tmp4 
-= tmp7; + tmp4 += tmp5; + *b1++ = tmp4; + /* Fourth pixel */ + tmp7 = tmp6 + tmp1; + tmp3 += (tmp7 << 4); + tmp3 += (tmp7 << 2); + tmp7 = tmp5 + tmp2; + tmp4 = *ptrJ++; + tmp3 -= (tmp7 << 2); + tmp3 -= tmp7; + tmp3 += tmp4; + *b1++ = tmp3; + tmp7 = tmp4; + tmp4 = tmp6; + tmp6 = tmp2; + tmp2 = tmp7; + tmp3 = tmp5; + tmp5 = tmp1; + } + ptrJ += width - partWidth; + } + + /* Second step: calculate vertical interpolation and average */ + ptrC = table + partWidth; + ptrV = ptrC + 5*partWidth; + /* Pointer to integer sample position, either M or R + * NOTE(review): ptrInt indexes table, so it reads horizontal filter + * sums of that row, which are rounded below with +16 and >>5 -- + * confirm the sample naming against the specification */ + ptrInt = ptrC + (2+verOffset)*partWidth; + for (y = (partHeight >> 2); y; y--) + { + for (x = partWidth; x; x--) + { + tmp4 = ptrV[-(i32)partWidth*2]; + tmp5 = ptrV[-(i32)partWidth]; + tmp1 = ptrV[partWidth]; + tmp2 = ptrV[partWidth*2]; + tmp6 = *ptrV++; + + tmp7 = tmp4 + tmp1; + tmp2 -= (tmp7 << 2); + tmp2 -= tmp7; + tmp2 += 512; + tmp7 = tmp5 + tmp6; + tmp3 = ptrC[partWidth*2]; + tmp2 += (tmp7 << 4); + tmp2 += (tmp7 << 2); + tmp7 = ptrInt[partWidth*2]; + tmp2 += tmp3; + tmp2 = clp[tmp2>>10]; + tmp7 += 16; + tmp7 = clp[tmp7>>5]; + tmp1 += 512; + tmp2++; /* +1 so that the shift below rounds the average */ + mb[48] = (u8)((tmp7 + tmp2) >> 1); + + tmp7 = tmp3 + tmp6; + tmp1 -= (tmp7 << 2); + tmp1 -= tmp7; + tmp7 = tmp4 + tmp5; + tmp2 = ptrC[partWidth]; + tmp1 += (tmp7 << 4); + tmp1 += (tmp7 << 2); + tmp7 = ptrInt[partWidth]; + tmp1 += tmp2; + tmp1 = clp[tmp1>>10]; + tmp7 += 16; + tmp7 = clp[tmp7>>5]; + tmp6 += 512; + tmp1++; + mb[32] = (u8)((tmp7 + tmp1) >> 1); + + tmp1 = *ptrC; + tmp7 = tmp2 + tmp5; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp7 = tmp4 + tmp3; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp7 = *ptrInt; + tmp6 += tmp1; + tmp6 = clp[tmp6>>10]; + tmp7 += 16; + tmp7 = clp[tmp7>>5]; + tmp5 += 512; + tmp6++; + mb[16] = (u8)((tmp7 + tmp6) >> 1); + + tmp6 = ptrC[-(i32)partWidth]; + tmp1 += tmp4; + tmp5 -= (tmp1 << 2); + tmp5 -= tmp1; + tmp3 += tmp2; + tmp5 += (tmp3 << 4); + tmp5 += (tmp3 << 2); + tmp7 = ptrInt[-(i32)partWidth]; + tmp5 += tmp6; + tmp5 = 
clp[tmp5>>10]; + tmp7 += 16; + tmp7 = clp[tmp7>>5]; + tmp5++; + *mb++ = (u8)((tmp7 + tmp5) >> 1); + ptrC++; + ptrInt++; + } + /* The x loop advanced one row; skip the other three rows of the group */ + mb += 4*16 - partWidth; + ptrC += 3*partWidth; + ptrV += 3*partWidth; + ptrInt += 3*partWidth; + } + +} + + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateMidHorQuarter + + Functional description: + Function to perform horizontal and vertical interpolation of pixel + position 'i' or 'k' for a block. Overfilling is done only if needed. + Reference image (ref) is read at correct position and the predicted + part is written to macroblock array (mb) + + Here the vertical filter runs first and its unrounded sums are stored + in table (partWidth+5 entries per row). Each output pel is the + rounded average of the 'j' sample and the vertically filtered sample + of the table column selected by horOffset. The output is written + with a row stride of 16. + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateMidHorQuarter( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight, + u32 horOffset) /* 0 for pixel i, 1 for pixel k */ +{ + u32 p1[21*21/4+1]; /* scratch for the overfilled reference window */ + u32 x, y; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + i32 *ptrJ, *ptrInt, *h1; + u8 *ptrC, *ptrV; + i32 table[21*16]; /* unrounded vertical filter sums */ + i32 tableWidth = (i32)partWidth+5; + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + + /* Window crosses a picture edge: build an edge-replicated copy in p1 + * and read from that copy instead */ + if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || + (y0 < 0) || ((u32)y0+partHeight+5 > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth+5, partHeight+5, partWidth+5); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth+5; + } + + ref += (u32)y0 * width + (u32)x0; + + h1 = table + tableWidth; + ptrC = ref + width; + ptrV = ptrC + 5*width; + + /* First step: calculate intermediate values for + * vertical interpolation */ + for (y = (partHeight >> 2); y; y--) + { + for (x = (u32)tableWidth; x; x--) + { + tmp4 = ptrV[-(i32)width*2]; + tmp5 = ptrV[-(i32)width]; + tmp1 = ptrV[width]; + tmp2 = ptrV[width*2]; + tmp6 = *ptrV++; + + tmp7 = tmp4 + tmp1; + tmp2 -= (tmp7 << 2); + tmp2 -= tmp7; + tmp7 = tmp5 + tmp6; + tmp3 = ptrC[width*2]; + tmp2 += (tmp7 << 4); + 
tmp2 += (tmp7 << 2); + tmp2 += tmp3; + h1[tableWidth*2] = tmp2; + + tmp7 = tmp3 + tmp6; + tmp1 -= (tmp7 << 2); + tmp1 -= tmp7; + tmp7 = tmp4 + tmp5; + tmp2 = ptrC[width]; + tmp1 += (tmp7 << 4); + tmp1 += (tmp7 << 2); + tmp1 += tmp2; + h1[tableWidth] = tmp1; + + tmp1 = *ptrC; + tmp7 = tmp2 + tmp5; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp7 = tmp4 + tmp3; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp6 += tmp1; + *h1 = tmp6; + + tmp6 = ptrC[-(i32)width]; + tmp1 += tmp4; + tmp5 -= (tmp1 << 2); + tmp5 -= tmp1; + tmp3 += tmp2; + tmp5 += (tmp3 << 4); + tmp5 += (tmp3 << 2); + tmp5 += tmp6; + h1[-tableWidth] = tmp5; + h1++; + ptrC++; + } + /* The x loop advanced one row; skip the other three rows of the group */ + ptrC += 4*width - partWidth - 5; + ptrV += 4*width - partWidth - 5; + h1 += 3*tableWidth; + } + + /* Second step: calculate horizontal interpolation and average */ + ptrJ = table + 5; + /* Pointer to integer sample position, either G or H + * NOTE(review): ptrInt indexes table, so it reads vertical filter + * sums of that column, which are rounded below with +16 and >>5 -- + * confirm the sample naming against the specification */ + ptrInt = table + 2 + horOffset; + for (y = partHeight; y; y--) + { + tmp6 = *(ptrJ - 5); + tmp5 = *(ptrJ - 4); + tmp4 = *(ptrJ - 3); + tmp3 = *(ptrJ - 2); + tmp2 = *(ptrJ - 1); + for (x = (partWidth>>2); x; x--) + { + /* First pixel */ + tmp6 += 512; + tmp7 = tmp3 + tmp4; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp7 = tmp2 + tmp5; + tmp1 = *ptrJ++; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp7 = *ptrInt++; + tmp6 += tmp1; + tmp6 = clp[tmp6 >> 10]; + tmp7 += 16; + tmp7 = clp[tmp7 >> 5]; + tmp5 += 512; + tmp6++; /* +1 so that the shift below rounds the average */ + *mb++ = (u8)((tmp6 + tmp7) >> 1); + /* Second pixel */ + tmp7 = tmp2 + tmp3; + tmp5 += (tmp7 << 4); + tmp5 += (tmp7 << 2); + tmp7 = tmp1 + tmp4; + tmp6 = *ptrJ++; + tmp5 -= (tmp7 << 2); + tmp5 -= tmp7; + tmp7 = *ptrInt++; + tmp5 += tmp6; + tmp5 = clp[tmp5 >> 10]; + tmp7 += 16; + tmp7 = clp[tmp7 >> 5]; + tmp4 += 512; + tmp5++; + *mb++ = (u8)((tmp5 + tmp7) >> 1); + /* Third pixel */ + tmp7 = tmp1 + tmp2; + tmp4 += (tmp7 << 4); + tmp4 += (tmp7 << 2); + tmp7 = tmp6 + tmp3; + tmp5 = *ptrJ++; + tmp4 -= (tmp7 << 2); + tmp4 -= tmp7; + tmp7 = *ptrInt++; + tmp4 += tmp5; + tmp4 = 
clp[tmp4 >> 10]; + tmp7 += 16; + tmp7 = clp[tmp7 >> 5]; + tmp3 += 512; + tmp4++; + *mb++ = (u8)((tmp4 + tmp7) >> 1); + /* Fourth pixel */ + tmp7 = tmp6 + tmp1; + tmp3 += (tmp7 << 4); + tmp3 += (tmp7 << 2); + tmp7 = tmp5 + tmp2; + tmp4 = *ptrJ++; + tmp3 -= (tmp7 << 2); + tmp3 -= tmp7; + tmp7 = *ptrInt++; + tmp3 += tmp4; + tmp3 = clp[tmp3 >> 10]; + tmp7 += 16; + tmp7 = clp[tmp7 >> 5]; + tmp3++; + *mb++ = (u8)((tmp3 + tmp7) >> 1); + tmp3 = tmp5; + tmp5 = tmp1; + tmp7 = tmp4; + tmp4 = tmp6; + tmp6 = tmp2; + tmp2 = tmp7; + } + /* Each table row is partWidth+5 wide; skip the 5 extra columns */ + ptrJ += 5; + ptrInt += 5; + mb += 16 - partWidth; + } + +} + + +/*------------------------------------------------------------------------------ + + Function: h264bsdPredictSamples + + Functional description: + This function reconstructs a prediction for a macroblock partition. + The prediction is either copied or interpolated using the reference + frame and the motion vector. Both luminance and chrominance parts are + predicted. The prediction is stored in given macroblock array (data). 
+ Inputs: + data pointer to macroblock array (384 bytes) for output + mv pointer to motion vector used for prediction + refPic pointer to reference picture structure + xA x-coordinate for current macroblock + yA y-coordinate for current macroblock + partX x-offset for partition in macroblock + partY y-offset for partition in macroblock + partWidth width of partition + partHeight height of partition + Outputs: + data macroblock array (16x16+8x8+8x8) where predicted + partition is stored at correct position + +------------------------------------------------------------------------------*/ + +void h264bsdPredictSamples( + u8 *data, + mv_t *mv, + image_t *refPic, + u32 xA, + u32 yA, + u32 partX, + u32 partY, + u32 partWidth, + u32 partHeight) + +{ + +/* Variables */ + + u32 xFrac, yFrac, width, height; + i32 xInt, yInt; + u8 *lumaPartData; + +/* Code */ + + ASSERT(data); + ASSERT(mv); + ASSERT(partWidth); + ASSERT(partHeight); + ASSERT(refPic); + ASSERT(refPic->data); + ASSERT(refPic->width); + ASSERT(refPic->height); + + /* luma */ + lumaPartData = data + 16*partY + partX; + + /* The two low bits of each vector component are the quarter-pel + * fraction; the remaining bits (>> 2 below) are the whole-pel offset */ + xFrac = mv->hor & 0x3; + yFrac = mv->ver & 0x3; + + /* refPic->width and refPic->height are multiplied by 16 to get + * dimensions in pels */ + width = 16 * refPic->width; + height = 16 * refPic->height; + + xInt = (i32)xA + (i32)partX + (mv->hor >> 2); + yInt = (i32)yA + (i32)partY + (mv->ver >> 2); + + ASSERT(lumaFracPos[xFrac][yFrac] < 16); + + /* The filtered cases pass an origin moved 2 pels left and/or up in the + * directions that are filtered; the case letters name the fractional + * sample positions */ + switch (lumaFracPos[xFrac][yFrac]) + { + case 0: /* G */ + h264bsdFillBlock(refPic->data, lumaPartData, + xInt,yInt,width,height,partWidth,partHeight,16); + break; + case 1: /* d */ + h264bsdInterpolateVerQuarter(refPic->data, lumaPartData, + xInt, yInt-2, width, height, partWidth, partHeight, 0); + break; + case 2: /* h */ + h264bsdInterpolateVerHalf(refPic->data, lumaPartData, + xInt, yInt-2, width, height, partWidth, partHeight); + break; + case 3: /* n */ + h264bsdInterpolateVerQuarter(refPic->data, lumaPartData, + xInt, yInt-2, width, height, partWidth, partHeight, 1); + break; + case 4: /* a */ + 
h264bsdInterpolateHorQuarter(refPic->data, lumaPartData, + xInt-2, yInt, width, height, partWidth, partHeight, 0); + break; + case 5: /* e */ + h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 0); + break; + case 6: /* i */ + h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 0); + break; + case 7: /* p */ + h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 2); + break; + case 8: /* b */ + h264bsdInterpolateHorHalf(refPic->data, lumaPartData, + xInt-2, yInt, width, height, partWidth, partHeight); + break; + case 9: /* f */ + h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 0); + break; + case 10: /* j */ + h264bsdInterpolateMidHalf(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight); + break; + case 11: /* q */ + h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 1); + break; + case 12: /* c */ + h264bsdInterpolateHorQuarter(refPic->data, lumaPartData, + xInt-2, yInt, width, height, partWidth, partHeight, 1); + break; + case 13: /* g */ + h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 1); + break; + case 14: /* k */ + h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 1); + break; + default: /* case 15, r */ + h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 3); + break; + } + + /* chroma: output starts after the 16x16 luma samples, rows are 8 wide + * and the partition offsets are halved */ + PredictChroma( + data + 16*16 + (partY>>1)*8 + (partX>>1), + xA + partX, + yA + partY, + partWidth, + partHeight, + mv, + refPic); + +} + +#else /* H264DEC_OMXDL */ 
+/*------------------------------------------------------------------------------ + + Function: h264bsdPredictSamples + + Functional description: + This function reconstructs a prediction for a macroblock partition. + The prediction is either copied or interpolated using the reference + frame and the motion vector. Both luminance and chrominance parts are + predicted. The prediction is stored in given macroblock array (data). + This variant is compiled when H264DEC_OMXDL is defined and performs + the interpolation with omxVCM4P10_InterpolateLuma and + armVCM4P10_Interpolate_Chroma. + Inputs: + data pointer to macroblock array (384 bytes) for output + mv pointer to motion vector used for prediction + refPic pointer to reference picture structure + colAndRow packed macroblock position: x-coordinate (xA) in the + upper 16 bits, y-coordinate (yA) in the lower 16 bits + part packed partition geometry, one byte each starting from + the most significant: partX, partY, partWidth, + partHeight + pFill scratch buffer that receives the overfilled reference + block when the block crosses a picture edge + Outputs: + data macroblock array (16x16+8x8+8x8) where predicted + partition is stored at correct position + +------------------------------------------------------------------------------*/ + +/*lint -e{550} Symbol 'res' not accessed */ +void h264bsdPredictSamples( + u8 *data, + mv_t *mv, + image_t *refPic, + u32 colAndRow, + u32 part, + u8 *pFill) + +{ + +/* Variables */ + + u32 xFrac, yFrac; + u32 width, height; + i32 xInt, yInt, x0, y0; + u8 *partData, *ref; + OMXSize roi; + u32 fillWidth; + u32 fillHeight; + OMXResult res; + u32 xA, yA; + u32 partX, partY; + u32 partWidth, partHeight; + +/* Code */ + + ASSERT(data); + ASSERT(mv); + ASSERT(refPic); + ASSERT(refPic->data); + ASSERT(refPic->width); + ASSERT(refPic->height); + + /* Unpack macroblock position and partition geometry */ + xA = (colAndRow & 0xFFFF0000) >> 16; + yA = (colAndRow & 0x0000FFFF); + + partX = (part & 0xFF000000) >> 24; + partY = (part & 0x00FF0000) >> 16; + partWidth = (part & 0x0000FF00) >> 8; + partHeight = (part & 0x000000FF); + + ASSERT(partWidth); + ASSERT(partHeight); + + /* luma */ + partData = data + 16*partY + partX; + + xFrac = mv->hor & 0x3; + yFrac = mv->ver & 0x3; + + width = 16 * refPic->width; + height = 16 * 
refPic->height; + + xInt = (i32)xA + (i32)partX + (mv->hor >> 2); + yInt = (i32)yA + (i32)partY + (mv->ver >> 2); + + /* A fractional component moves the fetch origin 2 pels left/up */ + x0 = (xFrac) ? xInt-2 : xInt; + y0 = (yFrac) ? yInt-2 : yInt; + + /* Size of the region that must lie inside the picture; + * NOTE(review): the widths (32/16, or 2*partWidth) exceed partWidth+5, + * presumably to match the read pattern of the OMX DL routine -- confirm */ + if (xFrac) + { + if (partWidth == 16) + fillWidth = 32; + else + fillWidth = 16; + } + else + fillWidth = (partWidth*2); + if (yFrac) + fillHeight = partHeight+5; + else + fillHeight = partHeight; + + + if ((x0 < 0) || ((u32)x0+fillWidth > width) || + (y0 < 0) || ((u32)y0+fillHeight > height)) + { + h264bsdFillBlock(refPic->data, (u8*)pFill, x0, y0, width, height, + fillWidth, fillHeight, fillWidth); + + x0 = 0; + y0 = 0; + ref = pFill; + width = fillWidth; + /* Undo the 2-pel margin so ref addresses the whole-pel position */ + if (yFrac) + ref += 2*width; + if (xFrac) + ref += 2; + } + else + { + /*lint --e(737) Loss of sign */ + ref = refPic->data + yInt*width + xInt; + } + /* Luma interpolation */ + roi.width = (i32)partWidth; + roi.height = (i32)partHeight; + + res = omxVCM4P10_InterpolateLuma(ref, (i32)width, partData, 16, + (i32)xFrac, (i32)yFrac, roi); + ASSERT(res == 0); + + /* Chroma */ + width = 8 * refPic->width; + height = 8 * refPic->height; + + /* Chroma uses the low 3 bits of each vector component as the fraction */ + x0 = ((xA + partX) >> 1) + (mv->hor >> 3); + y0 = ((yA + partY) >> 1) + (mv->ver >> 3); + xFrac = mv->hor & 0x7; + yFrac = mv->ver & 0x7; + + /* Skip the luma samples (256 per macroblock) to reach the first + * chroma plane */ + ref = refPic->data + 256 * refPic->width * refPic->height; + + roi.width = (i32)(partWidth >> 1); + fillWidth = ((partWidth >> 1) + 8) & ~0x7; + roi.height = (i32)(partHeight >> 1); + fillHeight = (partHeight >> 1) + 1; + + if ((x0 < 0) || ((u32)x0+fillWidth > width) || + (y0 < 0) || ((u32)y0+fillHeight > height)) + { + /* Overfill both chroma planes back to back in pFill */ + h264bsdFillBlock(ref, pFill, x0, y0, width, height, + fillWidth, fillHeight, fillWidth); + ref += width * height; + h264bsdFillBlock(ref, pFill + fillWidth*fillHeight, + x0, y0, width, height, fillWidth, + fillHeight, fillWidth); + + ref = pFill; + x0 = 0; + y0 = 0; + width = fillWidth; + height = fillHeight; + } + + partData = data + 16*16 + (partY>>1)*8 + (partX>>1); + + /* Chroma interpolation */ + /*lint --e(737) Loss of sign */ + ref += y0 
* width + x0; + res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8, + (u32)roi.width, (u32)roi.height, xFrac, yFrac); + ASSERT(res == 0); + /* Second chroma plane: next 8x8 output block, next plane of ref */ + partData += 8 * 8; + ref += height * width; + res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8, + (u32)roi.width, (u32)roi.height, xFrac, yFrac); + ASSERT(res == 0); + +} + +#endif /* H264DEC_OMXDL */ + + +/*------------------------------------------------------------------------------ + + Function: FillRow1 + + Functional description: + This function gets a row of reference pels in a 'normal' case when no + overfilling is necessary. + + Inputs: + ref pointer to reference samples + left not used + center amount of pixels to copy + right not used + Outputs: + fill pointer where samples are stored + +------------------------------------------------------------------------------*/ + +static void FillRow1( + u8 *ref, + u8 *fill, + i32 left, + i32 center, + i32 right) +{ + + ASSERT(ref); + ASSERT(fill); + + H264SwDecMemcpy(fill, ref, (u32)center); + + /*lint -e(715) */ +} + + +/*------------------------------------------------------------------------------ + + Function: h264bsdFillRow7 + + Functional description: + This function gets a row of reference pels when horizontal coordinate + is partly negative or partly greater than reference picture width + (overfilling some pels on left and/or right edge). 
+ Inputs: + ref pointer to reference samples + left amount of pixels to overfill on left-edge + center amount of pixels to copy + right amount of pixels to overfill on right-edge + Outputs: + fill pointer where samples are stored + +------------------------------------------------------------------------------*/ +#ifndef H264DEC_NEON +void h264bsdFillRow7( + u8 *ref, + u8 *fill, + i32 left, + i32 center, + i32 right) +{ + u8 tmp; + + ASSERT(ref); + ASSERT(fill); + + /* Left overfill repeats the first reference pel */ + if (left) + tmp = *ref; + + for ( ; left; left--) + /*lint -esym(644,tmp) tmp is initialized if used */ + *fill++ = tmp; + + for ( ; center; center--) + *fill++ = *ref++; + + /* Right overfill repeats the last pel copied above; when center is 0 + * this reads the pel just before ref */ + if (right) + tmp = ref[-1]; + + for ( ; right; right--) + /*lint -esym(644,tmp) tmp is initialized if used */ + *fill++ = tmp; +} +#endif +/*------------------------------------------------------------------------------ + + Function: h264bsdFillBlock + + Functional description: + This function gets a block of reference pels. It determines whether + overfilling is needed or not and repeatedly calls an appropriate + function (by using a function pointer) that fills one row of the block. 
+ Inputs: + ref pointer to reference frame + x0 x-coordinate for block + y0 y-coordinate for block + width width of reference frame + height height of reference frame + blockWidth width of block + blockHeight height of block + fillScanLength length of a line in output array (pixels) + Outputs: + fill pointer to array where output block is written + +------------------------------------------------------------------------------*/ + +void h264bsdFillBlock( + u8 *ref, + u8 *fill, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 blockWidth, + u32 blockHeight, + u32 fillScanLength) + +{ + +/* Variables */ + + i32 xstop, ystop; + void (*fp)(u8*, u8*, i32, i32, i32); + i32 left, x, right; + i32 top, y, bottom; + +/* Code */ + + ASSERT(ref); + ASSERT(fill); + ASSERT(width); + ASSERT(height); + ASSERT(fill); + ASSERT(blockWidth); + ASSERT(blockHeight); + + xstop = x0 + (i32)blockWidth; + ystop = y0 + (i32)blockHeight; + + /* Choose correct function whether overfilling on left-edge or right-edge + * is needed or not */ + if (x0 >= 0 && xstop <= (i32)width) + fp = FillRow1; + else + fp = h264bsdFillRow7; + + /* Clamp a block that lies completely outside the picture so that it + * just touches the nearest picture edge */ + if (ystop < 0) + y0 = -(i32)blockHeight; + + if (xstop < 0) + x0 = -(i32)blockWidth; + + if (y0 > (i32)height) + y0 = (i32)height; + + if (x0 > (i32)width) + x0 = (i32)width; + + xstop = x0 + (i32)blockWidth; + ystop = y0 + (i32)blockHeight; + + /* Advance ref to the first pel inside the picture */ + if (x0 > 0) + ref += x0; + + if (y0 > 0) + ref += y0 * (i32)width; + + /* Split each dimension into overfill before, pels inside the picture, + * and overfill after */ + left = x0 < 0 ? -x0 : 0; + right = xstop > (i32)width ? xstop - (i32)width : 0; + x = (i32)blockWidth - left - right; + + top = y0 < 0 ? -y0 : 0; + bottom = ystop > (i32)height ? 
ystop - (i32)height : 0; + y = (i32)blockHeight - top - bottom; + + /* Top-overfilling: ref is not advanced, so the same row is repeated */ + for ( ; top; top-- ) + { + (*fp)(ref, fill, left, x, right); + fill += fillScanLength; + } + + /* Lines inside reference image */ + for ( ; y; y-- ) + { + (*fp)(ref, fill, left, x, right); + ref += width; + fill += fillScanLength; + } + + /* Step back one row so the bottom overfill repeats the last row used */ + ref -= width; + + /* Bottom-overfilling */ + for ( ; bottom; bottom-- ) + { + (*fp)(ref, fill, left, x, right); + fill += fillScanLength; + } +} + +/*lint +e701 +e702 */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.h new file mode 100755 index 0000000000000000000000000000000000000000..5a1a140fbfa32d3e0a1f5c0bbfd577526484be1d --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.h @@ -0,0 +1,210 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_RECONSTRUCT_H +#define H264SWDEC_RECONSTRUCT_H + +/*------------------------------------------------------------------------------ + 1. 
Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_macroblock_layer.h" +#include "h264bsd_image.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. Function prototypes +------------------------------------------------------------------------------*/ +/* h264bsdPredictSamples has two signatures: explicit coordinates by default, + * packed coordinates plus a scratch buffer when H264DEC_OMXDL is defined */ +#ifndef H264DEC_OMXDL +void h264bsdPredictSamples( + u8 *data, + mv_t *mv, + image_t *refPic, + u32 xA, + u32 yA, + u32 partX, + u32 partY, + u32 partWidth, + u32 partHeight); +#else +void h264bsdPredictSamples( + u8 *data, + mv_t *mv, + image_t *refPic, + u32 colAndRow,/* packaged data | column | row |*/ + u32 part, /* packaged data |partX|partY|partWidth|partHeight|*/ + u8 *pFill); +#endif + +void h264bsdFillBlock( + u8 * ref, + u8 * fill, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 blockWidth, + u32 blockHeight, + u32 fillScanLength); + +void h264bsdInterpolateChromaHor( + u8 *pRef, + u8 *predPartChroma, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 xFrac, + u32 chromaPartWidth, + u32 chromaPartHeight); + +void h264bsdInterpolateChromaVer( + u8 *pRef, + u8 *predPartChroma, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 yFrac, + u32 chromaPartWidth, + u32 chromaPartHeight); + +void h264bsdInterpolateChromaHorVer( + u8 *ref, + u8 *predPartChroma, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 xFrac, + u32 yFrac, + u32 chromaPartWidth, + u32 chromaPartHeight); + +void h264bsdInterpolateVerHalf( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight); + 
+void h264bsdInterpolateVerQuarter( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight, + u32 verOffset); + +void h264bsdInterpolateHorHalf( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight); + +void h264bsdInterpolateHorQuarter( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight, + u32 horOffset); + +void h264bsdInterpolateHorVerQuarter( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight, + u32 horVerOffset); + +void h264bsdInterpolateMidHalf( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight); + +void h264bsdInterpolateMidVerQuarter( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight, + u32 verOffset); + +void h264bsdInterpolateMidHorQuarter( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight, + u32 horOffset); + + +void h264bsdFillRow7( + u8 *ref, + u8 *fill, + i32 left, + i32 center, + i32 right); + +#endif /* #ifndef H264SWDEC_RECONSTRUCT_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.c new file mode 100755 index 0000000000000000000000000000000000000000..0756c4734fb61c54f2eee71bedb4940d3f9843d9 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.c @@ -0,0 +1,1692 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + h264bsdDecodeSeiMessage + DecodeBufferingPeriod + DecodePictureTiming + DecodePanScanRectangle + DecodeFillerPayload + DecodeUserDataRegisteredITuTT35 + DecodeUserDataUnregistered + DecodeRecoveryPoint + DecodeDecRefPicMarkingRepetition + DecodeSparePic + DecodeSceneInfo + DecodeSubSeqInfo + DecodeSubSeqLayerCharacteristics + DecodeSubSeqCharacteristics + DecodeFullFrameFreeze + DecodeFullFrameSnapshot + DecodeProgressiveRefinementSegmentStart + DecodeProgressiveRefinementSegmentEnd + DecodeMotionConstrainedSliceGroupSet + DecodeReservedSeiMessage + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_sei.h" +#include "basetype.h" +#include "h264bsd_util.h" +#include "h264bsd_stream.h" +#include "h264bsd_vlc.h" +#include "h264bsd_seq_param_set.h" +#include "h264bsd_slice_header.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. 
Module defines +------------------------------------------------------------------------------*/ + +static const u32 numClockTS[9] = {1,1,1,2,2,3,3,2,3}; +static const u32 ceilLog2NumSliceGroups[9] = {0,1,1,2,2,3,3,3,3}; + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +static u32 DecodeBufferingPeriod( + strmData_t *pStrmData, + seiBufferingPeriod_t *pBufferingPeriod, + u32 cpbCnt, + u32 initialCpbRemovalDelayLength, + u32 nalHrdBpPresentFlag, + u32 vclHrdBpPresentFlag); + +static u32 DecodePictureTiming( + strmData_t *pStrmData, + seiPicTiming_t *pPicTiming, + u32 cpbRemovalDelayLength, + u32 dpbOutputDelayLength, + u32 timeOffsetLength, + u32 cpbDpbDelaysPresentFlag, + u32 picStructPresentFlag); + +static u32 DecodePanScanRectangle( + strmData_t *pStrmData, + seiPanScanRect_t *pPanScanRectangle); + +static u32 DecodeFillerPayload(strmData_t *pStrmData, u32 payloadSize); + +static u32 DecodeUserDataRegisteredITuTT35( + strmData_t *pStrmData, + seiUserDataRegisteredItuTT35_t *pUserDataRegisteredItuTT35, + u32 payloadSize); + +static u32 DecodeUserDataUnregistered( + strmData_t *pStrmData, + seiUserDataUnregistered_t *pUserDataUnregistered, + u32 payloadSize); + +static u32 DecodeRecoveryPoint( + strmData_t *pStrmData, + seiRecoveryPoint_t *pRecoveryPoint); + +static u32 DecodeDecRefPicMarkingRepetition( + strmData_t *pStrmData, + seiDecRefPicMarkingRepetition_t *pDecRefPicMarkingRepetition, + u32 numRefFrames); + +static u32 DecodeSparePic( + strmData_t *pStrmData, + seiSparePic_t *pSparePic, + u32 picSizeInMapUnits); + +static u32 DecodeSceneInfo( + strmData_t *pStrmData, + seiSceneInfo_t *pSceneInfo); + +static u32 DecodeSubSeqInfo( + strmData_t *pStrmData, + seiSubSeqInfo_t *pSubSeqInfo); + +static u32 DecodeSubSeqLayerCharacteristics( + strmData_t *pStrmData, + seiSubSeqLayerCharacteristics_t 
*pSubSeqLayerCharacteristics);
+
+static u32 DecodeSubSeqCharacteristics(
+    strmData_t *pStrmData,
+    seiSubSeqCharacteristics_t *pSubSeqCharacteristics);
+
+static u32 DecodeFullFrameFreeze(
+    strmData_t *pStrmData,
+    seiFullFrameFreeze_t *pFullFrameFreeze);
+
+static u32 DecodeFullFrameSnapshot(
+    strmData_t *pStrmData,
+    seiFullFrameSnapshot_t *pFullFrameSnapshot);
+
+static u32 DecodeProgressiveRefinementSegmentStart(
+    strmData_t *pStrmData,
+    seiProgressiveRefinementSegmentStart_t *pProgressiveRefinementSegmentStart);
+
+static u32 DecodeProgressiveRefinementSegmentEnd(
+    strmData_t *pStrmData,
+    seiProgressiveRefinementSegmentEnd_t *pProgressiveRefinementSegmentEnd);
+
+static u32 DecodeMotionConstrainedSliceGroupSet(
+    strmData_t *pStrmData,
+    seiMotionConstrainedSliceGroupSet_t *pMotionConstrainedSliceGroupSet,
+    u32 numSliceGroups);
+
+static u32 DecodeReservedSeiMessage(
+    strmData_t *pStrmData,
+    seiReservedSeiMessage_t *pReservedSeiMessage,
+    u32 payloadSize);
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdDecodeSeiMessage
+
+        Functional description:
+          Decodes every SEI message found in one SEI NAL unit.
+          *pSeiMessage is cleared once on entry. For each message the
+          payload type and the payload size are read (each coded as a run
+          of 0xFF bytes followed by one final byte), the payload is handed
+          to the decoder matching its type and the stream is then advanced
+          to the next byte boundary. Payload types above 18 are stored as
+          raw bytes by DecodeReservedSeiMessage.
+        Inputs:
+          pStrmData       bit stream to read from
+          pSeqParamSet    sequence parameter set; needed by payload types
+                          0, 1, 7 and 8
+          numSliceGroups  number of slice groups, used by payload type 18
+        Outputs:
+          pSeiMessage     decoded fields; payloadType holds the type of the
+                          last message that was read
+        Returns:
+          the result of h264bsdRbspTrailingBits once all messages are
+          decoded, HANTRO_NOK on end of stream, on bad alignment bits or
+          when parameter set data needed by the payload is missing,
+          otherwise the error returned by the failing payload decoder
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSeiMessage(
+    strmData_t *pStrmData,
+    seqParamSet_t *pSeqParamSet,
+    seiMessage_t *pSeiMessage,
+    u32 numSliceGroups)
+{
+
+/* Variables */
+
+    u32 tmp, payloadType, payloadSize, status;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSeiMessage);
+
+
+    H264SwDecMemset(pSeiMessage, 0, sizeof(seiMessage_t));
+
+    do
+    {
+        /* payload type: every 0xFF byte adds 255, the last byte adds
+         * its own value */
+        payloadType = 0;
+        while((tmp = h264bsdGetBits(pStrmData, 8)) == 0xFF)
+        {
+            payloadType += 255;
+        }
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        payloadType += tmp;
+
+        /* payload size in bytes, coded the same way */
+        payloadSize = 0;
+        while((tmp = h264bsdGetBits(pStrmData, 8)) == 0xFF)
+        {
+            payloadSize += 255;
+        }
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        payloadSize += tmp;
+
+        pSeiMessage->payloadType = payloadType;
+
+        switch (payloadType)
+        {
+            case 0:
+                /* buffering period is parsed with the HRD parameters of
+                 * the VUI. The payload type comes from the stream, so a
+                 * missing parameter set must be an error return and not
+                 * only an ASSERT */
+                if (pSeqParamSet == NULL ||
+                    pSeqParamSet->vuiParameters == NULL)
+                    return(HANTRO_NOK);
+                status = DecodeBufferingPeriod(
+                  pStrmData,
+                  &pSeiMessage->bufferingPeriod,
+                  pSeqParamSet->vuiParameters->vclHrdParameters.cpbCnt,
+                  pSeqParamSet->vuiParameters->vclHrdParameters.
+                  initialCpbRemovalDelayLength,
+                  pSeqParamSet->vuiParameters->nalHrdParametersPresentFlag,
+                  pSeqParamSet->vuiParameters->vclHrdParametersPresentFlag);
+                break;
+
+            case 1:
+                /* picture timing likewise depends on VUI data */
+                if (pSeqParamSet == NULL ||
+                    !pSeqParamSet->vuiParametersPresentFlag ||
+                    pSeqParamSet->vuiParameters == NULL)
+                    return(HANTRO_NOK);
+                status = DecodePictureTiming(
+                  pStrmData,
+                  &pSeiMessage->picTiming,
+                  pSeqParamSet->vuiParameters->vclHrdParameters.
+                      cpbRemovalDelayLength,
+                  pSeqParamSet->vuiParameters->vclHrdParameters.
+                      dpbOutputDelayLength,
+                  pSeqParamSet->vuiParameters->vclHrdParameters.
+                    timeOffsetLength,
+                  pSeqParamSet->vuiParameters->nalHrdParametersPresentFlag ||
+                  pSeqParamSet->vuiParameters->vclHrdParametersPresentFlag ?
+                  HANTRO_TRUE : HANTRO_FALSE,
+                  pSeqParamSet->vuiParameters->picStructPresentFlag);
+                break;
+
+            case 2:
+                status = DecodePanScanRectangle(
+                  pStrmData,
+                  &pSeiMessage->panScanRect);
+                break;
+
+            case 3:
+                status = DecodeFillerPayload(pStrmData, payloadSize);
+                break;
+
+            case 4:
+                status = DecodeUserDataRegisteredITuTT35(
+                  pStrmData,
+                  &pSeiMessage->userDataRegisteredItuTT35,
+                  payloadSize);
+                break;
+
+            case 5:
+                status = DecodeUserDataUnregistered(
+                  pStrmData,
+                  &pSeiMessage->userDataUnregistered,
+                  payloadSize);
+                break;
+
+            case 6:
+                status = DecodeRecoveryPoint(
+                  pStrmData,
+                  &pSeiMessage->recoveryPoint);
+                break;
+
+            case 7:
+                /* numRefFrames is read from the parameter set */
+                if (pSeqParamSet == NULL)
+                    return(HANTRO_NOK);
+                status = DecodeDecRefPicMarkingRepetition(
+                  pStrmData,
+                  &pSeiMessage->decRefPicMarkingRepetition,
+                  pSeqParamSet->numRefFrames);
+                break;
+
+            case 8:
+                /* picture size is read from the parameter set */
+                if (pSeqParamSet == NULL)
+                    return(HANTRO_NOK);
+                status = DecodeSparePic(
+                  pStrmData,
+                  &pSeiMessage->sparePic,
+                  pSeqParamSet->picWidthInMbs * pSeqParamSet->picHeightInMbs);
+                break;
+
+            case 9:
+                status = DecodeSceneInfo(
+                  pStrmData,
+                  &pSeiMessage->sceneInfo);
+                break;
+
+            case 10:
+                status = DecodeSubSeqInfo(
+                  pStrmData,
+                  &pSeiMessage->subSeqInfo);
+                break;
+
+            case 11:
+                status = DecodeSubSeqLayerCharacteristics(
+                  pStrmData,
+                  &pSeiMessage->subSeqLayerCharacteristics);
+                break;
+
+            case 12:
+                status = DecodeSubSeqCharacteristics(
+                  pStrmData,
+                  &pSeiMessage->subSeqCharacteristics);
+                break;
+
+            case 13:
+                status = DecodeFullFrameFreeze(
+                  pStrmData,
+                  &pSeiMessage->fullFrameFreeze);
+                break;
+
+            case 14: /* This SEI does not contain data, what to do ??? */
+                status = HANTRO_OK;
+                break;
+
+            case 15:
+                status = DecodeFullFrameSnapshot(
+                  pStrmData,
+                  &pSeiMessage->fullFrameSnapshot);
+                break;
+
+            case 16:
+                status = DecodeProgressiveRefinementSegmentStart(
+                  pStrmData,
+                  &pSeiMessage->progressiveRefinementSegmentStart);
+                break;
+
+            case 17:
+                status = DecodeProgressiveRefinementSegmentEnd(
+                  pStrmData,
+                  &pSeiMessage->progressiveRefinementSegmentEnd);
+                break;
+
+            case 18:
+                ASSERT(numSliceGroups);
+                status = DecodeMotionConstrainedSliceGroupSet(
+                  pStrmData,
+                  &pSeiMessage->motionConstrainedSliceGroupSet,
+                  numSliceGroups);
+                break;
+
+            default:
+                status = DecodeReservedSeiMessage(
+                  pStrmData,
+                  &pSeiMessage->reservedSeiMessage,
+                  payloadSize);
+                break;
+        }
+
+        if (status != HANTRO_OK)
+            return(status);
+
+        /* when the payload ended off a byte boundary the padding must be
+         * a single 1 bit followed by 0 bits up to the boundary */
+        while (!h264bsdIsByteAligned(pStrmData))
+        {
+            if (h264bsdGetBits(pStrmData, 1) != 1)
+                return(HANTRO_NOK);
+            while (!h264bsdIsByteAligned(pStrmData))
+            {
+                if (h264bsdGetBits(pStrmData, 1) != 0)
+                    return(HANTRO_NOK);
+            }
+        }
+    } while (h264bsdMoreRbspData(pStrmData));
+
+    return(h264bsdRbspTrailingBits(pStrmData));
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeBufferingPeriod
+
+        Functional description:
+          <++>
+        Inputs:
+          <++>
+        Outputs:
+          <++>
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeBufferingPeriod(
+    strmData_t *pStrmData,
+    seiBufferingPeriod_t *pBufferingPeriod,
+    u32 cpbCnt,
+    u32 initialCpbRemovalDelayLength,
+    u32 nalHrdBpPresentFlag,
+    u32
vclHrdBpPresentFlag) +{ + +/* Variables */ + + u32 tmp, i; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pBufferingPeriod); + ASSERT(cpbCnt); + ASSERT(initialCpbRemovalDelayLength); + + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pBufferingPeriod->seqParameterSetId); + if (tmp != HANTRO_OK) + return(tmp); + if (pBufferingPeriod->seqParameterSetId > 31) + return(HANTRO_NOK); + + if (nalHrdBpPresentFlag) + { + for (i = 0; i < cpbCnt; i++) + { + tmp = h264bsdGetBits(pStrmData, initialCpbRemovalDelayLength); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + if (tmp == 0) + return(HANTRO_NOK); + pBufferingPeriod->initialCpbRemovalDelay[i] = tmp; + + tmp = h264bsdGetBits(pStrmData, initialCpbRemovalDelayLength); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pBufferingPeriod->initialCpbRemovalDelayOffset[i] = tmp; + } + } + + if (vclHrdBpPresentFlag) + { + for (i = 0; i < cpbCnt; i++) + { + tmp = h264bsdGetBits(pStrmData, initialCpbRemovalDelayLength); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pBufferingPeriod->initialCpbRemovalDelay[i] = tmp; + + tmp = h264bsdGetBits(pStrmData, initialCpbRemovalDelayLength); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pBufferingPeriod->initialCpbRemovalDelayOffset[i] = tmp; + } + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodePictureTiming + + Functional description: + <++> + Inputs: + <++> + Outputs: + <++> + +------------------------------------------------------------------------------*/ + +static u32 DecodePictureTiming( + strmData_t *pStrmData, + seiPicTiming_t *pPicTiming, + u32 cpbRemovalDelayLength, + u32 dpbOutputDelayLength, + u32 timeOffsetLength, + u32 cpbDpbDelaysPresentFlag, + u32 picStructPresentFlag) +{ + +/* Variables */ + + u32 tmp, i; + i32 itmp; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pPicTiming); + + + if (cpbDpbDelaysPresentFlag) + { + tmp = h264bsdGetBits(pStrmData, cpbRemovalDelayLength); 
+ if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicTiming->cpbRemovalDelay = tmp; + + tmp = h264bsdGetBits(pStrmData, dpbOutputDelayLength); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicTiming->dpbOutputDelay = tmp; + } + + if (picStructPresentFlag) + { + tmp = h264bsdGetBits(pStrmData, 4); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + if (tmp > 8) + return(HANTRO_NOK); + pPicTiming->picStruct = tmp; + + for (i = 0; i < numClockTS[pPicTiming->picStruct]; i++) + { + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicTiming->clockTimeStampFlag[i] = tmp == 1 ? + HANTRO_TRUE : HANTRO_FALSE; + + if (pPicTiming->clockTimeStampFlag[i]) + { + tmp = h264bsdGetBits(pStrmData, 2); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicTiming->ctType[i] = tmp; + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicTiming->nuitFieldBasedFlag[i] = tmp == 1 ? + HANTRO_TRUE : HANTRO_FALSE; + + tmp = h264bsdGetBits(pStrmData, 5); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + if (tmp > 6) + return(HANTRO_NOK); + pPicTiming->countingType[i] = tmp; + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicTiming->fullTimeStampFlag[i] = tmp == 1 ? + HANTRO_TRUE : HANTRO_FALSE; + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicTiming->discontinuityFlag[i] = tmp == 1 ? + HANTRO_TRUE : HANTRO_FALSE; + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicTiming->cntDroppedFlag[i] = tmp == 1 ? 
+ HANTRO_TRUE : HANTRO_FALSE; + + tmp = h264bsdGetBits(pStrmData, 8); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicTiming->nFrames[i] = tmp; + + if (pPicTiming->fullTimeStampFlag[i]) + { + tmp = h264bsdGetBits(pStrmData, 6); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + if (tmp > 59) + return(HANTRO_NOK); + pPicTiming->secondsValue[i] = tmp; + + tmp = h264bsdGetBits(pStrmData, 6); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + if (tmp > 59) + return(HANTRO_NOK); + pPicTiming->minutesValue[i] = tmp; + + tmp = h264bsdGetBits(pStrmData, 5); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + if (tmp > 23) + return(HANTRO_NOK); + pPicTiming->hoursValue[i] = tmp; + } + else + { + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicTiming->secondsFlag[i] = tmp == 1 ? + HANTRO_TRUE : HANTRO_FALSE; + + if (pPicTiming->secondsFlag[i]) + { + tmp = h264bsdGetBits(pStrmData, 6); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + if (tmp > 59) + return(HANTRO_NOK); + pPicTiming->secondsValue[i] = tmp; + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicTiming->minutesFlag[i] = tmp == 1 ? + HANTRO_TRUE : HANTRO_FALSE; + + if (pPicTiming->minutesFlag[i]) + { + tmp = h264bsdGetBits(pStrmData, 6); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + if (tmp > 59) + return(HANTRO_NOK); + pPicTiming->minutesValue[i] = tmp; + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPicTiming->hoursFlag[i] = tmp == 1 ? 
+ HANTRO_TRUE : HANTRO_FALSE; + + if (pPicTiming->hoursFlag[i]) + { + tmp = h264bsdGetBits(pStrmData, 5); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + if (tmp > 23) + return(HANTRO_NOK); + pPicTiming->hoursValue[i] = tmp; + } + } + } + } + if (timeOffsetLength) + { + tmp = h264bsdGetBits(pStrmData, timeOffsetLength); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + itmp = (i32)tmp; + /* following "converts" timeOffsetLength-bit signed + * integer into i32 */ + /*lint -save -e701 -e702 */ + itmp <<= (32 - timeOffsetLength); + itmp >>= (32 - timeOffsetLength); + /*lint -restore */ + pPicTiming->timeOffset[i] = itmp; + } + else + pPicTiming->timeOffset[i] = 0; + } + } + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodePanScanRectangle + + Functional description: + <++> + Inputs: + <++> + Outputs: + <++> + +------------------------------------------------------------------------------*/ + +static u32 DecodePanScanRectangle( + strmData_t *pStrmData, + seiPanScanRect_t *pPanScanRectangle) +{ + +/* Variables */ + + u32 tmp, i; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pPanScanRectangle); + + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pPanScanRectangle->panScanRectId); + if (tmp != HANTRO_OK) + return(tmp); + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pPanScanRectangle->panScanRectCancelFlag = tmp == 1 ? 
+ HANTRO_TRUE : HANTRO_FALSE; + + if (!pPanScanRectangle->panScanRectCancelFlag) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pPanScanRectangle->panScanCnt); + if (tmp != HANTRO_OK) + return(tmp); + if (pPanScanRectangle->panScanCnt > 2) + return(HANTRO_NOK); + pPanScanRectangle->panScanCnt++; + + for (i = 0; i < pPanScanRectangle->panScanCnt; i++) + { + tmp = h264bsdDecodeExpGolombSigned(pStrmData, + &pPanScanRectangle->panScanRectLeftOffset[i]); + if (tmp != HANTRO_OK) + return(tmp); + + tmp = h264bsdDecodeExpGolombSigned(pStrmData, + &pPanScanRectangle->panScanRectRightOffset[i]); + if (tmp != HANTRO_OK) + return(tmp); + + tmp = h264bsdDecodeExpGolombSigned(pStrmData, + &pPanScanRectangle->panScanRectTopOffset[i]); + if (tmp != HANTRO_OK) + return(tmp); + + tmp = h264bsdDecodeExpGolombSigned(pStrmData, + &pPanScanRectangle->panScanRectBottomOffset[i]); + if (tmp != HANTRO_OK) + return(tmp); + } + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pPanScanRectangle->panScanRectRepetitionPeriod); + if (tmp != HANTRO_OK) + return(tmp); + if (pPanScanRectangle->panScanRectRepetitionPeriod > 16384) + return(HANTRO_NOK); + if (pPanScanRectangle->panScanCnt > 1 && + pPanScanRectangle->panScanRectRepetitionPeriod > 1) + return(HANTRO_NOK); + } + + return(HANTRO_OK); +} + +/*------------------------------------------------------------------------------ + + Function: DecodeFillerPayload + + Functional description: + <++> + Inputs: + <++> + Outputs: + <++> + +------------------------------------------------------------------------------*/ + +static u32 DecodeFillerPayload(strmData_t *pStrmData, u32 payloadSize) +{ + +/* Variables */ + +/* Code */ + + ASSERT(pStrmData); + + + if (payloadSize) + if (h264bsdFlushBits(pStrmData, 8 * payloadSize) == END_OF_STREAM) + return(HANTRO_NOK); + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeUserDataRegisteredITuTT35 + + Functional 
description:
+          Decodes a "user data registered by ITU-T T.35" SEI payload: one
+          country code byte, one extension byte when the country code is
+          0xFF, and the rest of the payload as raw bytes. The byte buffer
+          is obtained with ALLOCATE and is not released by this function.
+        Inputs:
+          pStrmData     bit stream to read from
+          payloadSize   size of the whole payload in bytes, country code
+                        byte(s) included
+        Outputs:
+          pUserDataRegisteredItuTT35   country code, payload bytes and
+                                       numPayloadBytes
+        Returns:
+          HANTRO_OK on success, HANTRO_NOK on end of stream or when
+          payloadSize leaves no room for a payload byte,
+          MEMORY_ALLOCATION_ERROR when the buffer cannot be allocated
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeUserDataRegisteredITuTT35(
+    strmData_t *pStrmData,
+    seiUserDataRegisteredItuTT35_t *pUserDataRegisteredItuTT35,
+    u32 payloadSize)
+{
+
+/* Variables */
+
+    u32 tmp, i, j;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pUserDataRegisteredItuTT35);
+
+    /* payloadSize is read from the stream; an empty payload cannot even
+     * hold the country code */
+    if (payloadSize == 0)
+        return(HANTRO_NOK);
+
+    tmp = h264bsdGetBits(pStrmData, 8);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pUserDataRegisteredItuTT35->ituTT35CountryCode = tmp;
+
+    /* i counts the payload bytes consumed so far */
+    if (pUserDataRegisteredItuTT35->ituTT35CountryCode != 0xFF)
+        i = 1;
+    else
+    {
+        tmp = h264bsdGetBits(pStrmData, 8);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pUserDataRegisteredItuTT35->ituTT35CountryCodeExtensionByte = tmp;
+        i = 2;
+    }
+
+    /* the loop below always stores at least one byte, so at least one
+     * must remain. This also keeps payloadSize - i from wrapping around
+     * in unsigned arithmetic */
+    if (payloadSize <= i)
+        return(HANTRO_NOK);
+
+    /* where corresponding FREE() ??? */
+    ALLOCATE(pUserDataRegisteredItuTT35->ituTT35PayloadByte,payloadSize-i,u8);
+    if (pUserDataRegisteredItuTT35->ituTT35PayloadByte == NULL)
+        return(MEMORY_ALLOCATION_ERROR);
+    pUserDataRegisteredItuTT35->numPayloadBytes = payloadSize - i;
+
+    j = 0;
+    do
+    {
+        tmp = h264bsdGetBits(pStrmData, 8);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pUserDataRegisteredItuTT35->ituTT35PayloadByte[j] = (u8)tmp;
+        i++;
+        j++;
+    } while (i < payloadSize);
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeUserDataUnregistered
+
+        Functional description:
+          Decodes an "unregistered user data" SEI payload: a 128-bit UUID
+          stored as four 32-bit words, followed by payloadSize - 16 raw
+          bytes. The byte buffer is obtained with ALLOCATE and is not
+          released by this function.
+        Inputs:
+          pStrmData     bit stream to read from
+          payloadSize   size of the whole payload in bytes, UUID included
+        Outputs:
+          pUserDataUnregistered   UUID words, payload bytes and
+                                  numPayloadBytes
+        Returns:
+          HANTRO_OK on success, HANTRO_NOK on end of stream or when
+          payloadSize is smaller than the UUID, MEMORY_ALLOCATION_ERROR
+          when the buffer cannot be allocated
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeUserDataUnregistered(
+    strmData_t *pStrmData,
+    seiUserDataUnregistered_t *pUserDataUnregistered,
+    u32 payloadSize)
+{
+
+/* Variables */
+
+    u32 i, tmp;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pUserDataUnregistered);
+
+    /* the UUID alone takes 16 bytes; a shorter payload would make
+     * payloadSize - 16 wrap around to a huge unsigned value */
+    if (payloadSize < 16)
+        return(HANTRO_NOK);
+
+    for (i = 0; i < 4; i++)
+    {
+        pUserDataUnregistered->uuidIsoIec11578[i] = h264bsdShowBits32(pStrmData);
+        if (h264bsdFlushBits(pStrmData,32) == END_OF_STREAM)
+            return(HANTRO_NOK);
+    }
+
+    /* UUID only: nothing to allocate or copy */
+    if (payloadSize == 16)
+    {
+        pUserDataUnregistered->userDataPayloadByte = NULL;
+        pUserDataUnregistered->numPayloadBytes = 0;
+        return(HANTRO_OK);
+    }
+
+    /* where corresponding FREE() ??? */
+    ALLOCATE(pUserDataUnregistered->userDataPayloadByte, payloadSize - 16, u8);
+    if (pUserDataUnregistered->userDataPayloadByte == NULL)
+        return(MEMORY_ALLOCATION_ERROR);
+
+    pUserDataUnregistered->numPayloadBytes = payloadSize - 16;
+
+    for (i = 0; i < payloadSize - 16; i++)
+    {
+        tmp = h264bsdGetBits(pStrmData, 8);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pUserDataUnregistered->userDataPayloadByte[i] = (u8)tmp;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeRecoveryPoint
+
+        Functional description:
+          <++>
+        Inputs:
+          <++>
+        Outputs:
+          <++>
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeRecoveryPoint(
+    strmData_t *pStrmData,
+    seiRecoveryPoint_t *pRecoveryPoint)
+{
+
+/* Variables */
+
+    u32 tmp;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pRecoveryPoint);
+
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pRecoveryPoint->recoveryFrameCnt);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pRecoveryPoint->exactMatchFlag = tmp == 1 ? HANTRO_TRUE : HANTRO_FALSE;
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pRecoveryPoint->brokenLinkFlag = tmp == 1 ?
HANTRO_TRUE : HANTRO_FALSE;
+
+    tmp = h264bsdGetBits(pStrmData, 2);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    if (tmp > 2)
+        return(HANTRO_NOK);
+    pRecoveryPoint->changingSliceGroupIdc = tmp;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeDecRefPicMarkingRepetition
+
+        Functional description:
+          <++>
+        Inputs:
+          <++>
+        Outputs:
+          <++>
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeDecRefPicMarkingRepetition(
+    strmData_t *pStrmData,
+    seiDecRefPicMarkingRepetition_t *pDecRefPicMarkingRepetition,
+    u32 numRefFrames)
+{
+
+/* Variables */
+
+    u32 tmp;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pDecRefPicMarkingRepetition);
+
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pDecRefPicMarkingRepetition->originalIdrFlag = tmp == 1 ? HANTRO_TRUE : HANTRO_FALSE;
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pDecRefPicMarkingRepetition->originalFrameNum);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+
+    /* frame_mbs_only_flag assumed always true so some field related syntax
+     * elements are skipped, see H.264 standard */
+    tmp = h264bsdDecRefPicMarking(pStrmData,
+        &pDecRefPicMarkingRepetition->decRefPicMarking, NAL_SEI, numRefFrames);
+
+    return(tmp);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeSparePic
+
+        Functional description:
+          Decodes a spare picture SEI payload. For every spare picture the
+          frame number delta and the area idc are read. Area idc 1 is
+          followed by one flag per map unit, area idc 2 by zero run
+          lengths that together cover the picture. The per-picture
+          arrays are obtained with ALLOCATE and are not released by this
+          function. Payloads describing fields are rejected.
+        Inputs:
+          pStrmData           bit stream to read from
+          picSizeInMapUnits   number of map units in the picture; also the
+                              number of entries allocated per array
+        Outputs:
+          pSparePic           decoded fields; spareUnitFlag[] and
+                              zeroRunLength[] entries that are not used
+                              are set to NULL when decoding succeeds
+        Returns:
+          HANTRO_OK on success, HANTRO_NOK on end of stream or on a value
+          out of range, MEMORY_ALLOCATION_ERROR when an array cannot be
+          allocated, otherwise the error of the Exp-Golomb decoder
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeSparePic(
+    strmData_t *pStrmData,
+    seiSparePic_t *pSparePic,
+    u32 picSizeInMapUnits)
+{
+
+/* Variables */
+
+    u32 tmp, i, j, mapUnitCnt;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSparePic);
+
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pSparePic->targetFrameNum);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pSparePic->spareFieldFlag = tmp == 1 ? HANTRO_TRUE : HANTRO_FALSE;
+    /* do not accept fields */
+    if (pSparePic->spareFieldFlag)
+        return(HANTRO_NOK);
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &pSparePic->numSparePics);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    /* the stream carries the count minus one; check the range before the
+     * increment so that a value of 0xFFFFFFFF cannot wrap around to 0 */
+    if (pSparePic->numSparePics >= MAX_NUM_SPARE_PICS)
+        return(HANTRO_NOK);
+    pSparePic->numSparePics++;
+
+    for (i = 0; i < pSparePic->numSparePics; i++)
+    {
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSparePic->deltaSpareFrameNum[i]);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSparePic->spareAreaIdc[i]);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pSparePic->spareAreaIdc[i] > 2)
+            return(HANTRO_NOK);
+
+        if (pSparePic->spareAreaIdc[i] == 1)
+        {
+            /* where corresponding FREE() ??? */
+            ALLOCATE(pSparePic->spareUnitFlag[i], picSizeInMapUnits, u32);
+            if (pSparePic->spareUnitFlag[i] == NULL)
+                return(MEMORY_ALLOCATION_ERROR);
+            pSparePic->zeroRunLength[i] = NULL;
+
+            for (j = 0; j < picSizeInMapUnits; j++)
+            {
+                tmp = h264bsdGetBits(pStrmData, 1);
+                if (tmp == END_OF_STREAM)
+                    return(HANTRO_NOK);
+                pSparePic->spareUnitFlag[i][j] = tmp == 1 ?
+                    HANTRO_TRUE : HANTRO_FALSE;
+            }
+        }
+        else if (pSparePic->spareAreaIdc[i] == 2)
+        {
+            /* where corresponding FREE() ??? */
+            ALLOCATE(pSparePic->zeroRunLength[i], picSizeInMapUnits, u32);
+            if (pSparePic->zeroRunLength[i] == NULL)
+                return(MEMORY_ALLOCATION_ERROR);
+            pSparePic->spareUnitFlag[i] = NULL;
+
+            for (j = 0, mapUnitCnt = 0; mapUnitCnt < picSizeInMapUnits; j++)
+            {
+                tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+                    &pSparePic->zeroRunLength[i][j]);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+                /* a run cannot be longer than what is left of the
+                 * picture. Without this limit mapUnitCnt could wrap
+                 * around and j would index past the picSizeInMapUnits
+                 * entries allocated above */
+                if (pSparePic->zeroRunLength[i][j] >
+                    picSizeInMapUnits - mapUnitCnt)
+                    return(HANTRO_NOK);
+                mapUnitCnt += pSparePic->zeroRunLength[i][j] + 1;
+            }
+        }
+    }
+
+    /* set rest to null */
+    for (i = pSparePic->numSparePics; i < MAX_NUM_SPARE_PICS; i++)
+    {
+        pSparePic->spareUnitFlag[i] = NULL;
+        pSparePic->zeroRunLength[i] = NULL;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeSceneInfo
+
+        Functional description:
+          <++>
+        Inputs:
+          <++>
+        Outputs:
+          <++>
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeSceneInfo(
+    strmData_t *pStrmData,
+    seiSceneInfo_t *pSceneInfo)
+{
+
+/* Variables */
+
+    u32 tmp;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSceneInfo);
+
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pSceneInfo->sceneInfoPresentFlag = tmp == 1 ?
HANTRO_TRUE : HANTRO_FALSE; + + if (pSceneInfo->sceneInfoPresentFlag) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &pSceneInfo->sceneId); + if (tmp != HANTRO_OK) + return(tmp); + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSceneInfo->sceneTransitionType); + if (tmp != HANTRO_OK) + return(tmp); + if (pSceneInfo->sceneTransitionType > 6) + return(HANTRO_NOK); + + if (pSceneInfo->sceneTransitionType) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSceneInfo->secondSceneId); + if (tmp != HANTRO_OK) + return(tmp); + } + + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeSubSeqInfo + + Functional description: + <++> + Inputs: + <++> + Outputs: + <++> + +-----------------------------------------------------------------------------*/ + +static u32 DecodeSubSeqInfo( + strmData_t *pStrmData, + seiSubSeqInfo_t *pSubSeqInfo) +{ + +/* Variables */ + + u32 tmp; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pSubSeqInfo); + + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSubSeqInfo->subSeqLayerNum); + if (tmp != HANTRO_OK) + return(tmp); + if (pSubSeqInfo->subSeqLayerNum > 255) + return(HANTRO_NOK); + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &pSubSeqInfo->subSeqId); + if (tmp != HANTRO_OK) + return(tmp); + if (pSubSeqInfo->subSeqId > 65535) + return(HANTRO_NOK); + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSubSeqInfo->firstRefPicFlag = tmp == 1 ? HANTRO_TRUE : HANTRO_FALSE; + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSubSeqInfo->leadingNonRefPicFlag = tmp == 1 ? HANTRO_TRUE : HANTRO_FALSE; + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSubSeqInfo->lastPicFlag = tmp == 1 ? 
HANTRO_TRUE : HANTRO_FALSE; + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSubSeqInfo->subSeqFrameNumFlag = tmp == 1 ? HANTRO_TRUE : HANTRO_FALSE; + + if (pSubSeqInfo->subSeqFrameNumFlag) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSubSeqInfo->subSeqFrameNum); + if (tmp != HANTRO_OK) + return(tmp); + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeSubSeqLayerCharacteristics + + Functional description: + <++> + Inputs: + <++> + Outputs: + <++> + +------------------------------------------------------------------------------*/ + +static u32 DecodeSubSeqLayerCharacteristics( + strmData_t *pStrmData, + seiSubSeqLayerCharacteristics_t *pSubSeqLayerCharacteristics) +{ + +/* Variables */ + + u32 tmp, i; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pSubSeqLayerCharacteristics); + + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSubSeqLayerCharacteristics->numSubSeqLayers); + if (tmp != HANTRO_OK) + return(tmp); + pSubSeqLayerCharacteristics->numSubSeqLayers++; + if (pSubSeqLayerCharacteristics->numSubSeqLayers > MAX_NUM_SUB_SEQ_LAYERS) + return(HANTRO_NOK); + + for (i = 0; i < pSubSeqLayerCharacteristics->numSubSeqLayers; i++) + { + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSubSeqLayerCharacteristics->accurateStatisticsFlag[i] = + tmp == 1 ? 
HANTRO_TRUE : HANTRO_FALSE; + + tmp = h264bsdGetBits(pStrmData, 16); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSubSeqLayerCharacteristics->averageBitRate[i] = tmp; + + tmp = h264bsdGetBits(pStrmData, 16); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSubSeqLayerCharacteristics->averageFrameRate[i] = tmp; + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeSubSeqCharacteristics + + Functional description: + <++> + Inputs: + <++> + Outputs: + <++> + +------------------------------------------------------------------------------*/ + +static u32 DecodeSubSeqCharacteristics( + strmData_t *pStrmData, + seiSubSeqCharacteristics_t *pSubSeqCharacteristics) +{ + +/* Variables */ + + u32 tmp, i; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pSubSeqCharacteristics); + + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSubSeqCharacteristics->subSeqLayerNum); + if (tmp != HANTRO_OK) + return(tmp); + if (pSubSeqCharacteristics->subSeqLayerNum > MAX_NUM_SUB_SEQ_LAYERS-1) + return(HANTRO_NOK); + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSubSeqCharacteristics->subSeqId); + if (tmp != HANTRO_OK) + return(tmp); + if (pSubSeqCharacteristics->subSeqId > 65535) + return(HANTRO_NOK); + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSubSeqCharacteristics->durationFlag = tmp == 1 ? + HANTRO_TRUE : HANTRO_FALSE; + + if (pSubSeqCharacteristics->durationFlag) + { + pSubSeqCharacteristics->subSeqDuration = h264bsdShowBits32(pStrmData); + if (h264bsdFlushBits(pStrmData,32) == END_OF_STREAM) + return(HANTRO_NOK); + } + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSubSeqCharacteristics->averageRateFlag = tmp == 1 ? 
+ HANTRO_TRUE : HANTRO_FALSE; + + if (pSubSeqCharacteristics->averageRateFlag) + { + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSubSeqCharacteristics->accurateStatisticsFlag = + tmp == 1 ? HANTRO_TRUE : HANTRO_FALSE; + + tmp = h264bsdGetBits(pStrmData, 16); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSubSeqCharacteristics->averageBitRate = tmp; + + tmp = h264bsdGetBits(pStrmData, 16); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSubSeqCharacteristics->averageFrameRate = tmp; + } + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSubSeqCharacteristics->numReferencedSubseqs); + if (tmp != HANTRO_OK) + return(tmp); + if (pSubSeqCharacteristics->numReferencedSubseqs > MAX_NUM_SUB_SEQ_LAYERS-1) + return(HANTRO_NOK); + + for (i = 0; i < pSubSeqCharacteristics->numReferencedSubseqs; i++) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSubSeqCharacteristics->refSubSeqLayerNum[i]); + if (tmp != HANTRO_OK) + return(tmp); + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSubSeqCharacteristics->refSubSeqId[i]); + if (tmp != HANTRO_OK) + return(tmp); + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSubSeqCharacteristics->refSubSeqDirection[i] = tmp; + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeFullFrameFreeze + + Functional description: + <++> + Inputs: + <++> + Outputs: + <++> + +------------------------------------------------------------------------------*/ + +static u32 DecodeFullFrameFreeze( + strmData_t *pStrmData, + seiFullFrameFreeze_t *pFullFrameFreeze) +{ + +/* Variables */ + + u32 tmp; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pFullFrameFreeze); + + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pFullFrameFreeze->fullFrameFreezeRepetitionPeriod); + if (tmp != HANTRO_OK) + return(tmp); + if (pFullFrameFreeze->fullFrameFreezeRepetitionPeriod > 
16384) + return(HANTRO_NOK); + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeFullFrameSnapshot + + Functional description: + <++> + Inputs: + <++> + Outputs: + <++> + +------------------------------------------------------------------------------*/ + +static u32 DecodeFullFrameSnapshot( + strmData_t *pStrmData, + seiFullFrameSnapshot_t *pFullFrameSnapshot) +{ + +/* Variables */ + + u32 tmp; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pFullFrameSnapshot); + + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pFullFrameSnapshot->snapShotId); + if (tmp != HANTRO_OK) + return(tmp); + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeProgressiveRefinementSegmentStart + + Functional description: + <++> + Inputs: + <++> + Outputs: + <++> + +------------------------------------------------------------------------------*/ + +static u32 DecodeProgressiveRefinementSegmentStart( + strmData_t *pStrmData, + seiProgressiveRefinementSegmentStart_t *pProgressiveRefinementSegmentStart) +{ + +/* Variables */ + + u32 tmp; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pProgressiveRefinementSegmentStart); + + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pProgressiveRefinementSegmentStart->progressiveRefinementId); + if (tmp != HANTRO_OK) + return(tmp); + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pProgressiveRefinementSegmentStart->numRefinementSteps); + if (tmp != HANTRO_OK) + return(tmp); + pProgressiveRefinementSegmentStart->numRefinementSteps++; + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeProgressiveRefinementSegmentEnd + + Functional description: + <++> + Inputs: + <++> + Outputs: + <++> + +------------------------------------------------------------------------------*/ + +static u32 
DecodeProgressiveRefinementSegmentEnd( + strmData_t *pStrmData, + seiProgressiveRefinementSegmentEnd_t *pProgressiveRefinementSegmentEnd) +{ + +/* Variables */ + + u32 tmp; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pProgressiveRefinementSegmentEnd); + + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pProgressiveRefinementSegmentEnd->progressiveRefinementId); + if (tmp != HANTRO_OK) + return(tmp); + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeMotionConstrainedSliceGroupSet + + Functional description: + <++> + Inputs: + <++> + Outputs: + <++> + +------------------------------------------------------------------------------*/ + +static u32 DecodeMotionConstrainedSliceGroupSet( + strmData_t *pStrmData, + seiMotionConstrainedSliceGroupSet_t *pMotionConstrainedSliceGroupSet, + u32 numSliceGroups) +{ + +/* Variables */ + + u32 tmp,i; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pMotionConstrainedSliceGroupSet); + ASSERT(numSliceGroups < MAX_NUM_SLICE_GROUPS); + + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pMotionConstrainedSliceGroupSet->numSliceGroupsInSet); + if (tmp != HANTRO_OK) + return(tmp); + pMotionConstrainedSliceGroupSet->numSliceGroupsInSet++; + if (pMotionConstrainedSliceGroupSet->numSliceGroupsInSet > numSliceGroups) + return(HANTRO_NOK); + + for (i = 0; i < pMotionConstrainedSliceGroupSet->numSliceGroupsInSet; i++) + { + tmp = h264bsdGetBits(pStrmData, + ceilLog2NumSliceGroups[numSliceGroups]); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pMotionConstrainedSliceGroupSet->sliceGroupId[i] = tmp; + if (pMotionConstrainedSliceGroupSet->sliceGroupId[i] > + pMotionConstrainedSliceGroupSet->numSliceGroupsInSet-1) + return(HANTRO_NOK); + } + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pMotionConstrainedSliceGroupSet->exactSampleValueMatchFlag = + tmp == 1 ? 
HANTRO_TRUE : HANTRO_FALSE; + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pMotionConstrainedSliceGroupSet->panScanRectFlag = tmp == 1 ? + HANTRO_TRUE : HANTRO_FALSE; + + if (pMotionConstrainedSliceGroupSet->panScanRectFlag) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pMotionConstrainedSliceGroupSet->panScanRectId); + if (tmp != HANTRO_OK) + return(tmp); + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeReservedSeiMessage + + Functional description: + <++> + Inputs: + <++> + Outputs: + <++> + +------------------------------------------------------------------------------*/ + +static u32 DecodeReservedSeiMessage( + strmData_t *pStrmData, + seiReservedSeiMessage_t *pReservedSeiMessage, + u32 payloadSize) +{ + +/* Variables */ + + u32 i, tmp; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pReservedSeiMessage); + + + /* where corresponding FREE() ??? 
*/ + ALLOCATE(pReservedSeiMessage->reservedSeiMessagePayloadByte,payloadSize,u8); + if (pReservedSeiMessage->reservedSeiMessagePayloadByte == NULL) + return(MEMORY_ALLOCATION_ERROR); + + pReservedSeiMessage->numPayloadBytes = payloadSize; + + for (i = 0; i < payloadSize; i++) + { + tmp = h264bsdGetBits(pStrmData,8); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pReservedSeiMessage->reservedSeiMessagePayloadByte[i] = (u8)tmp; + } + + return(HANTRO_OK); + +} + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.h new file mode 100755 index 0000000000000000000000000000000000000000..efe543a63797cc3e363332de693b09961dabdeea --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.h @@ -0,0 +1,252 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_SEI_H +#define H264SWDEC_SEI_H + +/*------------------------------------------------------------------------------ + 1. 
/*------------------------------------------------------------------------------
    1. Include headers
------------------------------------------------------------------------------*/

#include "basetype.h"
#include "h264bsd_stream.h"
#include "h264bsd_slice_header.h"
#include "h264bsd_seq_param_set.h"
#include "h264bsd_vui.h"

/*------------------------------------------------------------------------------
    2. Module defines
------------------------------------------------------------------------------*/

/* capacities of the fixed size arrays in the structures below */
#define MAX_PAN_SCAN_CNT 32
#define MAX_NUM_SPARE_PICS 16
#define MAX_NUM_CLOCK_TS 3
#define MAX_NUM_SUB_SEQ_LAYERS 256

/*------------------------------------------------------------------------------
    3. Data types

    One structure per SEI payload kind plus seiMessage_t which aggregates
    them. NOTE(review): field names appear to mirror the SEI syntax element
    names of H.264 Annex D - confirm against the specification.
------------------------------------------------------------------------------*/

/* buffering period payload; arrays have MAX_CPB_CNT entries */
typedef struct
{
    u32 seqParameterSetId;
    u32 initialCpbRemovalDelay[MAX_CPB_CNT];
    u32 initialCpbRemovalDelayOffset[MAX_CPB_CNT];
} seiBufferingPeriod_t;

/* picture timing payload; per clock timestamp fields are arrays of
 * MAX_NUM_CLOCK_TS entries, timeOffset is the only signed field */
typedef struct
{
    u32 cpbRemovalDelay;
    u32 dpbOutputDelay;
    u32 picStruct;
    u32 clockTimeStampFlag[MAX_NUM_CLOCK_TS];
    u32 clockTimeStamp[MAX_NUM_CLOCK_TS];
    u32 ctType[MAX_NUM_CLOCK_TS];
    u32 nuitFieldBasedFlag[MAX_NUM_CLOCK_TS];
    u32 countingType[MAX_NUM_CLOCK_TS];
    u32 fullTimeStampFlag[MAX_NUM_CLOCK_TS];
    u32 discontinuityFlag[MAX_NUM_CLOCK_TS];
    u32 cntDroppedFlag[MAX_NUM_CLOCK_TS];
    u32 nFrames[MAX_NUM_CLOCK_TS];
    u32 secondsFlag[MAX_NUM_CLOCK_TS];
    u32 secondsValue[MAX_NUM_CLOCK_TS];
    u32 minutesFlag[MAX_NUM_CLOCK_TS];
    u32 minutesValue[MAX_NUM_CLOCK_TS];
    u32 hoursFlag[MAX_NUM_CLOCK_TS];
    u32 hoursValue[MAX_NUM_CLOCK_TS];
    i32 timeOffset[MAX_NUM_CLOCK_TS];
} seiPicTiming_t;

/* pan scan rectangle payload; the four signed offset arrays have
 * MAX_PAN_SCAN_CNT entries each */
typedef struct
{
    u32 panScanRectId;
    u32 panScanRectCancelFlag;
    u32 panScanCnt;
    i32 panScanRectLeftOffset[MAX_PAN_SCAN_CNT];
    i32 panScanRectRightOffset[MAX_PAN_SCAN_CNT];
    i32 panScanRectTopOffset[MAX_PAN_SCAN_CNT];
    i32 panScanRectBottomOffset[MAX_PAN_SCAN_CNT];
    u32 panScanRectRepetitionPeriod;
} seiPanScanRect_t;

/* user data registered by ITU-T T.35; ituTT35PayloadByte points to
 * numPayloadBytes bytes (allocation and ownership are not visible in this
 * header) */
typedef struct
{
    u32 ituTT35CountryCode;
    u32 ituTT35CountryCodeExtensionByte;
    u8 *ituTT35PayloadByte;
    u32 numPayloadBytes;
} seiUserDataRegisteredItuTT35_t;

/* unregistered user data; userDataPayloadByte points to numPayloadBytes
 * bytes (allocation and ownership are not visible in this header) */
typedef struct
{
    u32 uuidIsoIec11578[4];
    u8 *userDataPayloadByte;
    u32 numPayloadBytes;
} seiUserDataUnregistered_t;

/* recovery point payload */
typedef struct
{
    u32 recoveryFrameCnt;
    u32 exactMatchFlag;
    u32 brokenLinkFlag;
    u32 changingSliceGroupIdc;
} seiRecoveryPoint_t;

/* decoded reference picture marking repetition payload; embeds the
 * decRefPicMarking_t type declared outside this header */
typedef struct
{
    u32 originalIdrFlag;
    u32 originalFrameNum;
    decRefPicMarking_t decRefPicMarking;
} seiDecRefPicMarkingRepetition_t;

/* spare picture payload; per spare picture data in arrays of
 * MAX_NUM_SPARE_PICS entries. spareUnitFlag and zeroRunLength are arrays
 * of pointers (allocation and ownership are not visible in this header) */
typedef struct
{
    u32 targetFrameNum;
    u32 spareFieldFlag;
    u32 targetBottomFieldFlag;
    u32 numSparePics;
    u32 deltaSpareFrameNum[MAX_NUM_SPARE_PICS];
    u32 spareBottomFieldFlag[MAX_NUM_SPARE_PICS];
    u32 spareAreaIdc[MAX_NUM_SPARE_PICS];
    u32 *spareUnitFlag[MAX_NUM_SPARE_PICS];
    u32 *zeroRunLength[MAX_NUM_SPARE_PICS];
} seiSparePic_t;

/* scene information payload */
typedef struct
{
    u32 sceneInfoPresentFlag;
    u32 sceneId;
    u32 sceneTransitionType;
    u32 secondSceneId;
} seiSceneInfo_t;

/* sub-sequence information payload */
typedef struct
{
    u32 subSeqLayerNum;
    u32 subSeqId;
    u32 firstRefPicFlag;
    u32 leadingNonRefPicFlag;
    u32 lastPicFlag;
    u32 subSeqFrameNumFlag;
    u32 subSeqFrameNum;
} seiSubSeqInfo_t;

/* sub-sequence layer characteristics payload; per layer arrays have
 * MAX_NUM_SUB_SEQ_LAYERS entries */
typedef struct
{
    u32 numSubSeqLayers;
    u32 accurateStatisticsFlag[MAX_NUM_SUB_SEQ_LAYERS];
    u32 averageBitRate[MAX_NUM_SUB_SEQ_LAYERS];
    u32 averageFrameRate[MAX_NUM_SUB_SEQ_LAYERS];
} seiSubSeqLayerCharacteristics_t;

/* sub-sequence characteristics payload; the refSubSeq* arrays have
 * MAX_NUM_SUB_SEQ_LAYERS entries */
typedef struct
{
    u32 subSeqLayerNum;
    u32 subSeqId;
    u32 durationFlag;
    u32 subSeqDuration;
    u32 averageRateFlag;
    u32 accurateStatisticsFlag;
    u32 averageBitRate;
    u32 averageFrameRate;
    u32 numReferencedSubseqs;
    u32 refSubSeqLayerNum[MAX_NUM_SUB_SEQ_LAYERS];
    u32 refSubSeqId[MAX_NUM_SUB_SEQ_LAYERS];
    u32 refSubSeqDirection[MAX_NUM_SUB_SEQ_LAYERS];
} seiSubSeqCharacteristics_t;

/* full frame freeze payload */
typedef struct
{
    u32 fullFrameFreezeRepetitionPeriod;
} seiFullFrameFreeze_t;

/* full frame snapshot payload; filled by DecodeFullFrameSnapshot */
typedef struct
{
    u32 snapShotId;
} seiFullFrameSnapshot_t;

/* progressive refinement segment start payload; the decoder stores
 * numRefinementSteps as the decoded value plus one */
typedef struct
{
    u32 progressiveRefinementId;
    u32 numRefinementSteps;
} seiProgressiveRefinementSegmentStart_t;

/* progressive refinement segment end payload */
typedef struct
{
    u32 progressiveRefinementId;
} seiProgressiveRefinementSegmentEnd_t;

/* motion constrained slice group set payload; the decoder stores
 * numSliceGroupsInSet as the decoded value plus one, sliceGroupId holds
 * that many valid entries. panScanRectId is decoded only when
 * panScanRectFlag is set */
typedef struct
{
    u32 numSliceGroupsInSet;
    u32 sliceGroupId[MAX_NUM_SLICE_GROUPS];
    u32 exactSampleValueMatchFlag;
    u32 panScanRectFlag;
    u32 panScanRectId;
} seiMotionConstrainedSliceGroupSet_t;

/* reserved SEI message: raw payload copy. The buffer of numPayloadBytes
 * bytes is allocated by DecodeReservedSeiMessage */
typedef struct
{
    u8 *reservedSeiMessagePayloadByte;
    u32 numPayloadBytes;
} seiReservedSeiMessage_t;

/* container for one decoded SEI message: payloadType plus one member for
 * every payload structure above.
 * NOTE(review): presumably payloadType tells which member is valid -
 * confirm against h264bsdDecodeSeiMessage */
typedef struct
{
    u32 payloadType;
    seiBufferingPeriod_t bufferingPeriod;
    seiPicTiming_t picTiming;
    seiPanScanRect_t panScanRect;
    seiUserDataRegisteredItuTT35_t userDataRegisteredItuTT35;
    seiUserDataUnregistered_t userDataUnregistered;
    seiRecoveryPoint_t recoveryPoint;
    seiDecRefPicMarkingRepetition_t decRefPicMarkingRepetition;
    seiSparePic_t sparePic;
    seiSceneInfo_t sceneInfo;
    seiSubSeqInfo_t subSeqInfo;
    seiSubSeqLayerCharacteristics_t subSeqLayerCharacteristics;
    seiSubSeqCharacteristics_t subSeqCharacteristics;
    seiFullFrameFreeze_t fullFrameFreeze;
    seiFullFrameSnapshot_t fullFrameSnapshot;
    seiProgressiveRefinementSegmentStart_t progressiveRefinementSegmentStart;
    seiProgressiveRefinementSegmentEnd_t progressiveRefinementSegmentEnd;
    seiMotionConstrainedSliceGroupSet_t motionConstrainedSliceGroupSet;
    seiReservedSeiMessage_t reservedSeiMessage;
} seiMessage_t;
Function prototypes +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodeSeiMessage( + strmData_t *pStrmData, + seqParamSet_t *pSeqParamSet, + seiMessage_t *pSeiMessage, + u32 numSliceGroups); + +#endif /* #ifdef H264SWDEC_SEI_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.c new file mode 100644 index 0000000000000000000000000000000000000000..751051a14ffd8a3a27beea1c7e311e38b7c971e5 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.c @@ -0,0 +1,577 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + h264bsdDecodeSeqParamSet + GetDpbSize + h264bsdCompareSeqParamSets + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. 
Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_seq_param_set.h" +#include "h264bsd_util.h" +#include "h264bsd_vlc.h" +#include "h264bsd_vui.h" +#include "h264bsd_cfg.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/* enumeration to indicate invalid return value from the GetDpbSize function */ +enum {INVALID_DPB_SIZE = 0x7FFFFFFF}; + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +static u32 GetDpbSize(u32 picSizeInMbs, u32 levelIdc); + +/*------------------------------------------------------------------------------ + + Function name: h264bsdDecodeSeqParamSet + + Functional description: + Decode sequence parameter set information from the stream. + + Function allocates memory for offsetForRefFrame array if + picture order count type is 1 and numRefFramesInPicOrderCntCycle + is greater than zero. 
+ + Inputs: + pStrmData pointer to stream data structure + + Outputs: + pSeqParamSet decoded information is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK failure, invalid information or end of stream + MEMORY_ALLOCATION_ERROR for memory allocation failure + +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodeSeqParamSet(strmData_t *pStrmData, seqParamSet_t *pSeqParamSet) +{ + +/* Variables */ + + u32 tmp, i, value; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pSeqParamSet); + + H264SwDecMemset(pSeqParamSet, 0, sizeof(seqParamSet_t)); + + /* profile_idc */ + tmp = h264bsdGetBits(pStrmData, 8); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + if (tmp != 66) + { + DEBUG(("NOT BASELINE PROFILE %d\n", tmp)); + } + pSeqParamSet->profileIdc = tmp; + + /* constrained_set0_flag */ + tmp = h264bsdGetBits(pStrmData, 1); + /* constrained_set1_flag */ + tmp = h264bsdGetBits(pStrmData, 1); + /* constrained_set2_flag */ + tmp = h264bsdGetBits(pStrmData, 1); + + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + + /* reserved_zero_5bits, values of these bits shall be ignored */ + tmp = h264bsdGetBits(pStrmData, 5); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + + tmp = h264bsdGetBits(pStrmData, 8); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSeqParamSet->levelIdc = tmp; + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSeqParamSet->seqParameterSetId); + if (tmp != HANTRO_OK) + return(tmp); + if (pSeqParamSet->seqParameterSetId >= MAX_NUM_SEQ_PARAM_SETS) + { + EPRINT("seq_param_set_id"); + return(HANTRO_NOK); + } + + /* log2_max_frame_num_minus4 */ + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + if (value > 12) + { + EPRINT("log2_max_frame_num_minus4"); + return(HANTRO_NOK); + } + /* maxFrameNum = 2^(log2_max_frame_num_minus4 + 4) */ + pSeqParamSet->maxFrameNum = 1 << (value+4); + + /* valid POC types are 0, 1 and 2 */ + tmp = 
h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + if (value > 2) + { + EPRINT("pic_order_cnt_type"); + return(HANTRO_NOK); + } + pSeqParamSet->picOrderCntType = value; + + if (pSeqParamSet->picOrderCntType == 0) + { + /* log2_max_pic_order_cnt_lsb_minus4 */ + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + if (value > 12) + { + EPRINT("log2_max_pic_order_cnt_lsb_minus4"); + return(HANTRO_NOK); + } + /* maxPicOrderCntLsb = 2^(log2_max_pic_order_cnt_lsb_minus4 + 4) */ + pSeqParamSet->maxPicOrderCntLsb = 1 << (value+4); + } + else if (pSeqParamSet->picOrderCntType == 1) + { + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSeqParamSet->deltaPicOrderAlwaysZeroFlag = (tmp == 1) ? + HANTRO_TRUE : HANTRO_FALSE; + + tmp = h264bsdDecodeExpGolombSigned(pStrmData, + &pSeqParamSet->offsetForNonRefPic); + if (tmp != HANTRO_OK) + return(tmp); + + tmp = h264bsdDecodeExpGolombSigned(pStrmData, + &pSeqParamSet->offsetForTopToBottomField); + if (tmp != HANTRO_OK) + return(tmp); + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSeqParamSet->numRefFramesInPicOrderCntCycle); + if (tmp != HANTRO_OK) + return(tmp); + if (pSeqParamSet->numRefFramesInPicOrderCntCycle > 255) + { + EPRINT("num_ref_frames_in_pic_order_cnt_cycle"); + return(HANTRO_NOK); + } + + if (pSeqParamSet->numRefFramesInPicOrderCntCycle) + { + /* NOTE: This has to be freed somewhere! 
*/ + ALLOCATE(pSeqParamSet->offsetForRefFrame, + pSeqParamSet->numRefFramesInPicOrderCntCycle, i32); + if (pSeqParamSet->offsetForRefFrame == NULL) + return(MEMORY_ALLOCATION_ERROR); + + for (i = 0; i < pSeqParamSet->numRefFramesInPicOrderCntCycle; i++) + { + tmp = h264bsdDecodeExpGolombSigned(pStrmData, + pSeqParamSet->offsetForRefFrame + i); + if (tmp != HANTRO_OK) + return(tmp); + } + } + else + { + pSeqParamSet->offsetForRefFrame = NULL; + } + } + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSeqParamSet->numRefFrames); + if (tmp != HANTRO_OK) + return(tmp); + if (pSeqParamSet->numRefFrames > MAX_NUM_REF_PICS) + { + EPRINT("num_ref_frames"); + return(HANTRO_NOK); + } + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSeqParamSet->gapsInFrameNumValueAllowedFlag = (tmp == 1) ? + HANTRO_TRUE : HANTRO_FALSE; + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pSeqParamSet->picWidthInMbs = value + 1; + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pSeqParamSet->picHeightInMbs = value + 1; + + /* frame_mbs_only_flag, shall be 1 for baseline profile */ + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + if (!tmp) + { + EPRINT("frame_mbs_only_flag"); + return(HANTRO_NOK); + } + + /* direct_8x8_inference_flag */ + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSeqParamSet->frameCroppingFlag = (tmp == 1) ? 
HANTRO_TRUE : HANTRO_FALSE; + + if (pSeqParamSet->frameCroppingFlag) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSeqParamSet->frameCropLeftOffset); + if (tmp != HANTRO_OK) + return(tmp); + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSeqParamSet->frameCropRightOffset); + if (tmp != HANTRO_OK) + return(tmp); + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSeqParamSet->frameCropTopOffset); + if (tmp != HANTRO_OK) + return(tmp); + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pSeqParamSet->frameCropBottomOffset); + if (tmp != HANTRO_OK) + return(tmp); + + /* check that frame cropping params are valid, parameters shall + * specify non-negative area within the original picture */ + if ( ( (i32)pSeqParamSet->frameCropLeftOffset > + ( 8 * (i32)pSeqParamSet->picWidthInMbs - + ((i32)pSeqParamSet->frameCropRightOffset + 1) ) ) || + ( (i32)pSeqParamSet->frameCropTopOffset > + ( 8 * (i32)pSeqParamSet->picHeightInMbs - + ((i32)pSeqParamSet->frameCropBottomOffset + 1) ) ) ) + { + EPRINT("frame_cropping"); + return(HANTRO_NOK); + } + } + + /* check that image dimensions and levelIdc match */ + tmp = pSeqParamSet->picWidthInMbs * pSeqParamSet->picHeightInMbs; + value = GetDpbSize(tmp, pSeqParamSet->levelIdc); + if (value == INVALID_DPB_SIZE || pSeqParamSet->numRefFrames > value) + { + DEBUG(("WARNING! Invalid DPB size based on SPS Level!\n")); + DEBUG(("WARNING! Using num_ref_frames =%d for DPB size!\n", + pSeqParamSet->numRefFrames)); + value = pSeqParamSet->numRefFrames; + } + pSeqParamSet->maxDpbSize = value; + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSeqParamSet->vuiParametersPresentFlag = (tmp == 1) ? 
+ HANTRO_TRUE : HANTRO_FALSE; + + /* VUI */ + if (pSeqParamSet->vuiParametersPresentFlag) + { + ALLOCATE(pSeqParamSet->vuiParameters, 1, vuiParameters_t); + if (pSeqParamSet->vuiParameters == NULL) + return(MEMORY_ALLOCATION_ERROR); + tmp = h264bsdDecodeVuiParameters(pStrmData, + pSeqParamSet->vuiParameters); + if (tmp != HANTRO_OK) + return(tmp); + /* check numReorderFrames and maxDecFrameBuffering */ + if (pSeqParamSet->vuiParameters->bitstreamRestrictionFlag) + { + if (pSeqParamSet->vuiParameters->numReorderFrames > + pSeqParamSet->vuiParameters->maxDecFrameBuffering || + pSeqParamSet->vuiParameters->maxDecFrameBuffering < + pSeqParamSet->numRefFrames || + pSeqParamSet->vuiParameters->maxDecFrameBuffering > + pSeqParamSet->maxDpbSize) + { + return(HANTRO_NOK); + } + + /* standard says that "the sequence shall not require a DPB with + * size of more than max(1, maxDecFrameBuffering) */ + pSeqParamSet->maxDpbSize = + MAX(1, pSeqParamSet->vuiParameters->maxDecFrameBuffering); + } + } + + tmp = h264bsdRbspTrailingBits(pStrmData); + + /* ignore possible errors in trailing bits of parameters sets */ + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: GetDpbSize + + Functional description: + Get size of the DPB in frames. Size is determined based on the + picture size and MaxDPB for the specified level. These determine + how many pictures may fit into to the buffer. However, the size + is also limited to a maximum of 16 frames and therefore function + returns the minimum of the determined size and 16. 
+ + Inputs: + picSizeInMbs number of macroblocks in the picture + levelIdc indicates the level + + Outputs: + none + + Returns: + size of the DPB in frames + INVALID_DPB_SIZE when invalid levelIdc specified or picSizeInMbs + is higher than supported by the level in question + +------------------------------------------------------------------------------*/ + +u32 GetDpbSize(u32 picSizeInMbs, u32 levelIdc) +{ + +/* Variables */ + + u32 tmp; + u32 maxPicSizeInMbs; + +/* Code */ + + ASSERT(picSizeInMbs); + + /* use tmp as the size of the DPB in bytes, computes as 1024 * MaxDPB + * (from table A-1 in Annex A) */ + switch (levelIdc) + { + case 10: + tmp = 152064; + maxPicSizeInMbs = 99; + break; + + case 11: + tmp = 345600; + maxPicSizeInMbs = 396; + break; + + case 12: + tmp = 912384; + maxPicSizeInMbs = 396; + break; + + case 13: + tmp = 912384; + maxPicSizeInMbs = 396; + break; + + case 20: + tmp = 912384; + maxPicSizeInMbs = 396; + break; + + case 21: + tmp = 1824768; + maxPicSizeInMbs = 792; + break; + + case 22: + tmp = 3110400; + maxPicSizeInMbs = 1620; + break; + + case 30: + tmp = 3110400; + maxPicSizeInMbs = 1620; + break; + + case 31: + tmp = 6912000; + maxPicSizeInMbs = 3600; + break; + + case 32: + tmp = 7864320; + maxPicSizeInMbs = 5120; + break; + + case 40: + tmp = 12582912; + maxPicSizeInMbs = 8192; + break; + + case 41: + tmp = 12582912; + maxPicSizeInMbs = 8192; + break; + + case 42: + tmp = 34816*384; + maxPicSizeInMbs = 8704; + break; + + case 50: + /* standard says 42301440 here, but corrigendum "corrects" this to + * 42393600 */ + tmp = 42393600; + maxPicSizeInMbs = 22080; + break; + + case 51: + tmp = 70778880; + maxPicSizeInMbs = 36864; + break; + + default: + return(INVALID_DPB_SIZE); + } + + /* this is not "correct" return value! 
However, it results in error in + * decoding and this was easiest place to check picture size */ + if (picSizeInMbs > maxPicSizeInMbs) + return(INVALID_DPB_SIZE); + + tmp /= (picSizeInMbs*384); + + return(MIN(tmp, 16)); + +} + +/*------------------------------------------------------------------------------ + + Function name: h264bsdCompareSeqParamSets + + Functional description: + Compare two sequence parameter sets. + + Inputs: + pSps1 pointer to a sequence parameter set + pSps2 pointer to another sequence parameter set + + Outputs: + 0 sequence parameter sets are equal + 1 otherwise + +------------------------------------------------------------------------------*/ + +u32 h264bsdCompareSeqParamSets(seqParamSet_t *pSps1, seqParamSet_t *pSps2) +{ + +/* Variables */ + + u32 i; + +/* Code */ + + ASSERT(pSps1); + ASSERT(pSps2); + + /* first compare parameters whose existence does not depend on other + * parameters and only compare the rest of the params if these are equal */ + if (pSps1->profileIdc == pSps2->profileIdc && + pSps1->levelIdc == pSps2->levelIdc && + pSps1->maxFrameNum == pSps2->maxFrameNum && + pSps1->picOrderCntType == pSps2->picOrderCntType && + pSps1->numRefFrames == pSps2->numRefFrames && + pSps1->gapsInFrameNumValueAllowedFlag == + pSps2->gapsInFrameNumValueAllowedFlag && + pSps1->picWidthInMbs == pSps2->picWidthInMbs && + pSps1->picHeightInMbs == pSps2->picHeightInMbs && + pSps1->frameCroppingFlag == pSps2->frameCroppingFlag && + pSps1->vuiParametersPresentFlag == pSps2->vuiParametersPresentFlag) + { + if (pSps1->picOrderCntType == 0) + { + if (pSps1->maxPicOrderCntLsb != pSps2->maxPicOrderCntLsb) + return 1; + } + else if (pSps1->picOrderCntType == 1) + { + if (pSps1->deltaPicOrderAlwaysZeroFlag != + pSps2->deltaPicOrderAlwaysZeroFlag || + pSps1->offsetForNonRefPic != pSps2->offsetForNonRefPic || + pSps1->offsetForTopToBottomField != + pSps2->offsetForTopToBottomField || + pSps1->numRefFramesInPicOrderCntCycle != + 
pSps2->numRefFramesInPicOrderCntCycle) + { + return 1; + } + else + { + for (i = 0; i < pSps1->numRefFramesInPicOrderCntCycle; i++) + if (pSps1->offsetForRefFrame[i] != + pSps2->offsetForRefFrame[i]) + { + return 1; + } + } + } + if (pSps1->frameCroppingFlag) + { + if (pSps1->frameCropLeftOffset != pSps2->frameCropLeftOffset || + pSps1->frameCropRightOffset != pSps2->frameCropRightOffset || + pSps1->frameCropTopOffset != pSps2->frameCropTopOffset || + pSps1->frameCropBottomOffset != pSps2->frameCropBottomOffset) + { + return 1; + } + } + + return 0; + } + + return 1; +} + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.h new file mode 100755 index 0000000000000000000000000000000000000000..e18df94976709fd6de3978421f4a79e529183b8a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. 
Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_SEQ_PARAM_SET_H +#define H264SWDEC_SEQ_PARAM_SET_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_stream.h" +#include "h264bsd_vui.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + +/* structure to store sequence parameter set information decoded from the + * stream */ +typedef struct +{ + u32 profileIdc; + u32 levelIdc; + u32 seqParameterSetId; + u32 maxFrameNum; + u32 picOrderCntType; + u32 maxPicOrderCntLsb; + u32 deltaPicOrderAlwaysZeroFlag; + i32 offsetForNonRefPic; + i32 offsetForTopToBottomField; + u32 numRefFramesInPicOrderCntCycle; + i32 *offsetForRefFrame; + u32 numRefFrames; + u32 gapsInFrameNumValueAllowedFlag; + u32 picWidthInMbs; + u32 picHeightInMbs; + u32 frameCroppingFlag; + u32 frameCropLeftOffset; + u32 frameCropRightOffset; + u32 frameCropTopOffset; + u32 frameCropBottomOffset; + u32 vuiParametersPresentFlag; + vuiParameters_t *vuiParameters; + u32 maxDpbSize; +} seqParamSet_t; + +/*------------------------------------------------------------------------------ + 4. 
Function prototypes +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodeSeqParamSet(strmData_t *pStrmData, + seqParamSet_t *pSeqParamSet); + +u32 h264bsdCompareSeqParamSets(seqParamSet_t *pSps1, seqParamSet_t *pSps2); + +#endif /* #ifdef H264SWDEC_SEQ_PARAM_SET_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.c new file mode 100755 index 0000000000000000000000000000000000000000..c288d4b7dd8702d1462633eb930cc02fe6958a52 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.c @@ -0,0 +1,354 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + h264bsdDecodeSliceData + SetMbParams + h264bsdMarkSliceCorrupted + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. 
Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_slice_data.h" +#include "h264bsd_util.h" +#include "h264bsd_vlc.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +static void SetMbParams(mbStorage_t *pMb, sliceHeader_t *pSlice, u32 sliceId, + i32 chromaQpIndexOffset); + +/*------------------------------------------------------------------------------ + + 5.1 Function name: h264bsdDecodeSliceData + + Functional description: + Decode one slice. Function decodes stream data, i.e. macroblocks + and possible skip_run fields. h264bsdDecodeMacroblock function is + called to handle all other macroblock related processing. 
+ Macroblock to slice group mapping is considered when next + macroblock to process is determined (h264bsdNextMbAddress function) + map + + Inputs: + pStrmData pointer to stream data structure + pStorage pointer to storage structure + currImage pointer to current processed picture, needed for + intra prediction of the macroblocks + pSliceHeader pointer to slice header of the current slice + + Outputs: + currImage processed macroblocks are written to current image + pStorage mbStorage structure of each processed macroblock + is updated here + + Returns: + HANTRO_OK success + HANTRO_NOK invalid stream data + +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodeSliceData(strmData_t *pStrmData, storage_t *pStorage, + image_t *currImage, sliceHeader_t *pSliceHeader) +{ + +/* Variables */ + + u8 mbData[384 + 15 + 32]; + u8 *data; + u32 tmp; + u32 skipRun; + u32 prevSkipped; + u32 currMbAddr; + u32 moreMbs; + u32 mbCount; + i32 qpY; + macroblockLayer_t *mbLayer; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pSliceHeader); + ASSERT(pStorage); + ASSERT(pSliceHeader->firstMbInSlice < pStorage->picSizeInMbs); + + /* ensure 16-byte alignment */ + data = (u8*)ALIGN(mbData, 16); + + mbLayer = pStorage->mbLayer; + + currMbAddr = pSliceHeader->firstMbInSlice; + skipRun = 0; + prevSkipped = HANTRO_FALSE; + + /* increment slice index, will be one for decoding of the first slice of + * the picture */ + pStorage->slice->sliceId++; + + /* lastMbAddr stores address of the macroblock that was last successfully + * decoded, needed for error handling */ + pStorage->slice->lastMbAddr = 0; + + mbCount = 0; + /* initial quantization parameter for the slice is obtained as the sum of + * initial QP for the picture and sliceQpDelta for the current slice */ + qpY = (i32)pStorage->activePps->picInitQp + pSliceHeader->sliceQpDelta; + do + { + /* primary picture and already decoded macroblock -> error */ + if (!pSliceHeader->redundantPicCnt && 
pStorage->mb[currMbAddr].decoded) + { + EPRINT("Primary and already decoded"); + return(HANTRO_NOK); + } + + SetMbParams(pStorage->mb + currMbAddr, pSliceHeader, + pStorage->slice->sliceId, pStorage->activePps->chromaQpIndexOffset); + + if (!IS_I_SLICE(pSliceHeader->sliceType)) + { + if (!prevSkipped) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &skipRun); + if (tmp != HANTRO_OK) + return(tmp); + /* skip_run shall be less than or equal to number of + * macroblocks left */ + if (skipRun > (pStorage->picSizeInMbs - currMbAddr)) + { + EPRINT("skip_run"); + return(HANTRO_NOK); + } + if (skipRun) + { + prevSkipped = HANTRO_TRUE; + H264SwDecMemset(&mbLayer->mbPred, 0, sizeof(mbPred_t)); + /* mark current macroblock skipped */ + mbLayer->mbType = P_Skip; + } + } + } + + if (skipRun) + { + DEBUG(("Skipping macroblock %d\n", currMbAddr)); + skipRun--; + } + else + { + prevSkipped = HANTRO_FALSE; + tmp = h264bsdDecodeMacroblockLayer(pStrmData, mbLayer, + pStorage->mb + currMbAddr, pSliceHeader->sliceType, + pSliceHeader->numRefIdxL0Active); + if (tmp != HANTRO_OK) + { + EPRINT("macroblock_layer"); + return(tmp); + } + } + + tmp = h264bsdDecodeMacroblock(pStorage->mb + currMbAddr, mbLayer, + currImage, pStorage->dpb, &qpY, currMbAddr, + pStorage->activePps->constrainedIntraPredFlag, data); + if (tmp != HANTRO_OK) + { + EPRINT("MACRO_BLOCK"); + return(tmp); + } + + /* increment macroblock count only for macroblocks that were decoded + * for the first time (redundant slices) */ + if (pStorage->mb[currMbAddr].decoded == 1) + mbCount++; + + /* keep on processing as long as there is stream data left or + * processing of macroblocks to be skipped based on the last skipRun is + * not finished */ + moreMbs = (h264bsdMoreRbspData(pStrmData) || skipRun) ? 
+ HANTRO_TRUE : HANTRO_FALSE; + + /* lastMbAddr is only updated for intra slices (all macroblocks of + * inter slices will be lost in case of an error) */ + if (IS_I_SLICE(pSliceHeader->sliceType)) + pStorage->slice->lastMbAddr = currMbAddr; + + currMbAddr = h264bsdNextMbAddress(pStorage->sliceGroupMap, + pStorage->picSizeInMbs, currMbAddr); + /* data left in the buffer but no more macroblocks for current slice + * group -> error */ + if (moreMbs && !currMbAddr) + { + EPRINT("Next mb address"); + return(HANTRO_NOK); + } + + } while (moreMbs); + + if ((pStorage->slice->numDecodedMbs + mbCount) > pStorage->picSizeInMbs) + { + EPRINT("Num decoded mbs"); + return(HANTRO_NOK); + } + + pStorage->slice->numDecodedMbs += mbCount; + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + 5.2 Function: SetMbParams + + Functional description: + Set macroblock parameters that remain constant for this slice + + Inputs: + pSlice pointer to current slice header + sliceId id of the current slice + chromaQpIndexOffset + + Outputs: + pMb pointer to macroblock structure which is updated + + Returns: + none + +------------------------------------------------------------------------------*/ + +void SetMbParams(mbStorage_t *pMb, sliceHeader_t *pSlice, u32 sliceId, + i32 chromaQpIndexOffset) +{ + +/* Variables */ + u32 tmp1; + i32 tmp2, tmp3; + +/* Code */ + + tmp1 = pSlice->disableDeblockingFilterIdc; + tmp2 = pSlice->sliceAlphaC0Offset; + tmp3 = pSlice->sliceBetaOffset; + pMb->sliceId = sliceId; + pMb->disableDeblockingFilterIdc = tmp1; + pMb->filterOffsetA = tmp2; + pMb->filterOffsetB = tmp3; + pMb->chromaQpIndexOffset = chromaQpIndexOffset; + +} + +/*------------------------------------------------------------------------------ + + 5.3 Function name: h264bsdMarkSliceCorrupted + + Functional description: + Mark macroblocks of the slice corrupted. 
If lastMbAddr in the slice + storage is set -> picWidhtInMbs (or at least 10) macroblocks back + from the lastMbAddr are marked corrupted. However, if lastMbAddr + is not set -> all macroblocks of the slice are marked. + + Inputs: + pStorage pointer to storage structure + firstMbInSlice address of the first macroblock in the slice, this + identifies the slice to be marked corrupted + + Outputs: + pStorage mbStorage for the corrupted macroblocks updated + + Returns: + none + +------------------------------------------------------------------------------*/ + +void h264bsdMarkSliceCorrupted(storage_t *pStorage, u32 firstMbInSlice) +{ + +/* Variables */ + + u32 tmp, i; + u32 sliceId; + u32 currMbAddr; + +/* Code */ + + ASSERT(pStorage); + ASSERT(firstMbInSlice < pStorage->picSizeInMbs); + + currMbAddr = firstMbInSlice; + + sliceId = pStorage->slice->sliceId; + + /* DecodeSliceData sets lastMbAddr for I slices -> if it was set, go back + * MAX(picWidthInMbs, 10) macroblocks and start marking from there */ + if (pStorage->slice->lastMbAddr) + { + ASSERT(pStorage->mb[pStorage->slice->lastMbAddr].sliceId == sliceId); + i = pStorage->slice->lastMbAddr - 1; + tmp = 0; + while (i > currMbAddr) + { + if (pStorage->mb[i].sliceId == sliceId) + { + tmp++; + if (tmp >= MAX(pStorage->activeSps->picWidthInMbs, 10)) + break; + } + i--; + } + currMbAddr = i; + } + + do + { + + if ( (pStorage->mb[currMbAddr].sliceId == sliceId) && + (pStorage->mb[currMbAddr].decoded) ) + { + pStorage->mb[currMbAddr].decoded--; + } + else + { + break; + } + + currMbAddr = h264bsdNextMbAddress(pStorage->sliceGroupMap, + pStorage->picSizeInMbs, currMbAddr); + + } while (currMbAddr); + +} + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.h new file mode 100755 index 0000000000000000000000000000000000000000..f23d49ec7a5530c8a668a4dd70ea42d27fc07814 --- /dev/null +++ 
b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_SLICE_DATA_H +#define H264SWDEC_SLICE_DATA_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_stream.h" +#include "h264bsd_cfg.h" +#include "h264bsd_slice_header.h" +#include "h264bsd_storage.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. 
Function prototypes +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodeSliceData(strmData_t *pStrmData, storage_t *pStorage, + image_t *currImage, sliceHeader_t *pSliceHeader); + +void h264bsdMarkSliceCorrupted(storage_t *pStorage, u32 firstMbInSlice); + +#endif /* #ifdef H264SWDEC_SLICE_DATA_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.c new file mode 100755 index 0000000000000000000000000000000000000000..7cbb534155cc2b5cd8b7be8dcf9be49af211fe1d --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.c @@ -0,0 +1,589 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + DecodeInterleavedMap + DecodeDispersedMap + DecodeForegroundLeftOverMap + DecodeBoxOutMap + DecodeRasterScanMap + DecodeWipeMap + h264bsdDecodeSliceGroupMap + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. 
Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_slice_group_map.h" +#include "h264bsd_cfg.h" +#include "h264bsd_pic_param_set.h" +#include "h264bsd_util.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +static void DecodeInterleavedMap( + u32 *map, + u32 numSliceGroups, + u32 *runLength, + u32 picSize); + +static void DecodeDispersedMap( + u32 *map, + u32 numSliceGroups, + u32 picWidth, + u32 picHeight); + +static void DecodeForegroundLeftOverMap( + u32 *map, + u32 numSliceGroups, + u32 *topLeft, + u32 *bottomRight, + u32 picWidth, + u32 picHeight); + +static void DecodeBoxOutMap( + u32 *map, + u32 sliceGroupChangeDirectionFlag, + u32 unitsInSliceGroup0, + u32 picWidth, + u32 picHeight); + +static void DecodeRasterScanMap( + u32 *map, + u32 sliceGroupChangeDirectionFlag, + u32 sizeOfUpperLeftGroup, + u32 picSize); + +static void DecodeWipeMap( + u32 *map, + u32 sliceGroupChangeDirectionFlag, + u32 sizeOfUpperLeftGroup, + u32 picWidth, + u32 picHeight); + +/*------------------------------------------------------------------------------ + + Function: DecodeInterleavedMap + + Functional description: + Function to decode interleaved slice group map type, i.e. slice + group map type 0. 
+ + Inputs: + map pointer to the map + numSliceGroups number of slice groups + runLength run_length[] values for each slice group + picSize picture size in macroblocks + + Outputs: + map slice group map is stored here + + Returns: + none + +------------------------------------------------------------------------------*/ + +void DecodeInterleavedMap( + u32 *map, + u32 numSliceGroups, + u32 *runLength, + u32 picSize) +{ + +/* Variables */ + + u32 i,j, group; + +/* Code */ + + ASSERT(map); + ASSERT(numSliceGroups >= 1 && numSliceGroups <= MAX_NUM_SLICE_GROUPS); + ASSERT(runLength); + + i = 0; + + do { + for (group = 0; group < numSliceGroups && i < picSize; + i += runLength[group++]) + { + ASSERT(runLength[group] <= picSize); + for (j = 0; j < runLength[group] && i + j < picSize; j++) + map[i+j] = group; + } + } while (i < picSize); + + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeDispersedMap + + Functional description: + Function to decode dispersed slice group map type, i.e. slice + group map type 1. 
+ + Inputs: + map pointer to the map + numSliceGroups number of slice groups + picWidth picture width in macroblocks + picHeight picture height in macroblocks + + Outputs: + map slice group map is stored here + + Returns: + none + +------------------------------------------------------------------------------*/ + +void DecodeDispersedMap( + u32 *map, + u32 numSliceGroups, + u32 picWidth, + u32 picHeight) +{ + +/* Variables */ + + u32 i, picSize; + +/* Code */ + + ASSERT(map); + ASSERT(numSliceGroups >= 1 && numSliceGroups <= MAX_NUM_SLICE_GROUPS); + ASSERT(picWidth); + ASSERT(picHeight); + + picSize = picWidth * picHeight; + + for (i = 0; i < picSize; i++) + map[i] = ((i % picWidth) + (((i / picWidth) * numSliceGroups) >> 1)) % + numSliceGroups; + + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeForegroundLeftOverMap + + Functional description: + Function to decode foreground with left-over slice group map type, + i.e. slice group map type 2. 
+ + Inputs: + map pointer to the map + numSliceGroups number of slice groups + topLeft top_left[] values + bottomRight bottom_right[] values + picWidth picture width in macroblocks + picHeight picture height in macroblocks + + Outputs: + map slice group map is stored here + + Returns: + none + +------------------------------------------------------------------------------*/ + +void DecodeForegroundLeftOverMap( + u32 *map, + u32 numSliceGroups, + u32 *topLeft, + u32 *bottomRight, + u32 picWidth, + u32 picHeight) +{ + +/* Variables */ + + u32 i,y,x,yTopLeft,yBottomRight,xTopLeft,xBottomRight, picSize; + u32 group; + +/* Code */ + + ASSERT(map); + ASSERT(numSliceGroups >= 1 && numSliceGroups <= MAX_NUM_SLICE_GROUPS); + ASSERT(topLeft); + ASSERT(bottomRight); + ASSERT(picWidth); + ASSERT(picHeight); + + picSize = picWidth * picHeight; + + for (i = 0; i < picSize; i++) + map[i] = numSliceGroups - 1; + + for (group = numSliceGroups - 1; group--; ) + { + ASSERT( topLeft[group] <= bottomRight[group] && + bottomRight[group] < picSize ); + yTopLeft = topLeft[group] / picWidth; + xTopLeft = topLeft[group] % picWidth; + yBottomRight = bottomRight[group] / picWidth; + xBottomRight = bottomRight[group] % picWidth; + ASSERT(xTopLeft <= xBottomRight); + + for (y = yTopLeft; y <= yBottomRight; y++) + for (x = xTopLeft; x <= xBottomRight; x++) + map[ y * picWidth + x ] = group; + } + + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeBoxOutMap + + Functional description: + Function to decode box-out slice group map type, i.e. slice group + map type 3. 
+ + Inputs: + map pointer to the map + sliceGroupChangeDirectionFlag slice_group_change_direction_flag + unitsInSliceGroup0 mbs on slice group 0 + picWidth picture width in macroblocks + picHeight picture height in macroblocks + + Outputs: + map slice group map is stored here + + Returns: + none + +------------------------------------------------------------------------------*/ + +void DecodeBoxOutMap( + u32 *map, + u32 sliceGroupChangeDirectionFlag, + u32 unitsInSliceGroup0, + u32 picWidth, + u32 picHeight) +{ + +/* Variables */ + + u32 i, k, picSize; + i32 x, y, xDir, yDir, leftBound, topBound, rightBound, bottomBound; + u32 mapUnitVacant; + +/* Code */ + + ASSERT(map); + ASSERT(picWidth); + ASSERT(picHeight); + + picSize = picWidth * picHeight; + ASSERT(unitsInSliceGroup0 <= picSize); + + for (i = 0; i < picSize; i++) + map[i] = 1; + + x = (picWidth - (u32)sliceGroupChangeDirectionFlag) >> 1; + y = (picHeight - (u32)sliceGroupChangeDirectionFlag) >> 1; + + leftBound = x; + topBound = y; + + rightBound = x; + bottomBound = y; + + xDir = (i32)sliceGroupChangeDirectionFlag - 1; + yDir = (i32)sliceGroupChangeDirectionFlag; + + for (k = 0; k < unitsInSliceGroup0; k += mapUnitVacant ? 1 : 0) + { + mapUnitVacant = (map[ (u32)y * picWidth + (u32)x ] == 1) ? 
+ HANTRO_TRUE : HANTRO_FALSE; + + if (mapUnitVacant) + map[ (u32)y * picWidth + (u32)x ] = 0; + + if (xDir == -1 && x == leftBound) + { + leftBound = MAX(leftBound - 1, 0); + x = leftBound; + xDir = 0; + yDir = 2 * (i32)sliceGroupChangeDirectionFlag - 1; + } + else if (xDir == 1 && x == rightBound) + { + rightBound = MIN(rightBound + 1, (i32)picWidth - 1); + x = rightBound; + xDir = 0; + yDir = 1 - 2 * (i32)sliceGroupChangeDirectionFlag; + } + else if (yDir == -1 && y == topBound) + { + topBound = MAX(topBound - 1, 0); + y = topBound; + xDir = 1 - 2 * (i32)sliceGroupChangeDirectionFlag; + yDir = 0; + } + else if (yDir == 1 && y == bottomBound) + { + bottomBound = MIN(bottomBound + 1, (i32)picHeight - 1); + y = bottomBound; + xDir = 2 * (i32)sliceGroupChangeDirectionFlag - 1; + yDir = 0; + } + else + { + x += xDir; + y += yDir; + } + } + + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeRasterScanMap + + Functional description: + Function to decode raster scan slice group map type, i.e. slice + group map type 4. 
+ + Inputs: + map pointer to the map + sliceGroupChangeDirectionFlag slice_group_change_direction_flag + sizeOfUpperLeftGroup mbs in upperLeftGroup + picSize picture size in macroblocks + + Outputs: + map slice group map is stored here + + Returns: + none + +------------------------------------------------------------------------------*/ + +void DecodeRasterScanMap( + u32 *map, + u32 sliceGroupChangeDirectionFlag, + u32 sizeOfUpperLeftGroup, + u32 picSize) +{ + +/* Variables */ + + u32 i; + +/* Code */ + + ASSERT(map); + ASSERT(picSize); + ASSERT(sizeOfUpperLeftGroup <= picSize); + + for (i = 0; i < picSize; i++) + if (i < sizeOfUpperLeftGroup) + map[i] = (u32)sliceGroupChangeDirectionFlag; + else + map[i] = 1 - (u32)sliceGroupChangeDirectionFlag; + + +} + +/*------------------------------------------------------------------------------ + + Function: DecodeWipeMap + + Functional description: + Function to decode wipe slice group map type, i.e. slice group map + type 5. + + Inputs: + sliceGroupChangeDirectionFlag slice_group_change_direction_flag + sizeOfUpperLeftGroup mbs in upperLeftGroup + picWidth picture width in macroblocks + picHeight picture height in macroblocks + + Outputs: + map slice group map is stored here + + Returns: + none + +------------------------------------------------------------------------------*/ + +void DecodeWipeMap( + u32 *map, + u32 sliceGroupChangeDirectionFlag, + u32 sizeOfUpperLeftGroup, + u32 picWidth, + u32 picHeight) +{ + +/* Variables */ + + u32 i,j,k; + +/* Code */ + + ASSERT(map); + ASSERT(picWidth); + ASSERT(picHeight); + ASSERT(sizeOfUpperLeftGroup <= picWidth * picHeight); + + k = 0; + for (j = 0; j < picWidth; j++) + for (i = 0; i < picHeight; i++) + if (k++ < sizeOfUpperLeftGroup) + map[ i * picWidth + j ] = (u32)sliceGroupChangeDirectionFlag; + else + map[ i * picWidth + j ] = 1 - + (u32)sliceGroupChangeDirectionFlag; + + +} + +/*------------------------------------------------------------------------------ + + Function: 
h264bsdDecodeSliceGroupMap + + Functional description: + Function to decode macroblock to slice group map. Construction + of different slice group map types is handled by separate + functions defined above. See standard for details how slice group + maps are computed. + + Inputs: + pps active picture parameter set + sliceGroupChangeCycle slice_group_change_cycle + picWidth picture width in macroblocks + picHeight picture height in macroblocks + + Outputs: + map slice group map is stored here + + Returns: + none + +------------------------------------------------------------------------------*/ + +void h264bsdDecodeSliceGroupMap( + u32 *map, + picParamSet_t *pps, + u32 sliceGroupChangeCycle, + u32 picWidth, + u32 picHeight) +{ + +/* Variables */ + + u32 i, picSize, unitsInSliceGroup0 = 0, sizeOfUpperLeftGroup = 0; + +/* Code */ + + ASSERT(map); + ASSERT(pps); + ASSERT(picWidth); + ASSERT(picHeight); + ASSERT(pps->sliceGroupMapType < 7); + + picSize = picWidth * picHeight; + + /* just one slice group -> all macroblocks belong to group 0 */ + if (pps->numSliceGroups == 1) + { + H264SwDecMemset(map, 0, picSize * sizeof(u32)); + return; + } + + if (pps->sliceGroupMapType > 2 && pps->sliceGroupMapType < 6) + { + ASSERT(pps->sliceGroupChangeRate && + pps->sliceGroupChangeRate <= picSize); + + unitsInSliceGroup0 = + MIN(sliceGroupChangeCycle * pps->sliceGroupChangeRate, picSize); + + if (pps->sliceGroupMapType == 4 || pps->sliceGroupMapType == 5) + sizeOfUpperLeftGroup = pps->sliceGroupChangeDirectionFlag ? 
+ (picSize - unitsInSliceGroup0) : unitsInSliceGroup0; + } + + switch (pps->sliceGroupMapType) + { + case 0: + DecodeInterleavedMap(map, pps->numSliceGroups, + pps->runLength, picSize); + break; + + case 1: + DecodeDispersedMap(map, pps->numSliceGroups, picWidth, + picHeight); + break; + + case 2: + DecodeForegroundLeftOverMap(map, pps->numSliceGroups, + pps->topLeft, pps->bottomRight, picWidth, picHeight); + break; + + case 3: + DecodeBoxOutMap(map, pps->sliceGroupChangeDirectionFlag, + unitsInSliceGroup0, picWidth, picHeight); + break; + + case 4: + DecodeRasterScanMap(map, + pps->sliceGroupChangeDirectionFlag, sizeOfUpperLeftGroup, + picSize); + break; + + case 5: + DecodeWipeMap(map, pps->sliceGroupChangeDirectionFlag, + sizeOfUpperLeftGroup, picWidth, picHeight); + break; + + default: + ASSERT(pps->sliceGroupId); + for (i = 0; i < picSize; i++) + { + ASSERT(pps->sliceGroupId[i] < pps->numSliceGroups); + map[i] = pps->sliceGroupId[i]; + } + break; + } + +} + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.h new file mode 100755 index 0000000000000000000000000000000000000000..4bcb6f2cee0943070a54ec12d40f0fd8c42e4170 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_SLICE_GROUP_MAP_H +#define H264SWDEC_SLICE_GROUP_MAP_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_pic_param_set.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + + +/*------------------------------------------------------------------------------ + 4. Function prototypes +------------------------------------------------------------------------------*/ + +void h264bsdDecodeSliceGroupMap( + u32 *map, + picParamSet_t *pps, + u32 sliceGroupChangeCycle, + u32 picWidth, + u32 picHeight); + +#endif /* #ifdef H264SWDEC_SLICE_GROUP_MAP_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.c new file mode 100755 index 0000000000000000000000000000000000000000..a7c6f64df4a6586dd74d6b5b4794b86dbb5c33ea --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.c @@ -0,0 +1,1511 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + h264bsdDecodeSliceHeader + NumSliceGroupChangeCycleBits + RefPicListReordering + DecRefPicMarking + CheckPpsId + CheckFrameNum + CheckIdrPicId + CheckPicOrderCntLsb + CheckDeltaPicOrderCntBottom + CheckDeltaPicOrderCnt + CheckRedundantPicCnt + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_slice_header.h" +#include "h264bsd_util.h" +#include "h264bsd_vlc.h" +#include "h264bsd_nal_unit.h" +#include "h264bsd_dpb.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. 
Local function prototypes +------------------------------------------------------------------------------*/ + +static u32 RefPicListReordering(strmData_t *, refPicListReordering_t *, + u32, u32); + +static u32 NumSliceGroupChangeCycleBits(u32 picSizeInMbs, + u32 sliceGroupChangeRate); + +static u32 DecRefPicMarking(strmData_t *pStrmData, + decRefPicMarking_t *pDecRefPicMarking, nalUnitType_e nalUnitType, + u32 numRefFrames); + + +/*------------------------------------------------------------------------------ + + Function name: h264bsdDecodeSliceHeader + + Functional description: + Decode slice header data from the stream. + + Inputs: + pStrmData pointer to stream data structure + pSeqParamSet pointer to active sequence parameter set + pPicParamSet pointer to active picture parameter set + pNalUnit pointer to current NAL unit structure + + Outputs: + pSliceHeader decoded data is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK invalid stream data or end of stream + +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodeSliceHeader(strmData_t *pStrmData, sliceHeader_t *pSliceHeader, + seqParamSet_t *pSeqParamSet, picParamSet_t *pPicParamSet, + nalUnit_t *pNalUnit) +{ + +/* Variables */ + + u32 tmp, i, value; + i32 itmp; + u32 picSizeInMbs; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pSliceHeader); + ASSERT(pSeqParamSet); + ASSERT(pPicParamSet); + ASSERT( pNalUnit->nalUnitType == NAL_CODED_SLICE || + pNalUnit->nalUnitType == NAL_CODED_SLICE_IDR ); + + + H264SwDecMemset(pSliceHeader, 0, sizeof(sliceHeader_t)); + + picSizeInMbs = pSeqParamSet->picWidthInMbs * pSeqParamSet->picHeightInMbs; + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pSliceHeader->firstMbInSlice = value; + if (value >= picSizeInMbs) + { + EPRINT("first_mb_in_slice"); + return(HANTRO_NOK); + } + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + 
pSliceHeader->sliceType = value; + /* slice type has to be either I or P slice. P slice is not allowed when + * current NAL unit is an IDR NAL unit or num_ref_frames is 0 */ + if ( !IS_I_SLICE(pSliceHeader->sliceType) && + ( !IS_P_SLICE(pSliceHeader->sliceType) || + IS_IDR_NAL_UNIT(pNalUnit) || + !pSeqParamSet->numRefFrames ) ) + { + EPRINT("slice_type"); + return(HANTRO_NOK); + } + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pSliceHeader->picParameterSetId = value; + if (pSliceHeader->picParameterSetId != pPicParamSet->picParameterSetId) + { + EPRINT("pic_parameter_set_id"); + return(HANTRO_NOK); + } + + /* log2(maxFrameNum) -> num bits to represent frame_num */ + i = 0; + while (pSeqParamSet->maxFrameNum >> i) + i++; + i--; + + tmp = h264bsdGetBits(pStrmData, i); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + if (IS_IDR_NAL_UNIT(pNalUnit) && tmp != 0) + { + EPRINT("frame_num"); + return(HANTRO_NOK); + } + pSliceHeader->frameNum = tmp; + + if (IS_IDR_NAL_UNIT(pNalUnit)) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pSliceHeader->idrPicId = value; + if (value > 65535) + { + EPRINT("idr_pic_id"); + return(HANTRO_NOK); + } + } + + if (pSeqParamSet->picOrderCntType == 0) + { + /* log2(maxPicOrderCntLsb) -> num bits to represent pic_order_cnt_lsb */ + i = 0; + while (pSeqParamSet->maxPicOrderCntLsb >> i) + i++; + i--; + + tmp = h264bsdGetBits(pStrmData, i); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSliceHeader->picOrderCntLsb = tmp; + + if (pPicParamSet->picOrderPresentFlag) + { + tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp); + if (tmp != HANTRO_OK) + return(tmp); + pSliceHeader->deltaPicOrderCntBottom = itmp; + } + + /* check that picOrderCnt for IDR picture will be zero. 
See + * DecodePicOrderCnt function to understand the logic here */ + if ( IS_IDR_NAL_UNIT(pNalUnit) && + ( (pSliceHeader->picOrderCntLsb > + pSeqParamSet->maxPicOrderCntLsb/2) || + MIN((i32)pSliceHeader->picOrderCntLsb, + (i32)pSliceHeader->picOrderCntLsb + + pSliceHeader->deltaPicOrderCntBottom) != 0 ) ) + { + return(HANTRO_NOK); + } + } + + if ( (pSeqParamSet->picOrderCntType == 1) && + !pSeqParamSet->deltaPicOrderAlwaysZeroFlag ) + { + tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp); + if (tmp != HANTRO_OK) + return(tmp); + pSliceHeader->deltaPicOrderCnt[0] = itmp; + + if (pPicParamSet->picOrderPresentFlag) + { + tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp); + if (tmp != HANTRO_OK) + return(tmp); + pSliceHeader->deltaPicOrderCnt[1] = itmp; + } + + /* check that picOrderCnt for IDR picture will be zero. See + * DecodePicOrderCnt function to understand the logic here */ + if ( IS_IDR_NAL_UNIT(pNalUnit) && + MIN(pSliceHeader->deltaPicOrderCnt[0], + pSliceHeader->deltaPicOrderCnt[0] + + pSeqParamSet->offsetForTopToBottomField + + pSliceHeader->deltaPicOrderCnt[1]) != 0) + { + return(HANTRO_NOK); + } + } + + if (pPicParamSet->redundantPicCntPresentFlag) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pSliceHeader->redundantPicCnt = value; + if (value > 127) + { + EPRINT("redundant_pic_cnt"); + return(HANTRO_NOK); + } + } + + if (IS_P_SLICE(pSliceHeader->sliceType)) + { + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pSliceHeader->numRefIdxActiveOverrideFlag = tmp; + + if (pSliceHeader->numRefIdxActiveOverrideFlag) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + if (value > 15) + { + EPRINT("num_ref_idx_l0_active_minus1"); + return(HANTRO_NOK); + } + pSliceHeader->numRefIdxL0Active = value + 1; + } + /* set numRefIdxL0Active from pic param set */ + else + { + /* if value (minus1) in picture parameter set 
exceeds 15 it should + * have been overridden here */ + if (pPicParamSet->numRefIdxL0Active > 16) + { + EPRINT("num_ref_idx_active_override_flag"); + return(HANTRO_NOK); + } + pSliceHeader->numRefIdxL0Active = pPicParamSet->numRefIdxL0Active; + } + } + + if (IS_P_SLICE(pSliceHeader->sliceType)) + { + tmp = RefPicListReordering(pStrmData, + &pSliceHeader->refPicListReordering, + pSliceHeader->numRefIdxL0Active, + pSeqParamSet->maxFrameNum); + if (tmp != HANTRO_OK) + return(tmp); + } + + if (pNalUnit->nalRefIdc != 0) + { + tmp = DecRefPicMarking(pStrmData, &pSliceHeader->decRefPicMarking, + pNalUnit->nalUnitType, pSeqParamSet->numRefFrames); + if (tmp != HANTRO_OK) + return(tmp); + } + + /* decode sliceQpDelta and check that initial QP for the slice will be on + * the range [0, 51] */ + tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp); + if (tmp != HANTRO_OK) + return(tmp); + pSliceHeader->sliceQpDelta = itmp; + itmp += (i32)pPicParamSet->picInitQp; + if ( (itmp < 0) || (itmp > 51) ) + { + EPRINT("slice_qp_delta"); + return(HANTRO_NOK); + } + + if (pPicParamSet->deblockingFilterControlPresentFlag) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pSliceHeader->disableDeblockingFilterIdc = value; + if (pSliceHeader->disableDeblockingFilterIdc > 2) + { + EPRINT("disable_deblocking_filter_idc"); + return(HANTRO_NOK); + } + + if (pSliceHeader->disableDeblockingFilterIdc != 1) + { + tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp); + if (tmp != HANTRO_OK) + return(tmp); + if ( (itmp < -6) || (itmp > 6) ) + { + EPRINT("slice_alpha_c0_offset_div2"); + return(HANTRO_NOK); + } + pSliceHeader->sliceAlphaC0Offset = itmp * 2; + + tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp); + if (tmp != HANTRO_OK) + return(tmp); + if ( (itmp < -6) || (itmp > 6) ) + { + EPRINT("slice_beta_offset_div2"); + return(HANTRO_NOK); + } + pSliceHeader->sliceBetaOffset = itmp * 2; + } + } + + if ( (pPicParamSet->numSliceGroups > 1) && 
+ (pPicParamSet->sliceGroupMapType >= 3) && + (pPicParamSet->sliceGroupMapType <= 5) ) + { + /* set tmp to number of bits used to represent slice_group_change_cycle + * in the stream */ + tmp = NumSliceGroupChangeCycleBits(picSizeInMbs, + pPicParamSet->sliceGroupChangeRate); + value = h264bsdGetBits(pStrmData, tmp); + if (value == END_OF_STREAM) + return(HANTRO_NOK); + pSliceHeader->sliceGroupChangeCycle = value; + + /* corresponds to tmp = Ceil(picSizeInMbs / sliceGroupChangeRate) */ + tmp = (picSizeInMbs + pPicParamSet->sliceGroupChangeRate - 1) / + pPicParamSet->sliceGroupChangeRate; + if (pSliceHeader->sliceGroupChangeCycle > tmp) + { + EPRINT("slice_group_change_cycle"); + return(HANTRO_NOK); + } + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: NumSliceGroupChangeCycleBits + + Functional description: + Determine number of bits needed to represent + slice_group_change_cycle in the stream. The standard states that + slice_group_change_cycle is represented by + Ceil( Log2( (picSizeInMbs / sliceGroupChangeRate) + 1) ) + + bits. Division "/" in the equation is non-truncating division. 
+ + Inputs: + picSizeInMbs picture size in macroblocks + sliceGroupChangeRate + + Outputs: + none + + Returns: + number of bits needed + +------------------------------------------------------------------------------*/ + +u32 NumSliceGroupChangeCycleBits(u32 picSizeInMbs, u32 sliceGroupChangeRate) +{ + +/* Variables */ + + u32 tmp,numBits,mask; + +/* Code */ + + ASSERT(picSizeInMbs); + ASSERT(sliceGroupChangeRate); + ASSERT(sliceGroupChangeRate <= picSizeInMbs); + + /* compute (picSizeInMbs / sliceGroupChangeRate + 1), rounded up */ + if (picSizeInMbs % sliceGroupChangeRate) + tmp = 2 + picSizeInMbs/sliceGroupChangeRate; + else + tmp = 1 + picSizeInMbs/sliceGroupChangeRate; + + numBits = 0; + mask = ~0U; + + /* set numBits to position of the most significant non-zero bit */ + while (tmp & (mask<<++numBits)) + ; + numBits--; + + /* add one more bit if value greater than 2^numBits */ + if (tmp & ((1 << numBits) - 1)) + numBits++; + + return(numBits); + +} + +/*------------------------------------------------------------------------------ + + Function: RefPicListReordering + + Functional description: + Decode reference picture list reordering syntax elements from + the stream. At most numRefIdxActive + 1 commands, including the + terminating command (3), are accepted. + + NOTE(review): this header and the function prologue down to the + first h264bsdGetBits call were lost in extraction and have been + reconstructed from the call site and the surviving body - + confirm against the upstream source. + + Inputs: + pStrmData pointer to stream data structure + numRefIdxActive number of active reference indices for the + current slice + maxPicNum exclusive upper bound for abs_diff_pic_num_minus1 + + Outputs: + pRefPicListReordering decoded data is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK invalid stream data + +------------------------------------------------------------------------------*/ + +u32 RefPicListReordering(strmData_t *pStrmData, + refPicListReordering_t *pRefPicListReordering, u32 numRefIdxActive, + u32 maxPicNum) +{ + +/* Variables */ + + u32 tmp, value, i; + u32 command; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pRefPicListReordering); + ASSERT(numRefIdxActive); + ASSERT(maxPicNum); + + /* ref_pic_list_reordering_flag_l0 */ + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + + pRefPicListReordering->refPicListReorderingFlagL0 = tmp; + + if (pRefPicListReordering->refPicListReorderingFlagL0) + { + i = 0; + + do + { + if (i > numRefIdxActive) + { + EPRINT("Too many reordering commands"); + return(HANTRO_NOK); + } + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &command); + if (tmp != HANTRO_OK) + return(tmp); + if (command > 3) + { + EPRINT("reordering_of_pic_nums_idc"); + return(HANTRO_NOK); + } + + pRefPicListReordering->command[i].reorderingOfPicNumsIdc = command; + + if ((command == 0) || (command == 1)) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + if (value >= maxPicNum) + { + EPRINT("abs_diff_pic_num_minus1"); + return(HANTRO_NOK); + } + pRefPicListReordering->command[i].absDiffPicNum = value + 1; + } + else if (command == 2) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pRefPicListReordering->command[i].longTermPicNum = value; + } + i++; + } while (command != 3); + + /* there shall be at least one reordering command if + * refPicListReorderingFlagL0 was 
set */ + if (i == 1) + { + EPRINT("ref_pic_list_reordering"); + return(HANTRO_NOK); + } + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: DecRefPicMarking + + Functional description: + Decode decoded reference picture marking syntax elements from + the stream. + + Inputs: + pStrmData pointer to stream data structure + nalUnitType type of the current NAL unit + numRefFrames max number of reference frames from the active SPS + + Outputs: + pDecRefPicMarking decoded data is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK invalid stream data + +------------------------------------------------------------------------------*/ + +u32 DecRefPicMarking(strmData_t *pStrmData, + decRefPicMarking_t *pDecRefPicMarking, nalUnitType_e nalUnitType, + u32 numRefFrames) +{ + +/* Variables */ + + u32 tmp, value; + u32 i; + u32 operation; + /* variables for error checking purposes, store number of memory + * management operations of certain type */ + u32 num4 = 0, num5 = 0, num6 = 0, num1to3 = 0; + +/* Code */ + + ASSERT( nalUnitType == NAL_CODED_SLICE_IDR || + nalUnitType == NAL_CODED_SLICE || + nalUnitType == NAL_SEI ); + + + if (nalUnitType == NAL_CODED_SLICE_IDR) + { + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pDecRefPicMarking->noOutputOfPriorPicsFlag = tmp; + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pDecRefPicMarking->longTermReferenceFlag = tmp; + if (!numRefFrames && pDecRefPicMarking->longTermReferenceFlag) + { + EPRINT("long_term_reference_flag"); + return(HANTRO_NOK); + } + } + else + { + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pDecRefPicMarking->adaptiveRefPicMarkingModeFlag = tmp; + if (pDecRefPicMarking->adaptiveRefPicMarkingModeFlag) + { + i = 0; + do + { + /* see explanation of the MAX_NUM_MMC_OPERATIONS in + * slice_header.h */ + if (i > 
(2 * numRefFrames + 2)) + { + EPRINT("Too many management operations"); + return(HANTRO_NOK); + } + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &operation); + if (tmp != HANTRO_OK) + return(tmp); + if (operation > 6) + { + EPRINT("memory_management_control_operation"); + return(HANTRO_NOK); + } + + pDecRefPicMarking->operation[i]. + memoryManagementControlOperation = operation; + if ((operation == 1) || (operation == 3)) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pDecRefPicMarking->operation[i].differenceOfPicNums = + value + 1; + } + if (operation == 2) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pDecRefPicMarking->operation[i].longTermPicNum = value; + } + if ((operation == 3) || (operation == 6)) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + pDecRefPicMarking->operation[i].longTermFrameIdx = + value; + } + if (operation == 4) + { + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + /* value shall be in range [0, numRefFrames] */ + if (value > numRefFrames) + { + EPRINT("max_long_term_frame_idx_plus1"); + return(HANTRO_NOK); + } + if (value == 0) + { + pDecRefPicMarking->operation[i]. + maxLongTermFrameIdx = + NO_LONG_TERM_FRAME_INDICES; + } + else + { + pDecRefPicMarking->operation[i]. 
+ maxLongTermFrameIdx = value - 1; + } + num4++; + } + if (operation == 5) + { + num5++; + } + if (operation && operation <= 3) + num1to3++; + if (operation == 6) + num6++; + + i++; + } while (operation != 0); + + /* error checking */ + if (num4 > 1 || num5 > 1 || num6 > 1 || (num1to3 && num5)) + return(HANTRO_NOK); + + } + } + + return(HANTRO_OK); +} + +/*------------------------------------------------------------------------------ + + Function name: h264bsdCheckPpsId + + Functional description: + Peek value of pic_parameter_set_id from the slice header. Function + does not modify current stream positions but copies the stream + data structure to tmp structure which is used while accessing + stream data. + + Inputs: + pStrmData pointer to stream data structure + + Outputs: + picParamSetId value is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK invalid stream data + +------------------------------------------------------------------------------*/ + +u32 h264bsdCheckPpsId(strmData_t *pStrmData, u32 *picParamSetId) +{ + +/* Variables */ + + u32 tmp, value; + strmData_t tmpStrmData[1]; + +/* Code */ + + ASSERT(pStrmData); + + /* don't touch original stream position params */ + *tmpStrmData = *pStrmData; + + /* first_mb_in_slice */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* slice_type */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + if (value >= MAX_NUM_PIC_PARAM_SETS) + return(HANTRO_NOK); + + *picParamSetId = value; + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdCheckFrameNum + + Functional description: + Peek value of frame_num from the slice header. 
Function does not + modify current stream positions but copies the stream data + structure to tmp structure which is used while accessing stream + data. + + Inputs: + pStrmData pointer to stream data structure + maxFrameNum + + Outputs: + frameNum value is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK invalid stream data + +------------------------------------------------------------------------------*/ + +u32 h264bsdCheckFrameNum( + strmData_t *pStrmData, + u32 maxFrameNum, + u32 *frameNum) +{ + +/* Variables */ + + u32 tmp, value, i; + strmData_t tmpStrmData[1]; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(maxFrameNum); + ASSERT(frameNum); + + /* don't touch original stream position params */ + *tmpStrmData = *pStrmData; + + /* skip first_mb_in_slice */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* skip slice_type */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* skip pic_parameter_set_id */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* log2(maxFrameNum) -> num bits to represent frame_num */ + i = 0; + while (maxFrameNum >> i) + i++; + i--; + + /* frame_num */ + tmp = h264bsdGetBits(tmpStrmData, i); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + *frameNum = tmp; + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdCheckIdrPicId + + Functional description: + Peek value of idr_pic_id from the slice header. Function does not + modify current stream positions but copies the stream data + structure to tmp structure which is used while accessing stream + data. 
+ + Inputs: + pStrmData pointer to stream data structure + maxFrameNum max frame number from active SPS + nalUnitType type of the current NAL unit + + Outputs: + idrPicId value is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK invalid stream data + +------------------------------------------------------------------------------*/ + +u32 h264bsdCheckIdrPicId( + strmData_t *pStrmData, + u32 maxFrameNum, + nalUnitType_e nalUnitType, + u32 *idrPicId) +{ + +/* Variables */ + + u32 tmp, value, i; + strmData_t tmpStrmData[1]; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(maxFrameNum); + ASSERT(idrPicId); + + /* nalUnitType must be equal to 5 because otherwise idrPicId is not + * present */ + if (nalUnitType != NAL_CODED_SLICE_IDR) + return(HANTRO_NOK); + + /* don't touch original stream position params */ + *tmpStrmData = *pStrmData; + + /* skip first_mb_in_slice */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* skip slice_type */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* skip pic_parameter_set_id */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* log2(maxFrameNum) -> num bits to represent frame_num */ + i = 0; + while (maxFrameNum >> i) + i++; + i--; + + /* skip frame_num */ + tmp = h264bsdGetBits(tmpStrmData, i); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + + /* idr_pic_id */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, idrPicId); + if (tmp != HANTRO_OK) + return(tmp); + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdCheckPicOrderCntLsb + + Functional description: + Peek value of pic_order_cnt_lsb from the slice header. Function + does not modify current stream positions but copies the stream + data structure to tmp structure which is used while accessing + stream data. 
+ + Inputs: + pStrmData pointer to stream data structure + pSeqParamSet pointer to active SPS + nalUnitType type of the current NAL unit + + Outputs: + picOrderCntLsb value is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK invalid stream data + +------------------------------------------------------------------------------*/ + +u32 h264bsdCheckPicOrderCntLsb( + strmData_t *pStrmData, + seqParamSet_t *pSeqParamSet, + nalUnitType_e nalUnitType, + u32 *picOrderCntLsb) +{ + +/* Variables */ + + u32 tmp, value, i; + strmData_t tmpStrmData[1]; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pSeqParamSet); + ASSERT(picOrderCntLsb); + + /* picOrderCntType must be equal to 0 */ + ASSERT(pSeqParamSet->picOrderCntType == 0); + ASSERT(pSeqParamSet->maxFrameNum); + ASSERT(pSeqParamSet->maxPicOrderCntLsb); + + /* don't touch original stream position params */ + *tmpStrmData = *pStrmData; + + /* skip first_mb_in_slice */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* skip slice_type */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* skip pic_parameter_set_id */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* log2(maxFrameNum) -> num bits to represent frame_num */ + i = 0; + while (pSeqParamSet->maxFrameNum >> i) + i++; + i--; + + /* skip frame_num */ + tmp = h264bsdGetBits(tmpStrmData, i); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + + /* skip idr_pic_id when necessary */ + if (nalUnitType == NAL_CODED_SLICE_IDR) + { + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + } + + /* log2(maxPicOrderCntLsb) -> num bits to represent pic_order_cnt_lsb */ + i = 0; + while (pSeqParamSet->maxPicOrderCntLsb >> i) + i++; + i--; + + /* pic_order_cnt_lsb */ + tmp = h264bsdGetBits(tmpStrmData, i); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + 
*picOrderCntLsb = tmp; + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdCheckDeltaPicOrderCntBottom + + Functional description: + Peek value of delta_pic_order_cnt_bottom from the slice header. + Function does not modify current stream positions but copies the + stream data structure to tmp structure which is used while + accessing stream data. + + Inputs: + pStrmData pointer to stream data structure + pSeqParamSet pointer to active SPS + nalUnitType type of the current NAL unit + + Outputs: + deltaPicOrderCntBottom value is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK invalid stream data + +------------------------------------------------------------------------------*/ + +u32 h264bsdCheckDeltaPicOrderCntBottom( + strmData_t *pStrmData, + seqParamSet_t *pSeqParamSet, + nalUnitType_e nalUnitType, + i32 *deltaPicOrderCntBottom) +{ + +/* Variables */ + + u32 tmp, value, i; + strmData_t tmpStrmData[1]; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pSeqParamSet); + ASSERT(deltaPicOrderCntBottom); + + /* picOrderCntType must be equal to 0 and picOrderPresentFlag must be TRUE + * */ + ASSERT(pSeqParamSet->picOrderCntType == 0); + ASSERT(pSeqParamSet->maxFrameNum); + ASSERT(pSeqParamSet->maxPicOrderCntLsb); + + /* don't touch original stream position params */ + *tmpStrmData = *pStrmData; + + /* skip first_mb_in_slice */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* skip slice_type */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* skip pic_parameter_set_id */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* log2(maxFrameNum) -> num bits to represent frame_num */ + i = 0; + while (pSeqParamSet->maxFrameNum >> i) + i++; + i--; + + /* skip frame_num */ + tmp = h264bsdGetBits(tmpStrmData, i); + if (tmp == 
END_OF_STREAM) + return(HANTRO_NOK); + + /* skip idr_pic_id when necessary */ + if (nalUnitType == NAL_CODED_SLICE_IDR) + { + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + } + + /* log2(maxPicOrderCntLsb) -> num bits to represent pic_order_cnt_lsb */ + i = 0; + while (pSeqParamSet->maxPicOrderCntLsb >> i) + i++; + i--; + + /* skip pic_order_cnt_lsb */ + tmp = h264bsdGetBits(tmpStrmData, i); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + + /* delta_pic_order_cnt_bottom */ + tmp = h264bsdDecodeExpGolombSigned(tmpStrmData, deltaPicOrderCntBottom); + if (tmp != HANTRO_OK) + return(tmp); + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdCheckDeltaPicOrderCnt + + Functional description: + Peek values delta_pic_order_cnt[0] and delta_pic_order_cnt[1] + from the slice header. Function does not modify current stream + positions but copies the stream data structure to tmp structure + which is used while accessing stream data. 
+ + Inputs: + pStrmData pointer to stream data structure + pSeqParamSet pointer to active SPS + nalUnitType type of the current NAL unit + picOrderPresentFlag flag indicating if delta_pic_order_cnt[1] + is present in the stream + + Outputs: + deltaPicOrderCnt values are stored here + + Returns: + HANTRO_OK success + HANTRO_NOK invalid stream data + +------------------------------------------------------------------------------*/ + +u32 h264bsdCheckDeltaPicOrderCnt( + strmData_t *pStrmData, + seqParamSet_t *pSeqParamSet, + nalUnitType_e nalUnitType, + u32 picOrderPresentFlag, + i32 *deltaPicOrderCnt) +{ + +/* Variables */ + + u32 tmp, value, i; + strmData_t tmpStrmData[1]; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pSeqParamSet); + ASSERT(deltaPicOrderCnt); + + /* picOrderCntType must be equal to 1 and deltaPicOrderAlwaysZeroFlag must + * be FALSE */ + ASSERT(pSeqParamSet->picOrderCntType == 1); + ASSERT(!pSeqParamSet->deltaPicOrderAlwaysZeroFlag); + ASSERT(pSeqParamSet->maxFrameNum); + + /* don't touch original stream position params */ + *tmpStrmData = *pStrmData; + + /* skip first_mb_in_slice */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* skip slice_type */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* skip pic_parameter_set_id */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* log2(maxFrameNum) -> num bits to represent frame_num */ + i = 0; + while (pSeqParamSet->maxFrameNum >> i) + i++; + i--; + + /* skip frame_num */ + tmp = h264bsdGetBits(tmpStrmData, i); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + + /* skip idr_pic_id when necessary */ + if (nalUnitType == NAL_CODED_SLICE_IDR) + { + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + } + + /* delta_pic_order_cnt[0] */ + tmp = h264bsdDecodeExpGolombSigned(tmpStrmData, 
&deltaPicOrderCnt[0]); + if (tmp != HANTRO_OK) + return(tmp); + + /* delta_pic_order_cnt[1] if present */ + if (picOrderPresentFlag) + { + tmp = h264bsdDecodeExpGolombSigned(tmpStrmData, &deltaPicOrderCnt[1]); + if (tmp != HANTRO_OK) + return(tmp); + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdCheckRedundantPicCnt + + Functional description: + Peek value of redundant_pic_cnt from the slice header. Function + does not modify current stream positions but copies the stream + data structure to tmp structure which is used while accessing + stream data. + + Inputs: + pStrmData pointer to stream data structure + pSeqParamSet pointer to active SPS + pPicParamSet pointer to active PPS + nalUnitType type of the current NAL unit + + Outputs: + redundantPicCnt value is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK invalid stream data + +------------------------------------------------------------------------------*/ + +u32 h264bsdCheckRedundantPicCnt( + strmData_t *pStrmData, + seqParamSet_t *pSeqParamSet, + picParamSet_t *pPicParamSet, + nalUnitType_e nalUnitType, + u32 *redundantPicCnt) +{ + +/* Variables */ + + u32 tmp, value, i; + i32 ivalue; + strmData_t tmpStrmData[1]; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pSeqParamSet); + ASSERT(pPicParamSet); + ASSERT(redundantPicCnt); + + /* redundant_pic_cnt_flag must be TRUE */ + ASSERT(pPicParamSet->redundantPicCntPresentFlag); + ASSERT(pSeqParamSet->maxFrameNum); + ASSERT(pSeqParamSet->picOrderCntType > 0 || + pSeqParamSet->maxPicOrderCntLsb); + + /* don't touch original stream position params */ + *tmpStrmData = *pStrmData; + + /* skip first_mb_in_slice */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* skip slice_type */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* skip pic_parameter_set_id */ + tmp = 
h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + + /* log2(maxFrameNum) -> num bits to represent frame_num */ + i = 0; + while (pSeqParamSet->maxFrameNum >> i) + i++; + i--; + + /* skip frame_num */ + tmp = h264bsdGetBits(tmpStrmData, i); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + + /* skip idr_pic_id when necessary */ + if (nalUnitType == NAL_CODED_SLICE_IDR) + { + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if (tmp != HANTRO_OK) + return(tmp); + } + + if (pSeqParamSet->picOrderCntType == 0) + { + /* log2(maxPicOrderCntLsb) -> num bits to represent pic_order_cnt_lsb */ + i = 0; + while (pSeqParamSet->maxPicOrderCntLsb >> i) + i++; + i--; + + /* pic_order_cnt_lsb */ + tmp = h264bsdGetBits(tmpStrmData, i); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + + if (pPicParamSet->picOrderPresentFlag) + { + /* skip delta_pic_order_cnt_bottom */ + tmp = h264bsdDecodeExpGolombSigned(tmpStrmData, &ivalue); + if (tmp != HANTRO_OK) + return(tmp); + } + } + + if (pSeqParamSet->picOrderCntType == 1 && + !pSeqParamSet->deltaPicOrderAlwaysZeroFlag) + { + /* delta_pic_order_cnt[0] */ + tmp = h264bsdDecodeExpGolombSigned(tmpStrmData, &ivalue); + if (tmp != HANTRO_OK) + return(tmp); + + /* delta_pic_order_cnt[1] if present */ + if (pPicParamSet->picOrderPresentFlag) + { + tmp = h264bsdDecodeExpGolombSigned(tmpStrmData, &ivalue); + if (tmp != HANTRO_OK) + return(tmp); + } + } + + /* redundant_pic_cnt */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, redundantPicCnt); + if (tmp != HANTRO_OK) + return(tmp); + + return(HANTRO_OK); + +} + + +/*------------------------------------------------------------------------------ + + Function: h264bsdCheckPriorPicsFlag + + Functional description: + Peek value of no_output_of_prior_pics_flag from the slice header. + Function does not modify current stream positions but copies + the stream data structure to tmp structure which is used while + accessing stream data. 
+ + Inputs: + pStrmData pointer to stream data structure + pSeqParamSet pointer to active SPS + pPicParamSet pointer to active PPS + nalUnitType type of the current NAL unit + + Outputs: + noOutputOfPriorPicsFlag value is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK invalid stream data + +------------------------------------------------------------------------------*/ +/*lint -e715 disable lint info nalUnitType not referenced */ +u32 h264bsdCheckPriorPicsFlag(u32 * noOutputOfPriorPicsFlag, + const strmData_t * pStrmData, + const seqParamSet_t * pSeqParamSet, + const picParamSet_t * pPicParamSet, + nalUnitType_e nalUnitType) +{ +/* Variables */ + + u32 tmp, value, i; + i32 ivalue; + strmData_t tmpStrmData[1]; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pSeqParamSet); + ASSERT(pPicParamSet); + ASSERT(noOutputOfPriorPicsFlag); + + /* must be IDR slice */ + ASSERT(nalUnitType == NAL_CODED_SLICE_IDR); + + /* don't touch original stream position params */ + *tmpStrmData = *pStrmData; + + /* skip first_mb_in_slice */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if(tmp != HANTRO_OK) + return (tmp); + + /* skip slice_type */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if(tmp != HANTRO_OK) + return (tmp); + + /* skip pic_parameter_set_id */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if(tmp != HANTRO_OK) + return (tmp); + + /* log2(maxFrameNum) -> num bits to represent frame_num */ + i = 0; + while(pSeqParamSet->maxFrameNum >> i) + i++; + i--; + + /* skip frame_num */ + tmp = h264bsdGetBits(tmpStrmData, i); + if(tmp == END_OF_STREAM) + return (HANTRO_NOK); + + /* skip idr_pic_id */ + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if(tmp != HANTRO_OK) + return (tmp); + + if(pSeqParamSet->picOrderCntType == 0) + { + /* log2(maxPicOrderCntLsb) -> num bits to represent pic_order_cnt_lsb */ + i = 0; + while(pSeqParamSet->maxPicOrderCntLsb >> i) + i++; + i--; + + /* skip pic_order_cnt_lsb */ + tmp = 
h264bsdGetBits(tmpStrmData, i); + if(tmp == END_OF_STREAM) + return (HANTRO_NOK); + + if(pPicParamSet->picOrderPresentFlag) + { + /* skip delta_pic_order_cnt_bottom */ + tmp = h264bsdDecodeExpGolombSigned(tmpStrmData, &ivalue); + if(tmp != HANTRO_OK) + return (tmp); + } + } + + if(pSeqParamSet->picOrderCntType == 1 && + !pSeqParamSet->deltaPicOrderAlwaysZeroFlag) + { + /* skip delta_pic_order_cnt[0] */ + tmp = h264bsdDecodeExpGolombSigned(tmpStrmData, &ivalue); + if(tmp != HANTRO_OK) + return (tmp); + + /* skip delta_pic_order_cnt[1] if present */ + if(pPicParamSet->picOrderPresentFlag) + { + tmp = h264bsdDecodeExpGolombSigned(tmpStrmData, &ivalue); + if(tmp != HANTRO_OK) + return (tmp); + } + } + + /* skip redundant_pic_cnt */ + if(pPicParamSet->redundantPicCntPresentFlag) + { + tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value); + if(tmp != HANTRO_OK) + return (tmp); + } + + *noOutputOfPriorPicsFlag = h264bsdGetBits(tmpStrmData, 1); + if(*noOutputOfPriorPicsFlag == END_OF_STREAM) + return (HANTRO_NOK); + + return (HANTRO_OK); + +} +/*lint +e715 */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.h new file mode 100755 index 0000000000000000000000000000000000000000..198898a87b5aa5f0b2645f924557dbf147fc9e85 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.h @@ -0,0 +1,186 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_SLICE_HEADER_H +#define H264SWDEC_SLICE_HEADER_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_stream.h" +#include "h264bsd_cfg.h" +#include "h264bsd_seq_param_set.h" +#include "h264bsd_pic_param_set.h" +#include "h264bsd_nal_unit.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +enum { + P_SLICE = 0, + I_SLICE = 2 +}; + +enum {NO_LONG_TERM_FRAME_INDICES = 0xFFFF}; + +/* macro to determine if slice is an inter slice, sliceTypes 0 and 5 */ +#define IS_P_SLICE(sliceType) (((sliceType) == P_SLICE) || \ + ((sliceType) == P_SLICE + 5)) + +/* macro to determine if slice is an intra slice, sliceTypes 2 and 7 */ +#define IS_I_SLICE(sliceType) (((sliceType) == I_SLICE) || \ + ((sliceType) == I_SLICE + 5)) + +/*------------------------------------------------------------------------------ + 3. 
Data types +------------------------------------------------------------------------------*/ + +/* structure to store data of one reference picture list reordering operation */ +typedef struct +{ + u32 reorderingOfPicNumsIdc; + u32 absDiffPicNum; + u32 longTermPicNum; +} refPicListReorderingOperation_t; + +/* structure to store reference picture list reordering operations */ +typedef struct +{ + u32 refPicListReorderingFlagL0; + refPicListReorderingOperation_t command[MAX_NUM_REF_PICS+1]; +} refPicListReordering_t; + +/* structure to store data of one DPB memory management control operation */ +typedef struct +{ + u32 memoryManagementControlOperation; + u32 differenceOfPicNums; + u32 longTermPicNum; + u32 longTermFrameIdx; + u32 maxLongTermFrameIdx; +} memoryManagementOperation_t; + +/* worst case scenario: all MAX_NUM_REF_PICS pictures in the buffer are + * short term pictures, each one of them is first marked as long term + * reference picture which is then marked as unused for reference. + * Additionally, max long-term frame index is set and current picture is + * marked as long term reference picture. 
Last position reserved for + * end memory_management_control_operation command */ +#define MAX_NUM_MMC_OPERATIONS (2*MAX_NUM_REF_PICS+2+1) + +/* structure to store decoded reference picture marking data */ +typedef struct +{ + u32 noOutputOfPriorPicsFlag; + u32 longTermReferenceFlag; + u32 adaptiveRefPicMarkingModeFlag; + memoryManagementOperation_t operation[MAX_NUM_MMC_OPERATIONS]; +} decRefPicMarking_t; + +/* structure to store slice header data decoded from the stream */ +typedef struct +{ + u32 firstMbInSlice; + u32 sliceType; + u32 picParameterSetId; + u32 frameNum; + u32 idrPicId; + u32 picOrderCntLsb; + i32 deltaPicOrderCntBottom; + i32 deltaPicOrderCnt[2]; + u32 redundantPicCnt; + u32 numRefIdxActiveOverrideFlag; + u32 numRefIdxL0Active; + i32 sliceQpDelta; + u32 disableDeblockingFilterIdc; + i32 sliceAlphaC0Offset; + i32 sliceBetaOffset; + u32 sliceGroupChangeCycle; + refPicListReordering_t refPicListReordering; + decRefPicMarking_t decRefPicMarking; +} sliceHeader_t; + +/*------------------------------------------------------------------------------ + 4. 
Function prototypes +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodeSliceHeader(strmData_t *pStrmData, + sliceHeader_t *pSliceHeader, + seqParamSet_t *pSeqParamSet, + picParamSet_t *pPicParamSet, + nalUnit_t *pNalUnit); + +u32 h264bsdCheckPpsId(strmData_t *pStrmData, u32 *ppsId); + +u32 h264bsdCheckFrameNum( + strmData_t *pStrmData, + u32 maxFrameNum, + u32 *frameNum); + +u32 h264bsdCheckIdrPicId( + strmData_t *pStrmData, + u32 maxFrameNum, + nalUnitType_e nalUnitType, + u32 *idrPicId); + +u32 h264bsdCheckPicOrderCntLsb( + strmData_t *pStrmData, + seqParamSet_t *pSeqParamSet, + nalUnitType_e nalUnitType, + u32 *picOrderCntLsb); + +u32 h264bsdCheckDeltaPicOrderCntBottom( + strmData_t *pStrmData, + seqParamSet_t *pSeqParamSet, + nalUnitType_e nalUnitType, + i32 *deltaPicOrderCntBottom); + +u32 h264bsdCheckDeltaPicOrderCnt( + strmData_t *pStrmData, + seqParamSet_t *pSeqParamSet, + nalUnitType_e nalUnitType, + u32 picOrderPresentFlag, + i32 *deltaPicOrderCnt); + +u32 h264bsdCheckRedundantPicCnt( + strmData_t *pStrmData, + seqParamSet_t *pSeqParamSet, + picParamSet_t *pPicParamSet, + nalUnitType_e nalUnitType, + u32 *redundantPicCnt); + +u32 h264bsdCheckPriorPicsFlag(u32 * noOutputOfPriorPicsFlag, + const strmData_t * pStrmData, + const seqParamSet_t * pSeqParamSet, + const picParamSet_t * pPicParamSet, + nalUnitType_e nalUnitType); + +#endif /* #ifdef H264SWDEC_SLICE_HEADER_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.c new file mode 100755 index 0000000000000000000000000000000000000000..3234754b30536a0f5658b2a258f16d7ed6f07a07 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.c @@ -0,0 +1,888 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with 
the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + h264bsdInitStorage + h264bsdStoreSeqParamSet + h264bsdStorePicParamSet + h264bsdActivateParamSets + h264bsdResetStorage + h264bsdIsStartOfPicture + h264bsdIsEndOfPicture + h264bsdComputeSliceGroupMap + h264bsdCheckAccessUnitBoundary + CheckPps + h264bsdValidParamSets + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_storage.h" +#include "h264bsd_util.h" +#include "h264bsd_neighbour.h" +#include "h264bsd_slice_group_map.h" +#include "h264bsd_dpb.h" +#include "h264bsd_nal_unit.h" +#include "h264bsd_slice_header.h" +#include "h264bsd_seq_param_set.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. 
Local function prototypes +------------------------------------------------------------------------------*/ + +static u32 CheckPps(picParamSet_t *pps, seqParamSet_t *sps); + +/*------------------------------------------------------------------------------ + + Function name: h264bsdInitStorage + + Functional description: + Initialize storage structure. Sets contents of the storage to '0' + except for the active parameter set ids, which are initialized + to invalid values. + + Inputs: + + Outputs: + pStorage initialized data stored here + + Returns: + none + +------------------------------------------------------------------------------*/ + +void h264bsdInitStorage(storage_t *pStorage) +{ + +/* Variables */ + +/* Code */ + + ASSERT(pStorage); + + H264SwDecMemset(pStorage, 0, sizeof(storage_t)); + + pStorage->activeSpsId = MAX_NUM_SEQ_PARAM_SETS; + pStorage->activePpsId = MAX_NUM_PIC_PARAM_SETS; + + pStorage->aub->firstCallFlag = HANTRO_TRUE; +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdStoreSeqParamSet + + Functional description: + Store sequence parameter set into the storage. 
If active SPS is
            overwritten -> check if contents changes and if it does, set
            parameters to force reactivation of parameter sets

        Inputs:
            pStorage        pointer to storage structure
            pSeqParamSet    pointer to param set to be stored

        Outputs:
            none

        Returns:
            HANTRO_OK                success
            MEMORY_ALLOCATION_ERROR  failure in memory allocation


------------------------------------------------------------------------------*/

u32 h264bsdStoreSeqParamSet(storage_t *pStorage, seqParamSet_t *pSeqParamSet)
{

    u32 spsId;
    u32 isActive;
    seqParamSet_t *pStored;

    ASSERT(pStorage);
    ASSERT(pSeqParamSet);
    ASSERT(pSeqParamSet->seqParameterSetId < MAX_NUM_SEQ_PARAM_SETS);

    spsId = pSeqParamSet->seqParameterSetId;

    if (pStorage->sps[spsId] == NULL)
    {
        /* id not used before -> allocate a slot for it */
        ALLOCATE(pStorage->sps[spsId], 1, seqParamSet_t);
        if (pStorage->sps[spsId] == NULL)
            return(MEMORY_ALLOCATION_ERROR);
    }
    else
    {
        isActive = (spsId == pStorage->activeSpsId) ? HANTRO_TRUE :
                                                      HANTRO_FALSE;

        /* the active sps was re-sent with identical contents: keep the
         * stored one, free the memories allocated for the just decoded sps
         * and continue */
        if (isActive &&
            h264bsdCompareSeqParamSets(pSeqParamSet, pStorage->activeSps) == 0)
        {
            FREE(pSeqParamSet->offsetForRefFrame);
            FREE(pSeqParamSet->vuiParameters);
            return(HANTRO_OK);
        }

        /* the stored set is about to be overwritten -> free the memories
         * it owns */
        pStored = pStorage->sps[spsId];
        FREE(pStored->offsetForRefFrame);
        FREE(pStored->vuiParameters);

        /* contents of the active sps change: set the ids of the active
         * param sets to invalid values so that they get re-activated when
         * the next IDR picture is decoded */
        if (isActive)
        {
            pStorage->activeSpsId = MAX_NUM_SEQ_PARAM_SETS + 1;
            pStorage->activePpsId = MAX_NUM_PIC_PARAM_SETS + 1;
            pStorage->activeSps = NULL;
            pStorage->activePps = NULL;
        }
    }

    /* shallow copy: the storage takes over the pointers held by
     * *pSeqParamSet */
    *pStorage->sps[spsId] = *pSeqParamSet;

    return(HANTRO_OK);

}

/*------------------------------------------------------------------------------

    Function: h264bsdStorePicParamSet

        Functional description:
            Store picture parameter set into the storage.
If active PPS is
            overwritten -> check if active SPS changes and if it does -> set
            parameters to force reactivation of parameter sets

        Inputs:
            pStorage        pointer to storage structure
            pPicParamSet    pointer to param set to be stored

        Outputs:
            none

        Returns:
            HANTRO_OK                success
            MEMORY_ALLOCATION_ERROR  failure in memory allocation

------------------------------------------------------------------------------*/

u32 h264bsdStorePicParamSet(storage_t *pStorage, picParamSet_t *pPicParamSet)
{

    u32 ppsId;
    picParamSet_t *pStored;

    ASSERT(pStorage);
    ASSERT(pPicParamSet);
    ASSERT(pPicParamSet->picParameterSetId < MAX_NUM_PIC_PARAM_SETS);
    ASSERT(pPicParamSet->seqParameterSetId < MAX_NUM_SEQ_PARAM_SETS);

    ppsId = pPicParamSet->picParameterSetId;

    if (pStorage->pps[ppsId] == NULL)
    {
        /* id not used before -> allocate a slot for it */
        ALLOCATE(pStorage->pps[ppsId], 1, picParamSet_t);
        if (pStorage->pps[ppsId] == NULL)
            return(MEMORY_ALLOCATION_ERROR);
    }
    else
    {
        /* the active pps is overwritten by one that refers to another sps:
         * set activePpsId to an invalid value to force re-activation of
         * the param sets */
        if (ppsId == pStorage->activePpsId &&
            pPicParamSet->seqParameterSetId != pStorage->activeSpsId)
        {
            pStorage->activePpsId = MAX_NUM_PIC_PARAM_SETS + 1;
        }

        /* active or not, the memories allocated for the old param set are
         * freed before it is overwritten */
        pStored = pStorage->pps[ppsId];
        FREE(pStored->runLength);
        FREE(pStored->topLeft);
        FREE(pStored->bottomRight);
        FREE(pStored->sliceGroupId);
    }

    /* shallow copy: the storage takes over the pointers held by
     * *pPicParamSet */
    *pStorage->pps[ppsId] = *pPicParamSet;

    return(HANTRO_OK);

}

/*------------------------------------------------------------------------------

    Function: h264bsdActivateParamSets

        Functional description:
            Activate certain SPS/PPS combination. This function shall be
            called in the beginning of each picture. Picture parameter set
            can be changed as wanted, but sequence parameter set may only be
            changed when the starting picture is an IDR picture.

            When new SPS is activated the function allocates memory for
            macroblock storages and slice group map and (re-)initializes the
            decoded picture buffer. If this is not the first activation the old
            allocations are freed and FreeDpb called before new allocations.
+ + Inputs: + pStorage pointer to storage data structure + ppsId identifies the PPS to be activated, SPS id obtained + from the PPS + isIdr flag to indicate if the picture is an IDR picture + + Outputs: + none + + Returns: + HANTRO_OK success + HANTRO_NOK non-existing or invalid param set combination, + trying to change SPS with non-IDR picture + MEMORY_ALLOCATION_ERROR failure in memory allocation + +------------------------------------------------------------------------------*/ + +u32 h264bsdActivateParamSets(storage_t *pStorage, u32 ppsId, u32 isIdr) +{ + +/* Variables */ + + u32 tmp; + u32 flag; + +/* Code */ + + ASSERT(pStorage); + ASSERT(ppsId < MAX_NUM_PIC_PARAM_SETS); + + /* check that pps and corresponding sps exist */ + if ( (pStorage->pps[ppsId] == NULL) || + (pStorage->sps[pStorage->pps[ppsId]->seqParameterSetId] == NULL) ) + { + return(HANTRO_NOK); + } + + /* check that pps parameters do not violate picture size constraints */ + tmp = CheckPps(pStorage->pps[ppsId], + pStorage->sps[pStorage->pps[ppsId]->seqParameterSetId]); + if (tmp != HANTRO_OK) + return(tmp); + + /* first activation part1 */ + if (pStorage->activePpsId == MAX_NUM_PIC_PARAM_SETS) + { + pStorage->activePpsId = ppsId; + pStorage->activePps = pStorage->pps[ppsId]; + pStorage->activeSpsId = pStorage->activePps->seqParameterSetId; + pStorage->activeSps = pStorage->sps[pStorage->activeSpsId]; + pStorage->picSizeInMbs = + pStorage->activeSps->picWidthInMbs * + pStorage->activeSps->picHeightInMbs; + + pStorage->currImage->width = pStorage->activeSps->picWidthInMbs; + pStorage->currImage->height = pStorage->activeSps->picHeightInMbs; + + pStorage->pendingActivation = HANTRO_TRUE; + } + /* first activation part2 */ + else if (pStorage->pendingActivation) + { + pStorage->pendingActivation = HANTRO_FALSE; + + FREE(pStorage->mb); + FREE(pStorage->sliceGroupMap); + + ALLOCATE(pStorage->mb, pStorage->picSizeInMbs, mbStorage_t); + ALLOCATE(pStorage->sliceGroupMap, pStorage->picSizeInMbs, u32); + if 
(pStorage->mb == NULL || pStorage->sliceGroupMap == NULL) + return(MEMORY_ALLOCATION_ERROR); + + H264SwDecMemset(pStorage->mb, 0, + pStorage->picSizeInMbs * sizeof(mbStorage_t)); + + h264bsdInitMbNeighbours(pStorage->mb, + pStorage->activeSps->picWidthInMbs, + pStorage->picSizeInMbs); + + /* dpb output reordering disabled if + * 1) application set noReordering flag + * 2) POC type equal to 2 + * 3) num_reorder_frames in vui equal to 0 */ + if ( pStorage->noReordering || + pStorage->activeSps->picOrderCntType == 2 || + (pStorage->activeSps->vuiParametersPresentFlag && + pStorage->activeSps->vuiParameters->bitstreamRestrictionFlag && + !pStorage->activeSps->vuiParameters->numReorderFrames) ) + flag = HANTRO_TRUE; + else + flag = HANTRO_FALSE; + + tmp = h264bsdResetDpb(pStorage->dpb, + pStorage->activeSps->picWidthInMbs * + pStorage->activeSps->picHeightInMbs, + pStorage->activeSps->maxDpbSize, + pStorage->activeSps->numRefFrames, + pStorage->activeSps->maxFrameNum, + flag); + if (tmp != HANTRO_OK) + return(tmp); + } + else if (ppsId != pStorage->activePpsId) + { + /* sequence parameter set shall not change but before an IDR picture */ + if (pStorage->pps[ppsId]->seqParameterSetId != pStorage->activeSpsId) + { + DEBUG(("SEQ PARAM SET CHANGING...\n")); + if (isIdr) + { + pStorage->activePpsId = ppsId; + pStorage->activePps = pStorage->pps[ppsId]; + pStorage->activeSpsId = pStorage->activePps->seqParameterSetId; + pStorage->activeSps = pStorage->sps[pStorage->activeSpsId]; + pStorage->picSizeInMbs = + pStorage->activeSps->picWidthInMbs * + pStorage->activeSps->picHeightInMbs; + + pStorage->currImage->width = pStorage->activeSps->picWidthInMbs; + pStorage->currImage->height = + pStorage->activeSps->picHeightInMbs; + + pStorage->pendingActivation = HANTRO_TRUE; + } + else + { + DEBUG(("TRYING TO CHANGE SPS IN NON-IDR SLICE\n")); + return(HANTRO_NOK); + } + } + else + { + pStorage->activePpsId = ppsId; + pStorage->activePps = pStorage->pps[ppsId]; + } + } + + 
return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdResetStorage + + Functional description: + Reset contents of the storage. This should be called before + processing of new image is started. + + Inputs: + pStorage pointer to storage structure + + Outputs: + none + + Returns: + none + + +------------------------------------------------------------------------------*/ + +void h264bsdResetStorage(storage_t *pStorage) +{ + +/* Variables */ + + u32 i; + +/* Code */ + + ASSERT(pStorage); + + pStorage->slice->numDecodedMbs = 0; + pStorage->slice->sliceId = 0; + + for (i = 0; i < pStorage->picSizeInMbs; i++) + { + pStorage->mb[i].sliceId = 0; + pStorage->mb[i].decoded = 0; + } + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdIsStartOfPicture + + Functional description: + Determine if the decoder is in the start of a picture. This + information is needed to decide if h264bsdActivateParamSets and + h264bsdCheckGapsInFrameNum functions should be called. Function + considers that new picture is starting if no slice headers + have been successfully decoded for the current access unit. + + Inputs: + pStorage pointer to storage structure + + Outputs: + none + + Returns: + HANTRO_TRUE new picture is starting + HANTRO_FALSE not starting + +------------------------------------------------------------------------------*/ + +u32 h264bsdIsStartOfPicture(storage_t *pStorage) +{ + +/* Variables */ + + +/* Code */ + + if (pStorage->validSliceInAccessUnit == HANTRO_FALSE) + return(HANTRO_TRUE); + else + return(HANTRO_FALSE); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdIsEndOfPicture + + Functional description: + Determine if the decoder is in the end of a picture. 
This + information is needed to determine when deblocking filtering + and reference picture marking processes should be performed. + + If the decoder is processing primary slices the return value + is determined by checking the value of numDecodedMbs in the + storage. On the other hand, if the decoder is processing + redundant slices the numDecodedMbs may not contain valid + informationa and each macroblock has to be checked separately. + + Inputs: + pStorage pointer to storage structure + + Outputs: + none + + Returns: + HANTRO_TRUE end of picture + HANTRO_FALSE noup + +------------------------------------------------------------------------------*/ + +u32 h264bsdIsEndOfPicture(storage_t *pStorage) +{ + +/* Variables */ + + u32 i, tmp; + +/* Code */ + + /* primary picture */ + if (!pStorage->sliceHeader[0].redundantPicCnt) + { + if (pStorage->slice->numDecodedMbs == pStorage->picSizeInMbs) + return(HANTRO_TRUE); + } + else + { + for (i = 0, tmp = 0; i < pStorage->picSizeInMbs; i++) + tmp += pStorage->mb[i].decoded ? 1 : 0; + + if (tmp == pStorage->picSizeInMbs) + return(HANTRO_TRUE); + } + + return(HANTRO_FALSE); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdComputeSliceGroupMap + + Functional description: + Compute slice group map. Just call h264bsdDecodeSliceGroupMap with + appropriate parameters. 
+ + Inputs: + pStorage pointer to storage structure + sliceGroupChangeCycle + + Outputs: + none + + Returns: + none + +------------------------------------------------------------------------------*/ + +void h264bsdComputeSliceGroupMap(storage_t *pStorage, u32 sliceGroupChangeCycle) +{ + +/* Variables */ + + +/* Code */ + + h264bsdDecodeSliceGroupMap(pStorage->sliceGroupMap, + pStorage->activePps, sliceGroupChangeCycle, + pStorage->activeSps->picWidthInMbs, + pStorage->activeSps->picHeightInMbs); + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdCheckAccessUnitBoundary + + Functional description: + Check if next NAL unit starts a new access unit. Following + conditions specify start of a new access unit: + + -NAL unit types 6-11, 13-18 (e.g. SPS, PPS) + + following conditions checked only for slice NAL units, values + compared to ones obtained from previous slice: + + -NAL unit type differs (slice / IDR slice) + -frame_num differs + -nal_ref_idc differs and one of the values is 0 + -POC information differs + -both are IDR slices and idr_pic_id differs + + Inputs: + strm pointer to stream data structure + nuNext pointer to NAL unit structure + storage pointer to storage structure + + Outputs: + accessUnitBoundaryFlag the result is stored here, TRUE for + access unit boundary, FALSE otherwise + + Returns: + HANTRO_OK success + HANTRO_NOK failure, invalid stream data + PARAM_SET_ERROR invalid param set usage + +------------------------------------------------------------------------------*/ + +u32 h264bsdCheckAccessUnitBoundary( + strmData_t *strm, + nalUnit_t *nuNext, + storage_t *storage, + u32 *accessUnitBoundaryFlag) +{ + +/* Variables */ + + u32 tmp, ppsId, frameNum, idrPicId, picOrderCntLsb; + i32 deltaPicOrderCntBottom, deltaPicOrderCnt[2]; + seqParamSet_t *sps; + picParamSet_t *pps; + +/* Code */ + + ASSERT(strm); + ASSERT(nuNext); + ASSERT(storage); + ASSERT(storage->sps); + ASSERT(storage->pps); + + 
/* initialize default output to FALSE */ + *accessUnitBoundaryFlag = HANTRO_FALSE; + + if ( ( (nuNext->nalUnitType > 5) && (nuNext->nalUnitType < 12) ) || + ( (nuNext->nalUnitType > 12) && (nuNext->nalUnitType <= 18) ) ) + { + *accessUnitBoundaryFlag = HANTRO_TRUE; + return(HANTRO_OK); + } + else if ( nuNext->nalUnitType != NAL_CODED_SLICE && + nuNext->nalUnitType != NAL_CODED_SLICE_IDR ) + { + return(HANTRO_OK); + } + + /* check if this is the very first call to this function */ + if (storage->aub->firstCallFlag) + { + *accessUnitBoundaryFlag = HANTRO_TRUE; + storage->aub->firstCallFlag = HANTRO_FALSE; + } + + /* get picture parameter set id */ + tmp = h264bsdCheckPpsId(strm, &ppsId); + if (tmp != HANTRO_OK) + return(tmp); + + /* store sps and pps in separate pointers just to make names shorter */ + pps = storage->pps[ppsId]; + if ( pps == NULL || storage->sps[pps->seqParameterSetId] == NULL || + (storage->activeSpsId != MAX_NUM_SEQ_PARAM_SETS && + pps->seqParameterSetId != storage->activeSpsId && + nuNext->nalUnitType != NAL_CODED_SLICE_IDR) ) + return(PARAM_SET_ERROR); + sps = storage->sps[pps->seqParameterSetId]; + + if (storage->aub->nuPrev->nalRefIdc != nuNext->nalRefIdc && + (storage->aub->nuPrev->nalRefIdc == 0 || nuNext->nalRefIdc == 0)) + *accessUnitBoundaryFlag = HANTRO_TRUE; + + if ((storage->aub->nuPrev->nalUnitType == NAL_CODED_SLICE_IDR && + nuNext->nalUnitType != NAL_CODED_SLICE_IDR) || + (storage->aub->nuPrev->nalUnitType != NAL_CODED_SLICE_IDR && + nuNext->nalUnitType == NAL_CODED_SLICE_IDR)) + *accessUnitBoundaryFlag = HANTRO_TRUE; + + tmp = h264bsdCheckFrameNum(strm, sps->maxFrameNum, &frameNum); + if (tmp != HANTRO_OK) + return(HANTRO_NOK); + + if (storage->aub->prevFrameNum != frameNum) + { + storage->aub->prevFrameNum = frameNum; + *accessUnitBoundaryFlag = HANTRO_TRUE; + } + + if (nuNext->nalUnitType == NAL_CODED_SLICE_IDR) + { + tmp = h264bsdCheckIdrPicId(strm, sps->maxFrameNum, nuNext->nalUnitType, + &idrPicId); + if (tmp != HANTRO_OK) + 
return(HANTRO_NOK); + + if (storage->aub->nuPrev->nalUnitType == NAL_CODED_SLICE_IDR && + storage->aub->prevIdrPicId != idrPicId) + *accessUnitBoundaryFlag = HANTRO_TRUE; + + storage->aub->prevIdrPicId = idrPicId; + } + + if (sps->picOrderCntType == 0) + { + tmp = h264bsdCheckPicOrderCntLsb(strm, sps, nuNext->nalUnitType, + &picOrderCntLsb); + if (tmp != HANTRO_OK) + return(HANTRO_NOK); + + if (storage->aub->prevPicOrderCntLsb != picOrderCntLsb) + { + storage->aub->prevPicOrderCntLsb = picOrderCntLsb; + *accessUnitBoundaryFlag = HANTRO_TRUE; + } + + if (pps->picOrderPresentFlag) + { + tmp = h264bsdCheckDeltaPicOrderCntBottom(strm, sps, + nuNext->nalUnitType, &deltaPicOrderCntBottom); + if (tmp != HANTRO_OK) + return(tmp); + + if (storage->aub->prevDeltaPicOrderCntBottom != + deltaPicOrderCntBottom) + { + storage->aub->prevDeltaPicOrderCntBottom = + deltaPicOrderCntBottom; + *accessUnitBoundaryFlag = HANTRO_TRUE; + } + } + } + else if (sps->picOrderCntType == 1 && !sps->deltaPicOrderAlwaysZeroFlag) + { + tmp = h264bsdCheckDeltaPicOrderCnt(strm, sps, nuNext->nalUnitType, + pps->picOrderPresentFlag, deltaPicOrderCnt); + if (tmp != HANTRO_OK) + return(tmp); + + if (storage->aub->prevDeltaPicOrderCnt[0] != deltaPicOrderCnt[0]) + { + storage->aub->prevDeltaPicOrderCnt[0] = deltaPicOrderCnt[0]; + *accessUnitBoundaryFlag = HANTRO_TRUE; + } + + if (pps->picOrderPresentFlag) + if (storage->aub->prevDeltaPicOrderCnt[1] != deltaPicOrderCnt[1]) + { + storage->aub->prevDeltaPicOrderCnt[1] = deltaPicOrderCnt[1]; + *accessUnitBoundaryFlag = HANTRO_TRUE; + } + } + + *storage->aub->nuPrev = *nuNext; + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + Function: CheckPps + + Functional description: + Check picture parameter set. Contents of the picture parameter + set information that depends on the image dimensions is checked + against the dimensions in the sps. 
+ + Inputs: + pps pointer to picture paramter set + sps pointer to sequence parameter set + + Outputs: + none + + Returns: + HANTRO_OK everything ok + HANTRO_NOK invalid data in picture parameter set + +------------------------------------------------------------------------------*/ +u32 CheckPps(picParamSet_t *pps, seqParamSet_t *sps) +{ + + u32 i; + u32 picSize; + + picSize = sps->picWidthInMbs * sps->picHeightInMbs; + + /* check slice group params */ + if (pps->numSliceGroups > 1) + { + if (pps->sliceGroupMapType == 0) + { + ASSERT(pps->runLength); + for (i = 0; i < pps->numSliceGroups; i++) + { + if (pps->runLength[i] > picSize) + return(HANTRO_NOK); + } + } + else if (pps->sliceGroupMapType == 2) + { + ASSERT(pps->topLeft); + ASSERT(pps->bottomRight); + for (i = 0; i < pps->numSliceGroups-1; i++) + { + if (pps->topLeft[i] > pps->bottomRight[i] || + pps->bottomRight[i] >= picSize) + return(HANTRO_NOK); + + if ( (pps->topLeft[i] % sps->picWidthInMbs) > + (pps->bottomRight[i] % sps->picWidthInMbs) ) + return(HANTRO_NOK); + } + } + else if (pps->sliceGroupMapType > 2 && pps->sliceGroupMapType < 6) + { + if (pps->sliceGroupChangeRate > picSize) + return(HANTRO_NOK); + } + else if (pps->sliceGroupMapType == 6 && + pps->picSizeInMapUnits < picSize) + return(HANTRO_NOK); + } + + return(HANTRO_OK); +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdValidParamSets + + Functional description: + Check if any valid SPS/PPS combination exists in the storage. + Function tries each PPS in the buffer and checks if corresponding + SPS exists and calls CheckPps to determine if the PPS conforms + to image dimensions of the SPS. 
+ + Inputs: + pStorage pointer to storage structure + + Outputs: + HANTRO_OK there is at least one valid combination + HANTRO_NOK no valid combinations found + + +------------------------------------------------------------------------------*/ + +u32 h264bsdValidParamSets(storage_t *pStorage) +{ + +/* Variables */ + + u32 i; + +/* Code */ + + ASSERT(pStorage); + + for (i = 0; i < MAX_NUM_PIC_PARAM_SETS; i++) + { + if ( pStorage->pps[i] && + pStorage->sps[pStorage->pps[i]->seqParameterSetId] && + CheckPps(pStorage->pps[i], + pStorage->sps[pStorage->pps[i]->seqParameterSetId]) == + HANTRO_OK) + { + return(HANTRO_OK); + } + } + + return(HANTRO_NOK); + +} + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.h new file mode 100755 index 0000000000000000000000000000000000000000..ba3b2da7726e3d0221079eee139e96f7c74306b9 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.h @@ -0,0 +1,174 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. 
Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_STORAGE_H +#define H264SWDEC_STORAGE_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_cfg.h" +#include "h264bsd_seq_param_set.h" +#include "h264bsd_pic_param_set.h" +#include "h264bsd_macroblock_layer.h" +#include "h264bsd_nal_unit.h" +#include "h264bsd_slice_header.h" +#include "h264bsd_seq_param_set.h" +#include "h264bsd_dpb.h" +#include "h264bsd_pic_order_cnt.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + +typedef struct +{ + u32 sliceId; + u32 numDecodedMbs; + u32 lastMbAddr; +} sliceStorage_t; + +/* structure to store parameters needed for access unit boundary checking */ +typedef struct +{ + nalUnit_t nuPrev[1]; + u32 prevFrameNum; + u32 prevIdrPicId; + u32 prevPicOrderCntLsb; + i32 prevDeltaPicOrderCntBottom; + i32 prevDeltaPicOrderCnt[2]; + u32 firstCallFlag; +} aubCheck_t; + +/* storage data structure, holds all data of a decoder instance */ +typedef struct +{ + /* active paramet set ids and pointers */ + u32 oldSpsId; + u32 activePpsId; + u32 activeSpsId; + picParamSet_t *activePps; + seqParamSet_t *activeSps; + seqParamSet_t *sps[MAX_NUM_SEQ_PARAM_SETS]; + picParamSet_t *pps[MAX_NUM_PIC_PARAM_SETS]; + + /* current slice group map, recomputed for each slice */ + u32 *sliceGroupMap; + + u32 picSizeInMbs; + + /* this flag is set after all macroblocks of a picture successfully + * decoded -> redundant slices not decoded */ + u32 
skipRedundantSlices; + u32 picStarted; + + /* flag to indicate if current access unit contains any valid slices */ + u32 validSliceInAccessUnit; + + /* store information needed for handling of slice decoding */ + sliceStorage_t slice[1]; + + /* number of concealed macroblocks in the current image */ + u32 numConcealedMbs; + + /* picId given by application */ + u32 currentPicId; + + /* macroblock specific storages, size determined by image dimensions */ + mbStorage_t *mb; + + /* flag to store noOutputReordering flag set by the application */ + u32 noReordering; + + /* DPB */ + dpbStorage_t dpb[1]; + + /* structure to store picture order count related information */ + pocStorage_t poc[1]; + + /* access unit boundary checking related data */ + aubCheck_t aub[1]; + + /* current processed image */ + image_t currImage[1]; + + /* last valid NAL unit header is stored here */ + nalUnit_t prevNalUnit[1]; + + /* slice header, second structure used as a temporary storage while + * decoding slice header, first one stores last successfully decoded + * slice header */ + sliceHeader_t sliceHeader[2]; + + /* fields to store old stream buffer pointers, needed when only part of + * a stream buffer is processed by h264bsdDecode function */ + u32 prevBufNotFinished; + u8 *prevBufPointer; + u32 prevBytesConsumed; + strmData_t strm[1]; + + /* macroblock layer structure, there is no need to store this but it + * would have increased the stack size excessively and needed to be + * allocated from head -> easiest to put it here */ + macroblockLayer_t *mbLayer; + + u32 pendingActivation; /* Activate parameter sets after returning + HEADERS_RDY to the user */ + u32 intraConcealmentFlag; /* 0 gray picture for corrupted intra + 1 previous frame used if available */ +} storage_t; + +/*------------------------------------------------------------------------------ + 4. 
Function prototypes +------------------------------------------------------------------------------*/ + +void h264bsdInitStorage(storage_t *pStorage); +void h264bsdResetStorage(storage_t *pStorage); +u32 h264bsdIsStartOfPicture(storage_t *pStorage); +u32 h264bsdIsEndOfPicture(storage_t *pStorage); +u32 h264bsdStoreSeqParamSet(storage_t *pStorage, seqParamSet_t *pSeqParamSet); +u32 h264bsdStorePicParamSet(storage_t *pStorage, picParamSet_t *pPicParamSet); +u32 h264bsdActivateParamSets(storage_t *pStorage, u32 ppsId, u32 isIdr); +void h264bsdComputeSliceGroupMap(storage_t *pStorage, + u32 sliceGroupChangeCycle); + +u32 h264bsdCheckAccessUnitBoundary( + strmData_t *strm, + nalUnit_t *nuNext, + storage_t *storage, + u32 *accessUnitBoundaryFlag); + +u32 h264bsdValidParamSets(storage_t *pStorage); + +#endif /* #ifdef H264SWDEC_STORAGE_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.c new file mode 100755 index 0000000000000000000000000000000000000000..20d10836a6d298ed3e6c72068c47ebd062dd8739 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.c @@ -0,0 +1,242 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. 
Module defines + 4. Local function prototypes + 5. Functions + h264bsdGetBits + h264bsdShowBits32 + h264bsdFlushBits + h264bsdIsByteAligned + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_util.h" +#include "h264bsd_stream.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + + Function: h264bsdGetBits + + Functional description: + Read and remove bits from the stream buffer. + + Input: + pStrmData pointer to stream data structure + numBits number of bits to read + + Output: + none + + Returns: + bits read from stream + END_OF_STREAM if not enough bits left + +------------------------------------------------------------------------------*/ + +u32 h264bsdGetBits(strmData_t *pStrmData, u32 numBits) +{ + + u32 out; + + ASSERT(pStrmData); + ASSERT(numBits < 32); + + out = h264bsdShowBits32(pStrmData) >> (32 - numBits); + + if (h264bsdFlushBits(pStrmData, numBits) == HANTRO_OK) + { + return(out); + } + else + { + return(END_OF_STREAM); + } + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdShowBits32 + + Functional description: + Read 32 bits from the stream buffer. 
Buffer is left as it is, i.e.
+            no bits are removed. First bit read from the stream is the MSB of
+            the return value. If there are not enough bits in the buffer ->
+            bits beyond the end of the stream are set to '0' in the return
+            value.
+
+        Input:
+            pStrmData   pointer to stream data structure
+
+        Output:
+            none
+
+        Returns:
+            bits read from stream
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdShowBits32(strmData_t *pStrmData)
+{
+
+    i32 bits, shift;
+    u32 out;
+    u8 *pStrm;
+
+    ASSERT(pStrmData);
+    ASSERT(pStrmData->pStrmCurrPos);
+    ASSERT(pStrmData->bitPosInWord < 8);
+    ASSERT(pStrmData->bitPosInWord ==
+           (pStrmData->strmBuffReadBits & 0x7));
+
+    pStrm = pStrmData->pStrmCurrPos;
+
+    /* number of bits left in the buffer, signed so that a read position
+     * past the end shows up as a non-positive count */
+    bits = (i32)pStrmData->strmBuffSize*8 - (i32)pStrmData->strmBuffReadBits;
+
+    /* at least 32-bits in the buffer */
+    if (bits >= 32)
+    {
+        u32 bitPosInWord = pStrmData->bitPosInWord;
+        /* assemble four bytes, first byte in the most significant position */
+        out = ((u32)pStrm[0] << 24) | ((u32)pStrm[1] << 16) |
+              ((u32)pStrm[2] <<  8) | ((u32)pStrm[3]);
+
+        /* not byte aligned: drop the already consumed bits of the first byte
+         * and refill the low end from a fifth byte. With a non-zero bit
+         * position and >= 32 bits left that byte is inside the buffer,
+         * assuming pStrmCurrPos tracks strmBuffReadBits the way
+         * h264bsdFlushBits maintains it */
+        if (bitPosInWord)
+        {
+            u32 byte = (u32)pStrm[4];
+            u32 tmp = (8-bitPosInWord);
+            out <<= bitPosInWord;
+            out |= byte>>tmp;
+        }
+        return (out);
+    }
+    /* at least one bit in the buffer */
+    else if (bits > 0)
+    {
+        /* shifting the first byte by 24 + bitPosInWord pushes its consumed
+         * bits out of the top of the 32-bit word */
+        shift = (i32)(24 + pStrmData->bitPosInWord);
+        out = (u32)(*pStrm++) << shift;
+        bits -= (i32)(8 - pStrmData->bitPosInWord);
+        /* append the remaining bytes below it; positions for which no data
+         * is left stay zero */
+        while (bits > 0)
+        {
+            shift -= 8;
+            out |= (u32)(*pStrm++) << shift;
+            bits -= 8;
+        }
+        return (out);
+    }
+    else
+        return (0);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFlushBits
+
+        Functional description:
+            Remove bits from the stream buffer
+
+        Input:
+            pStrmData       pointer to stream data structure
+            numBits         number of bits to remove
+
+        Output:
+            none
+
+        Returns:
+            HANTRO_OK       success
+            END_OF_STREAM   not enough bits left
+
+------------------------------------------------------------------------------*/
+#ifndef 
H264DEC_NEON
+/* C implementation, compiled only when H264DEC_NEON is not defined */
+u32 h264bsdFlushBits(strmData_t *pStrmData, u32 numBits)
+{
+
+    ASSERT(pStrmData);
+    ASSERT(pStrmData->pStrmBuffStart);
+    ASSERT(pStrmData->pStrmCurrPos);
+    ASSERT(pStrmData->bitPosInWord < 8);
+    ASSERT(pStrmData->bitPosInWord == (pStrmData->strmBuffReadBits & 0x7));
+
+    /* the bit counters are advanced unconditionally, also when the request
+     * runs past the end of the buffer */
+    pStrmData->strmBuffReadBits += numBits;
+    pStrmData->bitPosInWord = pStrmData->strmBuffReadBits & 0x7;
+    if ( (pStrmData->strmBuffReadBits ) <= (8*pStrmData->strmBuffSize) )
+    {
+        /* byte pointer is always derived from the total bit count */
+        pStrmData->pStrmCurrPos = pStrmData->pStrmBuffStart +
+            (pStrmData->strmBuffReadBits >> 3);
+        return(HANTRO_OK);
+    }
+    else
+        /* pStrmCurrPos is left where it was, only the counters have moved */
+        return(END_OF_STREAM);
+
+}
+#endif
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIsByteAligned
+
+        Functional description:
+            Check if current stream position is byte aligned.
+
+        Inputs:
+            pStrmData   pointer to stream data structure
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_TRUE     stream is byte aligned
+            HANTRO_FALSE    stream is not byte aligned
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdIsByteAligned(strmData_t *pStrmData)
+{
+
+/* Variables */
+
+/* Code */
+
+    /* aligned when no bits of the current byte have been consumed */
+    if (!pStrmData->bitPosInWord)
+        return(HANTRO_TRUE);
+    else
+        return(HANTRO_FALSE);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.h
new file mode 100755
index 0000000000000000000000000000000000000000..4404b6683415b53bd47121343ccc0450d1ee9206
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_STREAM_H +#define H264SWDEC_STREAM_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + +typedef struct +{ + u8 *pStrmBuffStart; /* pointer to start of stream buffer */ + u8 *pStrmCurrPos; /* current read address in stream buffer */ + u32 bitPosInWord; /* bit position in stream buffer byte */ + u32 strmBuffSize; /* size of stream buffer (bytes) */ + u32 strmBuffReadBits; /* number of bits read from stream buffer */ +} strmData_t; + +/*------------------------------------------------------------------------------ + 4. 
Function prototypes +------------------------------------------------------------------------------*/ + +u32 h264bsdGetBits(strmData_t *pStrmData, u32 numBits); + +u32 h264bsdShowBits32(strmData_t *pStrmData); + +u32 h264bsdFlushBits(strmData_t *pStrmData, u32 numBits); + +u32 h264bsdIsByteAligned(strmData_t *); + +#endif /* #ifdef H264SWDEC_STREAM_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.c new file mode 100755 index 0000000000000000000000000000000000000000..4eb6dd09013f60fc37803a5a708eb71aa3ec63f9 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.c @@ -0,0 +1,402 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + h264bsdProcessBlock + h264bsdProcessLumaDc + h264bsdProcessChromaDc + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. 
Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_transform.h" +#include "h264bsd_util.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/* Switch off the following Lint messages for this file: + * Info 701: Shift left of signed quantity (int) + * Info 702: Shift right of signed quantity (int) + */ +/*lint -e701 -e702 */ + +/* LevelScale function */ +static const i32 levelScale[6][3] = { + {10,13,16}, {11,14,18}, {13,16,20}, {14,18,23}, {16,20,25}, {18,23,29}}; + +/* qp % 6 as a function of qp */ +static const u8 qpMod6[52] = {0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5, + 0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3}; + +/* qp / 6 as a function of qp */ +static const u8 qpDiv6[52] = {0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3, + 4,4,4,4,4,4,5,5,5,5,5,5,6,6,6,6,6,6,7,7,7,7,7,7,8,8,8,8}; + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + + Function: h264bsdProcessBlock + + Functional description: + Function performs inverse zig-zag scan, inverse scaling and + inverse transform for a luma or a chroma residual block + + Inputs: + data pointer to data to be processed + qp quantization parameter + skip skip processing of data[0], set to non-zero value + if dc coeff hanled separately + coeffMap 16 lsb's indicate which coeffs are non-zero, + bit 0 (lsb) for coeff 0, bit 1 for coeff 1 etc. 
+
+        Outputs:
+            data            processed data
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      processed data not in valid range [-512, 511]
+
+------------------------------------------------------------------------------*/
+u32 h264bsdProcessBlock(i32 *data, u32 qp, u32 skip, u32 coeffMap)
+{
+
+/* Variables */
+
+    i32 tmp0, tmp1, tmp2, tmp3;
+    i32 d1, d2, d3;
+    u32 row,col;
+    u32 qpDiv;
+    i32 *ptr;
+
+/* Code */
+
+    /* qp indexes the 52-entry qpDiv6/qpMod6 tables without a range check */
+    qpDiv = qpDiv6[qp];
+    /* the three scaling factors of this qp, pre-shifted by qp / 6 */
+    tmp1 = levelScale[qpMod6[qp]][0] << qpDiv;
+    tmp2 = levelScale[qpMod6[qp]][1] << qpDiv;
+    tmp3 = levelScale[qpMod6[qp]][2] << qpDiv;
+
+    if (!skip)
+        data[0] = (data[0] * tmp1);
+
+    /* at least one of the rows 1, 2 or 3 contains non-zero coeffs, mask takes
+     * the scanning order into account (0xFF9C is every coeffMap bit except
+     * 0, 1, 5 and 6) */
+    if (coeffMap & 0xFF9C)
+    {
+        /* do the zig-zag scan and inverse quantization; the reordering is
+         * done in place, so values are copied to d1..d3/tmp0 before their
+         * slots are overwritten */
+        d1 = data[1];
+        d2 = data[14];
+        d3 = data[15];
+        data[1] = (d1 * tmp2);
+        data[14] = (d2 * tmp2);
+        data[15] = (d3 * tmp3);
+
+        d1 = data[2];
+        d2 = data[5];
+        d3 = data[4];
+        data[4] = (d1 * tmp2);
+        data[2] = (d2 * tmp1);
+        data[5] = (d3 * tmp3);
+
+        d1 = data[8];
+        d2 = data[3];
+        d3 = data[6];
+        tmp0 = (d1 * tmp2);
+        data[8] = (d2 * tmp1);
+        data[3] = (d3 * tmp2);
+        d1 = data[7];
+        d2 = data[12];
+        d3 = data[9];
+        data[6] = (d1 * tmp2);
+        data[7] = (d2 * tmp3);
+        data[12] = (d3 * tmp2);
+        data[9] = tmp0;
+
+        d1 = data[10];
+        d2 = data[11];
+        d3 = data[13];
+        data[13] = (d1 * tmp3);
+        data[10] = (d2 * tmp1);
+        data[11] = (d3 * tmp2);
+
+        /* horizontal transform; tmp1..tmp3 are reused as scratch from here
+         * on, the scaling factors are no longer needed */
+        for (row = 4, ptr = data; row--; ptr += 4)
+        {
+            tmp0 = ptr[0] + ptr[2];
+            tmp1 = ptr[0] - ptr[2];
+            tmp2 = (ptr[1] >> 1) - ptr[3];
+            tmp3 = ptr[1] + (ptr[3] >> 1);
+            ptr[0] = tmp0 + tmp3;
+            ptr[1] = tmp1 + tmp2;
+            ptr[2] = tmp1 - tmp2;
+            ptr[3] = tmp0 - tmp3;
+        }
+
+        /*lint +e661 +e662*/
+        /* then vertical transform, one column per iteration (data advances
+         * by one element), with rounding and scaling down by 64 */
+        for (col = 4; col--; data++)
+        {
+            tmp0 = data[0] + data[8];
+            tmp1 = data[0] - data[8];
+            tmp2 = (data[4] >> 1) - data[12];
+            tmp3 = data[4] + (data[12] >> 1);
+            data[0 ] = (tmp0 + tmp3 + 32)>>6;
+            data[4 ] = (tmp1 + tmp2 + 32)>>6;
+            data[8 ] = (tmp1 - tmp2 + 32)>>6;
+            data[12] = (tmp0 - tmp3 + 32)>>6;
+            /* check that each value is in the range [-512,511]; the unsigned
+             * compare covers both bounds at once */
+            if (((u32)(data[0] + 512) > 1023) ||
+                ((u32)(data[4] + 512) > 1023) ||
+                ((u32)(data[8] + 512) > 1023) ||
+                ((u32)(data[12] + 512) > 1023) )
+                return(HANTRO_NOK);
+        }
+    }
+    else /* rows 1, 2 and 3 are zero */
+    {
+        /* only dc-coeff is non-zero, i.e. coeffs at original positions
+         * 1, 5 and 6 are zero (0x62 has exactly bits 1, 5 and 6 set) */
+        if ((coeffMap & 0x62) == 0)
+        {
+            tmp0 = (data[0] + 32) >> 6;
+            /* check that value is in the range [-512,511] */
+            if ((u32)(tmp0 + 512) > 1023)
+                return(HANTRO_NOK);
+            /* a lone DC coefficient gives the same value in all 16 places */
+            data[0] = data[1]  = data[2]  = data[3]  = data[4]  = data[5]  =
+                      data[6]  = data[7]  = data[8]  = data[9]  = data[10] =
+                      data[11] = data[12] = data[13] = data[14] = data[15] =
+                      tmp0;
+        }
+        else /* at least one of the coeffs 1, 5 or 6 is non-zero */
+        {
+            /* only the first row is populated: scale it, transform it once
+             * and replicate the resulting row to rows 1, 2 and 3 */
+            data[1] = (data[1] * tmp2);
+            data[2] = (data[5] * tmp1);
+            data[3] = (data[6] * tmp2);
+            tmp0 = data[0] + data[2];
+            tmp1 = data[0] - data[2];
+            tmp2 = (data[1] >> 1) - data[3];
+            tmp3 = data[1] + (data[3] >> 1);
+            data[0] = (tmp0 + tmp3 + 32)>>6;
+            data[1] = (tmp1 + tmp2 + 32)>>6;
+            data[2] = (tmp1 - tmp2 + 32)>>6;
+            data[3] = (tmp0 - tmp3 + 32)>>6;
+            data[4] = data[8] = data[12] = data[0];
+            data[5] = data[9] = data[13] = data[1];
+            data[6] = data[10] = data[14] = data[2];
+            data[7] = data[11] = data[15] = data[3];
+            /* check that each value is in the range [-512,511] */
+            if (((u32)(data[0] + 512) > 1023) ||
+                ((u32)(data[1] + 512) > 1023) ||
+                ((u32)(data[2] + 512) > 1023) ||
+                ((u32)(data[3] + 512) > 1023) )
+                return(HANTRO_NOK);
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdProcessLumaDc
+
+        Functional description:
+            Function performs inverse zig-zag scan, inverse transform and
+            inverse scaling for a luma DC coefficients block
+
+        Inputs:
+            data            pointer to data to be 
processed
+            qp              quantization parameter
+
+        Outputs:
+            data            processed data
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+void h264bsdProcessLumaDc(i32 *data, u32 qp)
+{
+
+/* Variables */
+
+    i32 tmp0, tmp1, tmp2, tmp3;
+    u32 row,col;
+    u32 qpMod, qpDiv;
+    i32 levScale;
+    i32 *ptr;
+
+/* Code */
+
+    /* qp indexes the 52-entry qpMod6/qpDiv6 tables without a range check */
+    qpMod = qpMod6[qp];
+    qpDiv = qpDiv6[qp];
+
+    /* zig-zag scan, done in place by rotating the values along three
+     * cycles; elements 0, 1, 14 and 15 keep their position */
+    tmp0 = data[2];
+    data[2]  = data[5];
+    data[5]  = data[4];
+    data[4]  = tmp0;
+
+    tmp0 = data[8];
+    data[8]  = data[3];
+    data[3]  = data[6];
+    data[6]  = data[7];
+    data[7]  = data[12];
+    data[12] = data[9];
+    data[9]  = tmp0;
+
+    tmp0 = data[10];
+    data[10] = data[11];
+    data[11] = data[13];
+    data[13] = tmp0;
+
+    /* horizontal transform */
+    for (row = 4, ptr = data; row--; ptr += 4)
+    {
+        tmp0 = ptr[0] + ptr[2];
+        tmp1 = ptr[0] - ptr[2];
+        tmp2 = ptr[1] - ptr[3];
+        tmp3 = ptr[1] + ptr[3];
+        ptr[0] = tmp0 + tmp3;
+        ptr[1] = tmp1 + tmp2;
+        ptr[2] = tmp1 - tmp2;
+        ptr[3] = tmp0 - tmp3;
+    }
+
+    /*lint +e661 +e662*/
+    /* then vertical transform and inverse scaling */
+    levScale = levelScale[ qpMod ][0];
+    if (qp >= 12)
+    {
+        /* qpDiv >= 2 here, the scaling is a pure left shift folded into
+         * levScale */
+        levScale <<= (qpDiv-2);
+        for (col = 4; col--; data++)
+        {
+            tmp0 = data[0] + data[8 ];
+            tmp1 = data[0] - data[8 ];
+            tmp2 = data[4] - data[12];
+            tmp3 = data[4] + data[12];
+            data[0 ] = ((tmp0 + tmp3)*levScale);
+            data[4 ] = ((tmp1 + tmp2)*levScale);
+            data[8 ] = ((tmp1 - tmp2)*levScale);
+            data[12] = ((tmp0 - tmp3)*levScale);
+        }
+    }
+    else
+    {
+        /* qpDiv is 0 or 1: the result is shifted right by 2 - qpDiv and
+         * tmp is the matching rounding term (half of the divisor) */
+        i32 tmp;
+        tmp = ((1 - qpDiv) == 0) ? 1 : 2;
+        for (col = 4; col--; data++)
+        {
+            tmp0 = data[0] + data[8 ];
+            tmp1 = data[0] - data[8 ];
+            tmp2 = data[4] - data[12];
+            tmp3 = data[4] + data[12];
+            data[0 ] = ((tmp0 + tmp3)*levScale+tmp) >> (2-qpDiv);
+            data[4 ] = ((tmp1 + tmp2)*levScale+tmp) >> (2-qpDiv);
+            data[8 ] = ((tmp1 - tmp2)*levScale+tmp) >> (2-qpDiv);
+            data[12] = ((tmp0 - tmp3)*levScale+tmp) >> (2-qpDiv);
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdProcessChromaDc
+
+        Functional description:
+            Function performs inverse transform and inverse scaling for a
+            chroma DC coefficients block
+
+        Inputs:
+            data            pointer to data to be processed, eight values
+                            handled as two independent 2x2 blocks
+                            (data[0..3] and data[4..7]; presumably Cb and
+                            Cr - confirm against the caller)
+            qp              quantization parameter
+
+        Outputs:
+            data            processed data
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+void h264bsdProcessChromaDc(i32 *data, u32 qp)
+{
+
+/* Variables */
+
+    i32 tmp0, tmp1, tmp2, tmp3;
+    u32 qpDiv;
+    i32 levScale;
+    u32 levShift;
+
+/* Code */
+
+    /* qp indexes the 52-entry qpDiv6/qpMod6 tables without a range check */
+    qpDiv = qpDiv6[qp];
+    levScale = levelScale[ qpMod6[qp] ][0];
+
+    if (qp >= 6)
+    {
+        /* qpDiv >= 1: scaling is a left shift folded into levScale */
+        levScale <<= (qpDiv-1);
+        levShift = 0;
+    }
+    else
+    {
+        /* qpDiv == 0: the product is halved instead, without rounding */
+        levShift = 1;
+    }
+
+    /* first 2x2 block */
+    tmp0 = data[0] + data[2];
+    tmp1 = data[0] - data[2];
+    tmp2 = data[1] - data[3];
+    tmp3 = data[1] + data[3];
+    data[0] = ((tmp0 + tmp3) * levScale) >> levShift;
+    data[1] = ((tmp0 - tmp3) * levScale) >> levShift;
+    data[2] = ((tmp1 + tmp2) * levScale) >> levShift;
+    data[3] = ((tmp1 - tmp2) * levScale) >> levShift;
+    /* second 2x2 block, same operations */
+    tmp0 = data[4] + data[6];
+    tmp1 = data[4] - data[6];
+    tmp2 = data[5] - data[7];
+    tmp3 = data[5] + data[7];
+    data[4] = ((tmp0 + tmp3) * levScale) >> levShift;
+    data[5] = ((tmp0 - tmp3) * levScale) >> levShift;
+    data[6] = ((tmp1 + tmp2) * levScale) >> levShift;
+    data[7] = ((tmp1 - tmp2) * levScale) >> levShift;
+
+}
+
+/*lint +e701 +e702 */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.h 
b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.h new file mode 100755 index 0000000000000000000000000000000000000000..4f41a23e46636d7b2dd357b0943f6637896e0ef8 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_TRANSFORM_H +#define H264SWDEC_TRANSFORM_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" + +/*------------------------------------------------------------------------------ + 2. Module defines +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. 
Function prototypes +------------------------------------------------------------------------------*/ + +u32 h264bsdProcessBlock(i32 *data, u32 qp, u32 skip, u32 coeffMap); +void h264bsdProcessLumaDc(i32 *data, u32 qp); +void h264bsdProcessChromaDc(i32 *data, u32 qp); + +#endif /* #ifdef H264SWDEC_TRANSFORM_H */ + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.c new file mode 100755 index 0000000000000000000000000000000000000000..53b2fd8e53df029f09fa9cfab2ac017881ca9ac6 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.c @@ -0,0 +1,286 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + h264bsdCountLeadingZeros + h264bsdRbspTrailingBits + h264bsdMoreRbspData + h264bsdNextMbAddress + h264bsdSetCurrImageMbPointers + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. 
Include headers +------------------------------------------------------------------------------*/ + +#include "h264bsd_util.h" + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/* look-up table for expected values of stuffing bits */ +static const u32 stuffingTable[8] = {0x1,0x2,0x4,0x8,0x10,0x20,0x40,0x80}; + +/* look-up table for chroma quantization parameter as a function of luma QP */ +const u32 h264bsdQpC[52] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19, + 20,21,22,23,24,25,26,27,28,29,29,30,31,32,32,33,34,34,35,35,36,36,37,37,37, + 38,38,38,39,39,39,39}; + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + + 5.1 Function: h264bsdCountLeadingZeros + + Functional description: + Count leading zeros in a code word. Code word is assumed to be + right-aligned, last bit of the code word in the lsb of the value. 
+ + Inputs: + value code word + length number of bits in the code word + + Outputs: + none + + Returns: + number of leading zeros in the code word + +------------------------------------------------------------------------------*/ +#ifndef H264DEC_NEON +u32 h264bsdCountLeadingZeros(u32 value, u32 length) +{ + +/* Variables */ + + u32 zeros = 0; + u32 mask = 1 << (length - 1); + +/* Code */ + + ASSERT(length <= 32); + + while (mask && !(value & mask)) + { + zeros++; + mask >>= 1; + } + return(zeros); + +} +#endif +/*------------------------------------------------------------------------------ + + 5.2 Function: h264bsdRbspTrailingBits + + Functional description: + Check Raw Byte Stream Payload (RBSP) trailing bits, i.e. stuffing. + Rest of the current byte (whole byte if allready byte aligned) + in the stream buffer shall contain a '1' bit followed by zero or + more '0' bits. + + Inputs: + pStrmData pointer to stream data structure + + Outputs: + none + + Returns: + HANTRO_OK RBSP trailing bits found + HANTRO_NOK otherwise + +------------------------------------------------------------------------------*/ + +u32 h264bsdRbspTrailingBits(strmData_t *pStrmData) +{ + +/* Variables */ + + u32 stuffing; + u32 stuffingLength; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pStrmData->bitPosInWord < 8); + + stuffingLength = 8 - pStrmData->bitPosInWord; + + stuffing = h264bsdGetBits(pStrmData, stuffingLength); + if (stuffing == END_OF_STREAM) + return(HANTRO_NOK); + + if (stuffing != stuffingTable[stuffingLength - 1]) + return(HANTRO_NOK); + else + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + 5.3 Function: h264bsdMoreRbspData + + Functional description: + Check if there is more data in the current RBSP. 
The standard
+            defines this function so that there is more data if
+                -more than 8 bits left or
+                -last bits are not RBSP trailing bits
+
+        Inputs:
+            pStrmData   pointer to stream data structure
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_TRUE    there is more data
+            HANTRO_FALSE   no more data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdMoreRbspData(strmData_t *pStrmData)
+{
+
+/* Variables */
+
+    u32 bits;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pStrmData->strmBuffReadBits <= 8 * pStrmData->strmBuffSize);
+
+    /* number of unread bits.
+     * NOTE(review): the subtraction is unsigned; if ASSERT is compiled out
+     * and the read position is already past the end of the buffer, the
+     * value wraps and the function answers HANTRO_TRUE - confirm that
+     * callers stop on END_OF_STREAM before getting here */
+    bits = pStrmData->strmBuffSize * 8 - pStrmData->strmBuffReadBits;
+
+    if (bits == 0)
+        return(HANTRO_FALSE);
+
+    /* with 1..8 bits left, the data has ended only if those bits are exactly
+     * a '1' followed by zeros; the show/shift is evaluated only in that case,
+     * so the shift count stays within 24..31 */
+    if ( (bits > 8) ||
+         ((h264bsdShowBits32(pStrmData)>>(32-bits)) != (1 << (bits-1))) )
+        return(HANTRO_TRUE);
+    else
+        return(HANTRO_FALSE);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    5.4  Function: h264bsdNextMbAddress
+
+        Functional description:
+            Get address of the next macroblock in the current slice group. 
+ + Inputs: + pSliceGroupMap slice group for each macroblock + picSizeInMbs size of the picture + currMbAddr where to start + + Outputs: + none + + Returns: + address of the next macroblock + 0 if none of the following macroblocks belong to same slice + group as currMbAddr + +------------------------------------------------------------------------------*/ + +u32 h264bsdNextMbAddress(u32 *pSliceGroupMap, u32 picSizeInMbs, u32 currMbAddr) +{ + +/* Variables */ + + u32 i, sliceGroup, tmp; + +/* Code */ + + ASSERT(pSliceGroupMap); + ASSERT(picSizeInMbs); + ASSERT(currMbAddr < picSizeInMbs); + + sliceGroup = pSliceGroupMap[currMbAddr]; + + i = currMbAddr + 1; + tmp = pSliceGroupMap[i]; + while ((i < picSizeInMbs) && (tmp != sliceGroup)) + { + i++; + tmp = pSliceGroupMap[i]; + } + + if (i == picSizeInMbs) + i = 0; + + return(i); + +} + + +/*------------------------------------------------------------------------------ + + 5.5 Function: h264bsdSetCurrImageMbPointers + + Functional description: + Set luma and chroma pointers in image_t for current MB + + Inputs: + image Current image + mbNum number of current MB + + Outputs: + none + + Returns: + none +------------------------------------------------------------------------------*/ +void h264bsdSetCurrImageMbPointers(image_t *image, u32 mbNum) +{ + u32 width, height; + u32 picSize; + u32 row, col; + u32 tmp; + + width = image->width; + height = image->height; + row = mbNum / width; + col = mbNum % width; + + tmp = row * width; + picSize = width * height; + + image->luma = (u8*)(image->data + col * 16 + tmp * 256); + image->cb = (u8*)(image->data + picSize * 256 + tmp * 64 + col * 8); + image->cr = (u8*)(image->cb + picSize * 64); +} + + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.h new file mode 100755 index 0000000000000000000000000000000000000000..cb3adda7ee456a5f6b8738b431e0da3a9b6310c0 --- /dev/null +++ 
b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.h @@ -0,0 +1,178 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. Function prototypes + +------------------------------------------------------------------------------*/ + +#ifndef H264SWDEC_UTIL_H +#define H264SWDEC_UTIL_H + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#ifdef _ASSERT_USED +#include +#endif + +#include "H264SwDecApi.h" + +#if defined(_RANGE_CHECK) || defined(_DEBUG_PRINT) || defined(_ERROR_PRINT) +#include +#endif + +#include "basetype.h" +#include "h264bsd_stream.h" +#include "h264bsd_image.h" + +/*------------------------------------------------------------------------------ + 2. 
Module defines +------------------------------------------------------------------------------*/ + +#define HANTRO_OK 0 +#define HANTRO_NOK 1 + +#define HANTRO_TRUE (1) +#define HANTRO_FALSE (0) + +#ifndef NULL +#define NULL 0 +#endif + +#define MEMORY_ALLOCATION_ERROR 0xFFFF +#define PARAM_SET_ERROR 0xFFF0 + +/* value to be returned by GetBits if stream buffer is empty */ +#define END_OF_STREAM 0xFFFFFFFFU + +#define EMPTY_RESIDUAL_INDICATOR 0xFFFFFF + +/* macro to mark a residual block empty, i.e. contain zero coefficients */ +#define MARK_RESIDUAL_EMPTY(residual) ((residual)[0] = EMPTY_RESIDUAL_INDICATOR) +/* macro to check if residual block is empty */ +#define IS_RESIDUAL_EMPTY(residual) ((residual)[0] == EMPTY_RESIDUAL_INDICATOR) + +/* macro for assertion, used only if compiler flag _ASSERT_USED is defined */ +#ifdef _ASSERT_USED +#define ASSERT(expr) assert(expr) +#else +#define ASSERT(expr) +#endif + +/* macro for range checking an value, used only if compiler flag _RANGE_CHECK + * is defined */ +#ifdef _RANGE_CHECK +#define RANGE_CHECK(value, minBound, maxBound) \ +{ \ + if ((value) < (minBound) || (value) > (maxBound)) \ + fprintf(stderr, "Warning: Value exceeds given limit(s)!\n"); \ +} +#else +#define RANGE_CHECK(value, minBound, maxBound) +#endif + +/* macro for range checking an array, used only if compiler flag _RANGE_CHECK + * is defined */ +#ifdef _RANGE_CHECK +#define RANGE_CHECK_ARRAY(array, minBound, maxBound, length) \ +{ \ + i32 i; \ + for (i = 0; i < (length); i++) \ + if ((array)[i] < (minBound) || (array)[i] > (maxBound)) \ + fprintf(stderr,"Warning: Value [%d] exceeds given limit(s)!\n",i); \ +} +#else +#define RANGE_CHECK_ARRAY(array, minBound, maxBound, length) +#endif + +/* macro for debug printing, used only if compiler flag _DEBUG_PRINT is + * defined */ +#ifdef _DEBUG_PRINT +#define DEBUG(args) printf args +#else +#define DEBUG(args) +#endif + +/* macro for error printing, used only if compiler flag _ERROR_PRINT is + * defined */ 
+#ifdef _ERROR_PRINT +#define EPRINT(msg) fprintf(stderr,"ERROR: %s\n",msg) +#else +#define EPRINT(msg) +#endif + +/* macro to get smaller of two values */ +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) + +/* macro to get greater of two values */ +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +/* macro to get absolute value */ +#define ABS(a) (((a) < 0) ? -(a) : (a)) + +/* macro to clip a value z, so that x <= z =< y */ +#define CLIP3(x,y,z) (((z) < (x)) ? (x) : (((z) > (y)) ? (y) : (z))) + +/* macro to clip a value z, so that 0 <= z =< 255 */ +#define CLIP1(z) (((z) < 0) ? 0 : (((z) > 255) ? 255 : (z))) + +/* macro to allocate memory */ +#define ALLOCATE(ptr, count, type) \ +{ \ + (ptr) = H264SwDecMalloc((count) * sizeof(type)); \ +} + +/* macro to free allocated memory */ +#define FREE(ptr) \ +{ \ + H264SwDecFree((ptr)); (ptr) = NULL; \ +} + +#define ALIGN(ptr, bytePos) \ + (ptr + ( ((bytePos - (int)ptr) & (bytePos - 1)) / sizeof(*ptr) )) + +extern const u32 h264bsdQpC[52]; + +/*------------------------------------------------------------------------------ + 3. Data types +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 4. 
Function prototypes
+    (h264bsdCountLeadingZeros has two signatures: the generic build passes
+    the number of bits to examine, the H264DEC_NEON build takes the value
+    only)
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_NEON
+u32 h264bsdCountLeadingZeros(u32 value, u32 length);
+#else
+u32 h264bsdCountLeadingZeros(u32 value);
+#endif
+u32 h264bsdRbspTrailingBits(strmData_t *strmData);
+
+u32 h264bsdMoreRbspData(strmData_t *strmData);
+
+u32 h264bsdNextMbAddress(u32 *pSliceGroupMap, u32 picSizeInMbs, u32 currMbAddr);
+
+void h264bsdSetCurrImageMbPointers(image_t *image, u32 mbNum);
+
+#endif /* #ifndef H264SWDEC_UTIL_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.c
new file mode 100755
index 0000000000000000000000000000000000000000..060f35e3102ac65745e36dae8ed0bb85f8882f59
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdDecodeExpGolombUnsigned
+          h264bsdDecodeExpGolombSigned
+          h264bsdDecodeExpGolombMapped
+          h264bsdDecodeExpGolombTruncated
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_vlc.h"
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/* definition of special code num, this along with the return value is used
+ * to handle code num in the range [0, 2^32] in the DecodeExpGolombUnsigned
+ * function: BIG_CODE_NUM with HANTRO_OK stands for 2^32-1, BIG_CODE_NUM with
+ * HANTRO_NOK stands for 2^32 */
+#define BIG_CODE_NUM 0xFFFFFFFFU
+
+/* Mapping tables for coded_block_pattern, used for decoding of mapped
+ * Exp-Golomb codes. Both tables are indexed by the decoded codeNum, which
+ * h264bsdDecodeExpGolombMapped restricts to [0, 47] before the look-up */
+static const u8 codedBlockPatternIntra4x4[48] = {
+    47,31,15,0,23,27,29,30,7,11,13,14,39,43,45,46,16,3,5,10,12,19,21,26,28,35,
+    37,42,44,1,2,4,8,17,18,20,24,6,9,22,25,32,33,34,36,40,38,41};
+
+static const u8 codedBlockPatternInter[48] = {
+    0,16,1,2,4,8,32,3,5,10,12,15,47,7,11,13,14,6,9,31,35,37,42,44,33,34,36,40,
+    39,43,45,46,17,18,20,24,19,21,26,28,23,27,29,30,22,25,38,41};
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+   5.1  Function: h264bsdDecodeExpGolombUnsigned
+
+        Functional description:
+            Decode unsigned Exp-Golomb code.
This is the same as codeNum used + in other Exp-Golomb code mappings. Code num (i.e. the decoded + symbol) is determined as + + codeNum = 2^leadingZeros - 1 + GetBits(leadingZeros) + + Normal decoded symbols are in the range [0, 2^32 - 2]. Symbol + 2^32-1 is indicated by BIG_CODE_NUM with return value HANTRO_OK + while symbol 2^32 is indicated by BIG_CODE_NUM with return value + HANTRO_NOK. These two symbols are special cases with code length + of 65, i.e. 32 '0' bits, a '1' bit, and either 0 or 1 represented + by 32 bits. + + Symbol 2^32 is out of unsigned 32-bit range but is needed for + DecodeExpGolombSigned to express value -2^31. + + Inputs: + pStrmData pointer to stream data structure + + Outputs: + codeNum decoded code word is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK failure, no valid code word found, note exception + with BIG_CODE_NUM + +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodeExpGolombUnsigned(strmData_t *pStrmData, u32 *codeNum) +{ + +/* Variables */ + + u32 bits, numZeros; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(codeNum); + + bits = h264bsdShowBits32(pStrmData); + + /* first bit is 1 -> code length 1 */ + if (bits >= 0x80000000) + { + h264bsdFlushBits(pStrmData, 1); + *codeNum = 0; + return(HANTRO_OK); + } + /* second bit is 1 -> code length 3 */ + else if (bits >= 0x40000000) + { + if (h264bsdFlushBits(pStrmData, 3) == END_OF_STREAM) + return(HANTRO_NOK); + *codeNum = 1 + ((bits >> 29) & 0x1); + return(HANTRO_OK); + } + /* third bit is 1 -> code length 5 */ + else if (bits >= 0x20000000) + { + if (h264bsdFlushBits(pStrmData, 5) == END_OF_STREAM) + return(HANTRO_NOK); + *codeNum = 3 + ((bits >> 27) & 0x3); + return(HANTRO_OK); + } + /* fourth bit is 1 -> code length 7 */ + else if (bits >= 0x10000000) + { + if (h264bsdFlushBits(pStrmData, 7) == END_OF_STREAM) + return(HANTRO_NOK); + *codeNum = 7 + ((bits >> 25) & 0x7); + return(HANTRO_OK); + } + /* other code lengths */ + 
else + { +#ifndef H264DEC_NEON + numZeros = 4 + h264bsdCountLeadingZeros(bits, 28); +#else + numZeros = h264bsdCountLeadingZeros(bits); +#endif + /* all 32 bits are zero */ + if (numZeros == 32) + { + *codeNum = 0; + h264bsdFlushBits(pStrmData,32); + bits = h264bsdGetBits(pStrmData, 1); + /* check 33rd bit, must be 1 */ + if (bits == 1) + { + /* cannot use h264bsdGetBits, limited to 31 bits */ + bits = h264bsdShowBits32(pStrmData); + if (h264bsdFlushBits(pStrmData, 32) == END_OF_STREAM) + return(HANTRO_NOK); + /* code num 2^32 - 1, needed for unsigned mapping */ + if (bits == 0) + { + *codeNum = BIG_CODE_NUM; + return(HANTRO_OK); + } + /* code num 2^32, needed for unsigned mapping + * (results in -2^31) */ + else if (bits == 1) + { + *codeNum = BIG_CODE_NUM; + return(HANTRO_NOK); + } + } + /* if more zeros than 32, it is an error */ + return(HANTRO_NOK); + } + else + h264bsdFlushBits(pStrmData,numZeros+1); + + bits = h264bsdGetBits(pStrmData, numZeros); + if (bits == END_OF_STREAM) + return(HANTRO_NOK); + + *codeNum = (1 << numZeros) - 1 + bits; + + } + + return(HANTRO_OK); + +} + +/*------------------------------------------------------------------------------ + + 5.2 Function: h264bsdDecodeExpGolombSigned + + Functional description: + Decode signed Exp-Golomb code. Code num is determined by + h264bsdDecodeExpGolombUnsigned and then mapped to signed + representation as + + symbol = (-1)^(codeNum+1) * (codeNum+1)/2 + + Signed symbols shall be in the range [-2^31, 2^31 - 1]. Symbol + -2^31 is obtained when codeNum is 2^32, which cannot be expressed + by unsigned 32-bit value. This is signaled as a special case from + the h264bsdDecodeExpGolombUnsigned by setting codeNum to + BIG_CODE_NUM and returning HANTRO_NOK status. 
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+
+        Outputs:
+            value           decoded code word is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      failure, no valid code word found
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeExpGolombSigned(strmData_t *pStrmData, i32 *value)
+{
+
+/* Variables */
+
+    u32 codeNum = 0;
+    u32 status;
+    u32 magnitude;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(value);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData, &codeNum);
+
+    /* ordinary code nums: map to a signed value when decoding succeeded */
+    if (codeNum != BIG_CODE_NUM)
+    {
+        if (status != HANTRO_OK)
+            return(HANTRO_NOK);
+
+        /* magnitude is (codeNum+1)/2; odd code nums give a positive
+         * symbol, even code nums a negative one */
+        /*lint -e702 */
+        magnitude = (codeNum + 1) >> 1;
+        /*lint +e702 */
+        if (codeNum & 0x1)
+            *value = (i32)magnitude;
+        else
+            *value = -(i32)magnitude;
+        return(HANTRO_OK);
+    }
+
+    /* BIG_CODE_NUM with HANTRO_OK is codeNum 2^32-1, i.e. signed value
+     * 2^31, which does not fit a 32-bit signed integer */
+    if (status == HANTRO_OK)
+        return(HANTRO_NOK);
+
+    /* BIG_CODE_NUM with HANTRO_NOK is codeNum 2^32, i.e. signed value
+     * -2^31 */
+    *value = (i32)(2147483648U);
+    return (HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+   5.3  Function: h264bsdDecodeExpGolombMapped
+
+        Functional description:
+            Decode mapped Exp-Golomb code. Code num is determined by
+            h264bsdDecodeExpGolombUnsigned and then mapped to codedBlockPattern
+            either for intra or inter macroblock. The mapping is implemented by
+            look-up tables defined in the beginning of the file.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+            isIntra         flag to indicate if intra or inter mapping is to
+                            be used
+
+        Outputs:
+            value           decoded code word is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      failure, no valid code word found
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeExpGolombMapped(strmData_t *pStrmData, u32 *value,
+    u32 isIntra)
+{
+
+/* Variables */
+
+    u32 codeNum;
+    const u8 *mapping;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(value);
+
+    if (h264bsdDecodeExpGolombUnsigned(pStrmData, &codeNum) != HANTRO_OK)
+        return (HANTRO_NOK);
+
+    /* range of valid codeNums [0,47], both tables hold 48 entries */
+    if (codeNum > 47)
+        return (HANTRO_NOK);
+
+    /* pick the look-up table matching the macroblock type */
+    mapping = isIntra ? codedBlockPatternIntra4x4 : codedBlockPatternInter;
+    *value = mapping[codeNum];
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+   5.4  Function: h264bsdDecodeExpGolombTruncated
+
+        Functional description:
+            Decode truncated Exp-Golomb code. greaterThanOne flag indicates
+            the range of the symbol to be decoded as follows:
+                FALSE   ->  [0,1]
+                TRUE    ->  [0,2^32-1]
+
+            If flag is false the decoding is performed by reading one bit
+            from the stream with h264bsdGetBits and mapping this to decoded
+            symbol as
+                symbol = bit ? 0 : 1
+
+            Otherwise, i.e. when flag is TRUE, code num is determined by
+            h264bsdDecodeExpGolombUnsigned and this is used as the decoded
+            symbol.
+ + Inputs: + pStrmData pointer to stream data structure + greaterThanOne flag to indicate if range is wider than [0,1] + + Outputs: + value decoded code word is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK failure, no valid code word found + +------------------------------------------------------------------------------*/ + +u32 h264bsdDecodeExpGolombTruncated( + strmData_t *pStrmData, + u32 *value, + u32 greaterThanOne) +{ + +/* Variables */ + +/* Code */ + + ASSERT(pStrmData); + ASSERT(value); + + if (greaterThanOne) + { + return(h264bsdDecodeExpGolombUnsigned(pStrmData, value)); + } + else + { + *value = h264bsdGetBits(pStrmData,1); + if (*value == END_OF_STREAM) + return (HANTRO_NOK); + *value ^= 0x1; + } + + return (HANTRO_OK); + +} + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.h new file mode 100755 index 0000000000000000000000000000000000000000..4c16773ca7d91b2576104e4cda165c10af00c79f --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. Module defines + 3. Data types + 4. 
Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_VLC_H
+#define H264SWDEC_VLC_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_transform.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+
+    All four decoders return HANTRO_OK or HANTRO_NOK and store the decoded
+    symbol through the value pointer.
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeExpGolombUnsigned(strmData_t *pStrmData, u32 *value);
+
+u32 h264bsdDecodeExpGolombSigned(strmData_t *pStrmData, i32 *value);
+
+u32 h264bsdDecodeExpGolombMapped(strmData_t *pStrmData, u32 *value,
+    u32 isIntra);
+
+u32 h264bsdDecodeExpGolombTruncated(strmData_t *pStrmData, u32 *value,
+    u32 greaterThanOne);
+
+#endif /* #ifndef H264SWDEC_VLC_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.c
new file mode 100755
index 0000000000000000000000000000000000000000..4a9335ad71440e3f746244529368676a48b4e252
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.c
@@ -0,0 +1,490 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdDecodeVuiParameters
+          DecodeHrdParameters
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_vui.h"
+#include "basetype.h"
+#include "h264bsd_vlc.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+
+    MAX_DPB_SIZE is the default stored for numReorderFrames and
+    maxDecFrameBuffering when the stream carries no bitstream restriction
+    info; MAX_BR and MAX_CPB feed the default bit rate and CPB size values
+    stored when no HRD parameters are present.
+------------------------------------------------------------------------------*/
+
+#define MAX_DPB_SIZE 16
+#define MAX_BR 240000 /* for level 5.1 */
+#define MAX_CPB 240000 /* for level 5.1 */
+
+/*------------------------------------------------------------------------------
+    4.
Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 DecodeHrdParameters(
+  strmData_t *pStrmData,
+  hrdParameters_t *pHrdParameters);
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdDecodeVuiParameters
+
+        Functional description:
+            Decode VUI parameters from the stream. See standard for details.
+
+            The output structure is cleared first. Every optional group is
+            guarded by a one-bit present flag read from the stream; when a
+            group is absent the fields are either left zero or set to the
+            default values assigned in the corresponding else branch.
+            Decoding stops at the first failure, so the structure may be
+            only partially filled when HANTRO_NOK is returned.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+
+        Outputs:
+            pVuiParameters  decoded information is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid stream data or end of stream
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeVuiParameters(strmData_t *pStrmData,
+    vuiParameters_t *pVuiParameters)
+{
+
+/* Variables */
+
+    u32 tmp;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pVuiParameters);
+
+    H264SwDecMemset(pVuiParameters, 0, sizeof(vuiParameters_t));
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->aspectRatioPresentFlag = (tmp == 1) ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pVuiParameters->aspectRatioPresentFlag)
+    {
+        tmp = h264bsdGetBits(pStrmData, 8);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->aspectRatioIdc = tmp;
+
+        /* explicit sample aspect ratio follows only for the extended idc */
+        if (pVuiParameters->aspectRatioIdc == ASPECT_RATIO_EXTENDED_SAR)
+        {
+            tmp = h264bsdGetBits(pStrmData, 16);
+            if (tmp == END_OF_STREAM)
+                return(HANTRO_NOK);
+            pVuiParameters->sarWidth = tmp;
+
+            tmp = h264bsdGetBits(pStrmData, 16);
+            if (tmp == END_OF_STREAM)
+                return(HANTRO_NOK);
+            pVuiParameters->sarHeight = tmp;
+        }
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->overscanInfoPresentFlag = (tmp == 1) ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pVuiParameters->overscanInfoPresentFlag)
+    {
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->overscanAppropriateFlag = (tmp == 1) ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->videoSignalTypePresentFlag = (tmp == 1) ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pVuiParameters->videoSignalTypePresentFlag)
+    {
+        tmp = h264bsdGetBits(pStrmData, 3);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->videoFormat = tmp;
+
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->videoFullRangeFlag = (tmp == 1) ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->colourDescriptionPresentFlag =
+            (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+        if (pVuiParameters->colourDescriptionPresentFlag)
+        {
+            tmp = h264bsdGetBits(pStrmData, 8);
+            if (tmp == END_OF_STREAM)
+                return(HANTRO_NOK);
+            pVuiParameters->colourPrimaries = tmp;
+
+            tmp = h264bsdGetBits(pStrmData, 8);
+            if (tmp == END_OF_STREAM)
+                return(HANTRO_NOK);
+            pVuiParameters->transferCharacteristics = tmp;
+
+            tmp = h264bsdGetBits(pStrmData, 8);
+            if (tmp == END_OF_STREAM)
+                return(HANTRO_NOK);
+            pVuiParameters->matrixCoefficients = tmp;
+        }
+        else
+        {
+            pVuiParameters->colourPrimaries         = 2;
+            pVuiParameters->transferCharacteristics = 2;
+            pVuiParameters->matrixCoefficients      = 2;
+        }
+    }
+    else
+    {
+        pVuiParameters->videoFormat             = 5;
+        pVuiParameters->colourPrimaries         = 2;
+        pVuiParameters->transferCharacteristics = 2;
+        pVuiParameters->matrixCoefficients      = 2;
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->chromaLocInfoPresentFlag =
+        (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pVuiParameters->chromaLocInfoPresentFlag)
+    {
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->chromaSampleLocTypeTopField);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pVuiParameters->chromaSampleLocTypeTopField > 5)
+            return(HANTRO_NOK);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->chromaSampleLocTypeBottomField);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pVuiParameters->chromaSampleLocTypeBottomField > 5)
+            return(HANTRO_NOK);
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->timingInfoPresentFlag =
+        (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pVuiParameters->timingInfoPresentFlag)
+    {
+        /* 32-bit fields are read with ShowBits32 + FlushBits; a value of
+         * zero is rejected for both numUnitsInTick and timeScale */
+        tmp = h264bsdShowBits32(pStrmData);
+        if (h264bsdFlushBits(pStrmData, 32) == END_OF_STREAM)
+            return(HANTRO_NOK);
+        if (tmp == 0)
+            return(HANTRO_NOK);
+        pVuiParameters->numUnitsInTick = tmp;
+
+        tmp = h264bsdShowBits32(pStrmData);
+        if (h264bsdFlushBits(pStrmData, 32) == END_OF_STREAM)
+            return(HANTRO_NOK);
+        if (tmp == 0)
+            return(HANTRO_NOK);
+        pVuiParameters->timeScale = tmp;
+
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->fixedFrameRateFlag =
+            (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->nalHrdParametersPresentFlag =
+        (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pVuiParameters->nalHrdParametersPresentFlag)
+    {
+        tmp = DecodeHrdParameters(pStrmData, &pVuiParameters->nalHrdParameters);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+    }
+    else
+    {
+        pVuiParameters->nalHrdParameters.cpbCnt          = 1;
+        /* MaxBR and MaxCPB should be the values corresponding to the levelIdc
+         * in the SPS containing these VUI parameters. However, these values
+         * are not used anywhere and maximum for any level will be used here */
+        pVuiParameters->nalHrdParameters.bitRateValue[0] = 1200 * MAX_BR + 1;
+        pVuiParameters->nalHrdParameters.cpbSizeValue[0] = 1200 * MAX_CPB + 1;
+        pVuiParameters->nalHrdParameters.initialCpbRemovalDelayLength = 24;
+        pVuiParameters->nalHrdParameters.cpbRemovalDelayLength        = 24;
+        pVuiParameters->nalHrdParameters.dpbOutputDelayLength         = 24;
+        pVuiParameters->nalHrdParameters.timeOffsetLength             = 24;
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->vclHrdParametersPresentFlag =
+        (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pVuiParameters->vclHrdParametersPresentFlag)
+    {
+        tmp = DecodeHrdParameters(pStrmData, &pVuiParameters->vclHrdParameters);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+    }
+    else
+    {
+        pVuiParameters->vclHrdParameters.cpbCnt          = 1;
+        /* MaxBR and MaxCPB should be the values corresponding to the levelIdc
+         * in the SPS containing these VUI parameters. However, these values
+         * are not used anywhere and maximum for any level will be used here */
+        pVuiParameters->vclHrdParameters.bitRateValue[0] = 1000 * MAX_BR + 1;
+        pVuiParameters->vclHrdParameters.cpbSizeValue[0] = 1000 * MAX_CPB + 1;
+        pVuiParameters->vclHrdParameters.initialCpbRemovalDelayLength = 24;
+        pVuiParameters->vclHrdParameters.cpbRemovalDelayLength        = 24;
+        pVuiParameters->vclHrdParameters.dpbOutputDelayLength         = 24;
+        pVuiParameters->vclHrdParameters.timeOffsetLength             = 24;
+    }
+
+    /* the low delay flag is present only if either HRD set was coded */
+    if (pVuiParameters->nalHrdParametersPresentFlag ||
+      pVuiParameters->vclHrdParametersPresentFlag)
+    {
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->lowDelayHrdFlag =
+            (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->picStructPresentFlag =
+        (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->bitstreamRestrictionFlag =
+        (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pVuiParameters->bitstreamRestrictionFlag)
+    {
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->motionVectorsOverPicBoundariesFlag =
+            (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->maxBytesPerPicDenom);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pVuiParameters->maxBytesPerPicDenom > 16)
+            return(HANTRO_NOK);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->maxBitsPerMbDenom);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pVuiParameters->maxBitsPerMbDenom > 16)
+            return(HANTRO_NOK);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->log2MaxMvLengthHorizontal);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pVuiParameters->log2MaxMvLengthHorizontal > 16)
+            return(HANTRO_NOK);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->log2MaxMvLengthVertical);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pVuiParameters->log2MaxMvLengthVertical > 16)
+            return(HANTRO_NOK);
+
+        /* NOTE(review): the two values below are stored without any range
+         * check here - confirm that the users of these fields bound them */
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->numReorderFrames);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->maxDecFrameBuffering);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+    }
+    else
+    {
+        pVuiParameters->motionVectorsOverPicBoundariesFlag = HANTRO_TRUE;
+        pVuiParameters->maxBytesPerPicDenom       = 2;
+        pVuiParameters->maxBitsPerMbDenom         = 1;
+        pVuiParameters->log2MaxMvLengthHorizontal = 16;
+        pVuiParameters->log2MaxMvLengthVertical   = 16;
+        pVuiParameters->numReorderFrames          = MAX_DPB_SIZE;
+        pVuiParameters->maxDecFrameBuffering      = MAX_DPB_SIZE;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------ + + Function: DecodeHrdParameters + + Functional description: + Decode HRD parameters from the stream. See standard for details. + + Inputs: + pStrmData pointer to stream data structure + + Outputs: + pHrdParameters decoded information is stored here + + Returns: + HANTRO_OK success + HANTRO_NOK invalid stream data + +------------------------------------------------------------------------------*/ + +static u32 DecodeHrdParameters( + strmData_t *pStrmData, + hrdParameters_t *pHrdParameters) +{ + +/* Variables */ + + u32 tmp, i; + +/* Code */ + + ASSERT(pStrmData); + ASSERT(pHrdParameters); + + + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &pHrdParameters->cpbCnt); + if (tmp != HANTRO_OK) + return(tmp); + /* cpbCount = cpb_cnt_minus1 + 1 */ + pHrdParameters->cpbCnt++; + if (pHrdParameters->cpbCnt > MAX_CPB_CNT) + return(HANTRO_NOK); + + tmp = h264bsdGetBits(pStrmData, 4); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pHrdParameters->bitRateScale = tmp; + + tmp = h264bsdGetBits(pStrmData, 4); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pHrdParameters->cpbSizeScale = tmp; + + for (i = 0; i < pHrdParameters->cpbCnt; i++) + { + /* bit_rate_value_minus1 in the range [0, 2^32 - 2] */ + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pHrdParameters->bitRateValue[i]); + if (tmp != HANTRO_OK) + return(tmp); + if (pHrdParameters->bitRateValue[i] > 4294967294U) + return(HANTRO_NOK); + pHrdParameters->bitRateValue[i]++; + /* this may result in overflow, but this value is not used for + * anything */ + pHrdParameters->bitRateValue[i] *= + 1 << (6 + pHrdParameters->bitRateScale); + + /* cpb_size_value_minus1 in the range [0, 2^32 - 2] */ + tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, + &pHrdParameters->cpbSizeValue[i]); + if (tmp != HANTRO_OK) + return(tmp); + if (pHrdParameters->cpbSizeValue[i] > 4294967294U) + return(HANTRO_NOK); + pHrdParameters->cpbSizeValue[i]++; + /* 
this may result in overflow, but this value is not used for + * anything */ + pHrdParameters->cpbSizeValue[i] *= + 1 << (4 + pHrdParameters->cpbSizeScale); + + tmp = h264bsdGetBits(pStrmData, 1); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pHrdParameters->cbrFlag[i] = (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE; + } + + tmp = h264bsdGetBits(pStrmData, 5); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pHrdParameters->initialCpbRemovalDelayLength = tmp + 1; + + tmp = h264bsdGetBits(pStrmData, 5); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pHrdParameters->cpbRemovalDelayLength = tmp + 1; + + tmp = h264bsdGetBits(pStrmData, 5); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pHrdParameters->dpbOutputDelayLength = tmp + 1; + + tmp = h264bsdGetBits(pStrmData, 5); + if (tmp == END_OF_STREAM) + return(HANTRO_NOK); + pHrdParameters->timeOffsetLength = tmp; + + return(HANTRO_OK); + +} + diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.h new file mode 100755 index 0000000000000000000000000000000000000000..05d52a45a996a91a0b9300a028913d073d182163 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.h @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_VUI_H
+#define H264SWDEC_VUI_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+
+    MAX_CPB_CNT sizes the per-CPB arrays of hrdParameters_t and is the
+    largest cpbCnt accepted when HRD parameters are decoded.
+------------------------------------------------------------------------------*/
+
+#define MAX_CPB_CNT 32
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/* enumerated sample aspect ratios, ASPECT_RATIO_M_N means M:N; the values
+ * are compared against the 8-bit aspectRatioIdc read from the stream, and
+ * ASPECT_RATIO_EXTENDED_SAR signals that sarWidth/sarHeight are coded
+ * explicitly */
+enum
+{
+    ASPECT_RATIO_UNSPECIFIED = 0,
+    ASPECT_RATIO_1_1,
+    ASPECT_RATIO_12_11,
+    ASPECT_RATIO_10_11,
+    ASPECT_RATIO_16_11,
+    ASPECT_RATIO_40_33,
+    ASPECT_RATIO_24_11,
+    ASPECT_RATIO_20_11,
+    ASPECT_RATIO_32_11,
+    ASPECT_RATIO_80_33,
+    ASPECT_RATIO_18_11,
+    ASPECT_RATIO_15_11,
+    ASPECT_RATIO_64_33,
+    ASPECT_RATIO_160_99,
+    ASPECT_RATIO_EXTENDED_SAR = 255
+};
+
+/* structure to store Hypothetical Reference Decoder (HRD) parameters */
+typedef struct
+{
+    u32 cpbCnt; /* cpb_cnt_minus1 + 1 */
+    u32 bitRateScale; /* 4-bit value from the stream */
+    u32 cpbSizeScale; /* 4-bit value from the stream */
+    u32 bitRateValue[MAX_CPB_CNT]; /* (value_minus1 + 1) << (6 + scale) */
+    u32 cpbSizeValue[MAX_CPB_CNT]; /* (value_minus1 + 1) << (4 + scale) */
+    u32 cbrFlag[MAX_CPB_CNT];
+    u32 initialCpbRemovalDelayLength; /* coded 5-bit value + 1 */
+    u32 cpbRemovalDelayLength; /* coded 5-bit value + 1 */
+    u32 dpbOutputDelayLength; /* coded 5-bit value + 1 */
+    u32 timeOffsetLength; /* coded 5-bit value, stored as read */
+} hrdParameters_t;
+
+/* storage for VUI parameters; fields are listed in the order in which
+ * h264bsdDecodeVuiParameters reads them from the stream */
+typedef struct
+{
+    u32 aspectRatioPresentFlag;
+    u32 aspectRatioIdc;
+    u32 sarWidth;
+    u32 sarHeight;
+    u32 overscanInfoPresentFlag;
+    u32 overscanAppropriateFlag;
+    u32 videoSignalTypePresentFlag;
+    u32 videoFormat;
+    u32 videoFullRangeFlag;
+    u32 colourDescriptionPresentFlag;
+    u32 colourPrimaries;
+    u32 transferCharacteristics;
+    u32 matrixCoefficients;
+    u32 chromaLocInfoPresentFlag;
+    u32 chromaSampleLocTypeTopField;
+    u32 chromaSampleLocTypeBottomField;
+    u32 timingInfoPresentFlag;
+    u32 numUnitsInTick;
+    u32 timeScale;
+    u32 fixedFrameRateFlag;
+    u32 nalHrdParametersPresentFlag;
+    hrdParameters_t nalHrdParameters;
+    u32 vclHrdParametersPresentFlag;
+    hrdParameters_t vclHrdParameters;
+    u32 lowDelayHrdFlag;
+    u32 picStructPresentFlag;
+    u32 bitstreamRestrictionFlag;
+    u32 motionVectorsOverPicBoundariesFlag;
+    u32 maxBytesPerPicDenom;
+    u32 maxBitsPerMbDenom;
+    u32 log2MaxMvLengthHorizontal;
+    u32 log2MaxMvLengthVertical;
+    u32 numReorderFrames;
+    u32 maxDecFrameBuffering;
+} vuiParameters_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeVuiParameters(strmData_t *pStrmData,
+    vuiParameters_t *pVuiParameters);
+
+#endif /* #ifndef H264SWDEC_VUI_H */
+
diff --git a/media/libstagefright/omx/SoftOMXPlugin.cpp b/media/libstagefright/omx/SoftOMXPlugin.cpp
index 04ca39e3ab94e7b45ce705a8abb0d07a9a7b8aa4..02b1c8eacf26ad01ef024a806c0edc4d0d7da1b1 100644
--- a/media/libstagefright/omx/SoftOMXPlugin.cpp
+++ b/media/libstagefright/omx/SoftOMXPlugin.cpp
@@ -37,6 +37,7 @@ static const struct {
     { "OMX.google.aac.decoder", "aacdec", "audio_decoder.aac" },
     { "OMX.google.amrnb.decoder", "amrdec", "audio_decoder.amrnb" },
     { "OMX.google.amrwb.decoder", "amrdec", "audio_decoder.amrwb" },
+    { "OMX.google.h264.decoder", "h264dec", "video_decoder.avc" },
     { "OMX.google.avc.decoder", "avcdec", "video_decoder.avc" },
     { "OMX.google.g711.alaw.decoder", "g711dec", "audio_decoder.g711alaw" },
     { "OMX.google.g711.mlaw.decoder", "g711dec", "audio_decoder.g711mlaw" },