8 files changed, 2129 insertions, 0 deletions
diff --git a/plugins/pluginCUDA/dllmain_cuda.cxx b/plugins/pluginCUDA/dllmain_cuda.cxx
new file mode 100644
index 0000000..57c3ffd
--- /dev/null
+++ b/plugins/pluginCUDA/dllmain_cuda.cxx
@@ -0,0 +1,137 @@
+/* Copyright (C) 2013 Mamadou DIOP
+* Copyright (C) 2013 Doubango Telecom <http://www.doubango.org>
+*	
+* This file is part of Open Source Doubango Framework.
+*
+* DOUBANGO is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*	
+* DOUBANGO is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*	
+* You should have received a copy of the GNU General Public License
+* along with DOUBANGO.
+*/
+#include "plugin_cuda_config.h"
+#include "plugin_cuda_utils.h"
+
+#include "tinymedia/tmedia_codec.h"
+
+#include "tsk_plugin.h"
+#include "tsk_debug.h"
+
+#include <windows.h>
+
+#if defined(_MSC_VER)
+#       pragma comment(lib, "nvcuvenc")
+#       pragma comment(lib, "nvcuvid")
+#       pragma comment(lib, "cuda")
+#       pragma comment(lib, "cudart")
+
+#       pragma comment(lib, "d3d9")
+#       pragma comment(lib, "d3dx9")
+#endif
+
+
+#if !defined(PLUGIN_CUDA_H264_ENABLE)
+#	define PLUGIN_CUDA_H264_ENABLE 1
+#endif
+
+extern const tmedia_codec_plugin_def_t *cuda_codec_h264_main_plugin_def_t;
+extern const tmedia_codec_plugin_def_t *cuda_codec_h264_base_plugin_def_t;
+
+PLUGIN_CUDA_BEGIN_DECLS /* BEGIN */
+PLUGIN_CUDA_API int __plugin_get_def_count();
+PLUGIN_CUDA_API tsk_plugin_def_type_t __plugin_get_def_type_at(int index);
+PLUGIN_CUDA_API tsk_plugin_def_media_type_t __plugin_get_def_media_type_at(int index);
+PLUGIN_CUDA_API tsk_plugin_def_ptr_const_t __plugin_get_def_at(int index);
+PLUGIN_CUDA_END_DECLS /* END */
+
+BOOL APIENTRY DllMain( HMODULE hModule,
+                       DWORD  ul_reason_for_call,
+                       LPVOID lpReserved
+					 )
+{
+	switch (ul_reason_for_call)
+	{
+		case DLL_PROCESS_ATTACH:
+			break;
+		case DLL_THREAD_ATTACH:
+			break;
+		case DLL_THREAD_DETACH:
+			break;
+		case DLL_PROCESS_DETACH:
+			break;
+	}
+	return TRUE;
+}
+
+
+typedef enum PLUGIN_INDEX_E
+{
+#if PLUGIN_CUDA_H264_ENABLE
+	PLUGIN_INDEX_CODEC_H264_MAIN,
+	PLUGIN_INDEX_CODEC_H264_BASE,
+#endif
+	
+	PLUGIN_INDEX_COUNT
+}
+PLUGIN_INDEX_T;
+
+
+int __plugin_get_def_count()
+{
+	return CudaUtils::IsH264Supported() ? PLUGIN_INDEX_COUNT : 0;
+}
+
+tsk_plugin_def_type_t __plugin_get_def_type_at(int index)
+{
+#if PLUGIN_CUDA_H264_ENABLE
+	switch(index){
+		case PLUGIN_INDEX_CODEC_H264_MAIN:
+		case PLUGIN_INDEX_CODEC_H264_BASE:
+			{
+				return CudaUtils::IsH264Supported() ? tsk_plugin_def_type_codec : tsk_plugin_def_type_none;
+			}
+	}
+#endif
+	TSK_DEBUG_ERROR("No plugin at index %d", index);
+	return tsk_plugin_def_type_none;
+}
+
+tsk_plugin_def_media_type_t	__plugin_get_def_media_type_at(int index)
+{
+#if PLUGIN_CUDA_H264_ENABLE
+	switch(index){
+		case PLUGIN_INDEX_CODEC_H264_MAIN: 
+		case PLUGIN_INDEX_CODEC_H264_BASE:
+			{
+				return CudaUtils::IsH264Supported() ? tsk_plugin_def_media_type_video : tsk_plugin_def_media_type_none;
+			}
+	}
+#endif
+	TSK_DEBUG_ERROR("No plugin at index %d", index);
+	return tsk_plugin_def_media_type_none;
+}
+
+tsk_plugin_def_ptr_const_t __plugin_get_def_at(int index)
+{
+#if PLUGIN_CUDA_H264_ENABLE
+	switch(index){
+		case PLUGIN_INDEX_CODEC_H264_MAIN: 
+			{
+				return CudaUtils::IsH264Supported() ? cuda_codec_h264_main_plugin_def_t : tsk_null;
+			}
+		case PLUGIN_INDEX_CODEC_H264_BASE: 
+			{
+				return CudaUtils::IsH264Supported() ? cuda_codec_h264_base_plugin_def_t : tsk_null;
+			}
+	}
+#endif
+	TSK_DEBUG_ERROR("No plugin at index %d", index);
+	return tsk_null;
+}
diff --git a/plugins/pluginCUDA/pluginCUDA.vcproj b/plugins/pluginCUDA/pluginCUDA.vcproj
new file mode 100644
index 0000000..1f4e8f1
--- /dev/null
+++ b/plugins/pluginCUDA/pluginCUDA.vcproj
@@ -0,0 +1,225 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="9.00"
+	Name="pluginCUDA"
+	ProjectGUID="{97008E5F-C6FC-4748-BE0D-50400E6764CB}"
+	RootNamespace="pluginCUDA"
+	Keyword="Win32Proj"
+	TargetFrameworkVersion="196613"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="2"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="&quot;$(CUDA_PATH)include&quot;;.;..\..\thirdparties\win32\include;..\..\tinySAK\src;..\..\tinyMEDIA\include;..\..\tinySDP\include;..\..\tinyDAV\include;..\..\tinyRTP\include"
+				PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;PLUGIN_CUDA_EXPORTS"
+				MinimalRebuild="true"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				UsePrecompiledHeader="0"
+				WarningLevel="3"
+				WarnAsError="true"
+				DebugInformationFormat="4"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="$(OutDir)\tinySAK.lib $(OutDir)\tinyMEDIA.lib"
+				LinkIncremental="2"
+				AdditionalLibraryDirectories="$(DXSDK_DIR)/lib/x86;$(CUDA_PATH)/lib/$(PlatformName);"
+				GenerateDebugInformation="true"
+				SubSystem="2"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="2"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="2"
+				EnableIntrinsicFunctions="true"
+				AdditionalIncludeDirectories="&quot;$(CUDA_PATH)include&quot;;.;..\..\thirdparties\win32\include;..\..\tinySAK\src;..\..\tinyMEDIA\include;..\..\tinySDP\include;..\..\tinyDAV\include;..\..\tinyRTP\include"
+				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;PLUGIN_CUDA_EXPORTS"
+				RuntimeLibrary="2"
+				EnableFunctionLevelLinking="true"
+				UsePrecompiledHeader="0"
+				WarningLevel="3"
+				WarnAsError="true"
+				DebugInformationFormat="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="$(OutDir)\tinySAK.lib $(OutDir)\tinyMEDIA.lib"
+				LinkIncremental="1"
+				AdditionalLibraryDirectories="&quot;$(DXSDK_DIR)/lib/x86&quot;;&quot;$(CUDA_PATH)/lib/$(PlatformName)&quot;"
+				GenerateDebugInformation="true"
+				SubSystem="2"
+				OptimizeReferences="2"
+				EnableCOMDATFolding="2"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+			>
+			<File
+				RelativePath=".\dllmain_cuda.cxx"
+				>
+			</File>
+			<File
+				RelativePath=".\plugin_cuda_codec_h264.cxx"
+				>
+			</File>
+			<File
+				RelativePath=".\plugin_cuda_tdav.cxx"
+				>
+			</File>
+			<File
+				RelativePath=".\plugin_cuda_utils.cxx"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+			>
+			<File
+				RelativePath=".\plugin_cuda_config.h"
+				>
+			</File>
+			<File
+				RelativePath=".\plugin_cuda_utils.h"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+			>
+			<File
+				RelativePath=".\version.rc"
+				>
+			</File>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
diff --git a/plugins/pluginCUDA/plugin_cuda_codec_h264.cxx b/plugins/pluginCUDA/plugin_cuda_codec_h264.cxx
new file mode 100644
index 0000000..b2c8e2e
--- /dev/null
+++ b/plugins/pluginCUDA/plugin_cuda_codec_h264.cxx
@@ -0,0 +1,1346 @@
+/* Copyright (C) 2013 Mamadou DIOP
+* Copyright (C) 2013 Doubango Telecom <http://www.doubango.org>
+*	
+* This file is part of Open Source Doubango Framework.
+*
+* DOUBANGO is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*	
+* DOUBANGO is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*	
+* You should have received a copy of the GNU General Public License
+* along with DOUBANGO.
+*/
+#include "plugin_cuda_config.h"
+#include "plugin_cuda_utils.h"
+
+#include "tinydav/codecs/h264/tdav_codec_h264_common.h"
+
+#include "tinyrtp/rtp/trtp_rtp_packet.h"
+
+#include "tinymedia/tmedia_codec.h"
+#include "tinymedia/tmedia_params.h"
+#include "tinymedia/tmedia_defaults.h"
+
+#include "tsk_mutex.h"
+#include "tsk_params.h"
+#include "tsk_memory.h"
+#include "tsk_debug.h"
+
+#include <unknwn.h>
+#include <nvcuvid.h>
+#include <cuviddec.h>
+#include <NVEncoderAPI.h>
+#include <NVEncodeDataTypes.h>
+#include <d3d9.h>
+#include <cudad3d9.h>
+#include <cuda/types.h>
+#include <cuda.h>
+#include <Windows.h>
+
+typedef struct cuda_codec_h264_s
+{
+	TDAV_DECLARE_CODEC_H264_COMMON;
+
+	// Encoder
+	struct{
+		NVEncoder pInst;
+		NVEncoderParams ctxParams;
+		NVVE_CallbackParams clbParams;
+		void* pBufferPtr;
+		tsk_size_t nBufferSize;
+		int64_t frame_count;
+		tsk_bool_t force_idr;
+		int32_t quality; // [1-31]
+		int rotation;
+		int neg_width;
+		int neg_height;
+		int neg_fps;
+		int max_bitrate_bps;
+		int32_t max_bw_kpbs;
+		tsk_bool_t passthrough; // whether to bypass encoding
+	} encoder;
+	
+	// decoder
+	struct{
+		CUvideodecoder pInst;
+		CUVIDDECODECREATEINFO cuInfo;
+		CUvideoparser cuParser;
+		CUVIDPARSERPARAMS cuPaserParams;
+		CUdevice cuDevice;
+		IDirect3D9 *pD3D9;
+		IDirect3DDevice9 *pD3D9Device;
+		CUcontext cuContext;
+		struct {
+			void *pcuPtr; // MUST bee freed using cuMemFreeHost()
+			tsk_size_t nSize;
+			tsk_size_t nPitch;
+			tsk_bool_t bAvail;
+		} cuBuffer;
+		void* accumulator;
+		tsk_size_t accumulator_pos;
+		tsk_size_t accumulator_size;
+		uint16_t last_seq;
+		tsk_bool_t passthrough; // whether to bypass decoding
+		tsk_mutex_handle_t *phMutex;
+	} decoder;
+}
+cuda_codec_h264_t;
+
+#if !defined(PLUGIN_CUDA_H264_GOP_SIZE_IN_SECONDS)
+#	define PLUGIN_CUDA_H264_GOP_SIZE_IN_SECONDS		25
+#endif
+#if !defined(PLUGIN_CUDA_H264_MAX_FRM_CNT)
+#	define PLUGIN_CUDA_H264_MAX_FRM_CNT  2
+#endif
+
+static int cuda_codec_h264_init(cuda_codec_h264_t* self, profile_idc_t profile);
+static int cuda_codec_h264_deinit(cuda_codec_h264_t* self);
+static int cuda_codec_h264_open_encoder(cuda_codec_h264_t* self);
+static int cuda_codec_h264_close_encoder(cuda_codec_h264_t* self);
+static int cuda_codec_h264_open_decoder(cuda_codec_h264_t* self);
+static int cuda_codec_h264_close_decoder(cuda_codec_h264_t* self);
+
+static inline tsk_size_t _cuda_codec_h264_pict_layout(cuda_codec_h264_t* self, void**output, tsk_size_t *output_size);
+
+static int CUDAAPI _NVCallback_HandleVideoSequence(void *pvUserData, CUVIDEOFORMAT *pFormat);
+static int CUDAAPI _NVCallback_HandlePictureDecode(void *pvUserData, CUVIDPICPARAMS *pPicParams);
+static int CUDAAPI _NVCallback_HandlePictureDisplay(void *pvUserData, CUVIDPARSERDISPINFO *pPicParams);
+static unsigned char* CUDAAPI _NVCallback_HandleAcquireBitStream(int *pBufferSize, void *pUserdata);
+static void CUDAAPI _NVCallback_HandleReleaseBitStream(int nBytesInBuffer, unsigned char *cb,void *pUserdata);
+static void CUDAAPI _NVCallback_HandleOnBeginFrame(const NVVE_BeginFrameInfo *pbfi, void *pUserdata);
+static void CUDAAPI _NVCallback_HandleOnEndFrame(const NVVE_EndFrameInfo *pefi, void *pUserdata);
+
+/* ============ H.264 Base/Main Profile X.X Plugin interface functions ================= */
+
+static int cuda_codec_h264_set(tmedia_codec_t* self, const tmedia_param_t* param)
+{
+	cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)self;
+	if(!self->opened){
+		TSK_DEBUG_ERROR("Codec not opened");
+		return -1;
+	}
+	if(param->value_type == tmedia_pvt_int32){
+		if(tsk_striequals(param->key, "action")){
+			tmedia_codec_action_t action = (tmedia_codec_action_t)TSK_TO_INT32((uint8_t*)param->value);
+			switch(action){
+				case tmedia_codec_action_encode_idr:
+					{
+						h264->encoder.force_idr = tsk_true;
+						break;
+					}
+				case tmedia_codec_action_bw_down:
+					{
+						h264->encoder.quality = TSK_CLAMP(1, (h264->encoder.quality + 1), 31);
+						break;
+					}
+				case tmedia_codec_action_bw_up:
+					{
+						h264->encoder.quality = TSK_CLAMP(1, (h264->encoder.quality - 1), 31);
+						break;
+					}
+			}
+			return 0;
+		}
+		else if(tsk_striequals(param->key, "bypass-encoding")){
+			h264->encoder.passthrough = *((int32_t*)param->value) ? tsk_true : tsk_false;
+			TSK_DEBUG_INFO("[H.264] bypass-encoding = %d", h264->encoder.passthrough);
+			return 0;
+		}
+		else if(tsk_striequals(param->key, "bypass-decoding")){
+			h264->decoder.passthrough = *((int32_t*)param->value) ? tsk_true : tsk_false;
+			TSK_DEBUG_INFO("[H.264] bypass-decoding = %d", h264->decoder.passthrough);
+			return 0;
+		}
+		else if(tsk_striequals(param->key, "rotation")){
+			int rotation = *((int32_t*)param->value);
+			if(h264->encoder.rotation != rotation){
+				if(self->opened){
+					int ret;
+					h264->encoder.rotation = rotation;
+					if((ret = cuda_codec_h264_close_encoder(h264))){
+						return ret;
+					}
+					if((ret = cuda_codec_h264_open_encoder(h264))){
+						return ret;
+					}
+				}
+			}
+			return 0;
+		}
+	}
+	return -1;
+}
+
+
+static int cuda_codec_h264_open(tmedia_codec_t* self)
+{
+	int ret;
+	cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)self;
+
+	if(!h264){
+		TSK_DEBUG_ERROR("Invalid parameter");
+		return -1;
+	}
+	
+	/* the caller (base class) already checked that the codec is not opened */
+
+	//	Encoder
+	if((ret = cuda_codec_h264_open_encoder(h264))){
+		return ret;
+	}
+
+	//	Decoder
+	if((ret = cuda_codec_h264_open_decoder(h264))){
+		return ret;
+	}
+
+	return 0;
+}
+
+static int cuda_codec_h264_close(tmedia_codec_t* self)
+{
+	cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)self;
+
+	if(!h264){
+		TSK_DEBUG_ERROR("Invalid parameter");
+		return -1;
+	}
+
+	/* the caller (base class) alreasy checked that the codec is opened */
+
+	//	Encoder
+	cuda_codec_h264_close_encoder(h264);
+
+	//	Decoder
+	cuda_codec_h264_close_decoder(h264);
+
+	return 0;
+}
+
+static tsk_size_t cuda_codec_h264_encode(tmedia_codec_t* self, const void* in_data, tsk_size_t in_size, void** out_data, tsk_size_t* out_max_size)
+{
+	int ret = 0;
+	NVVE_EncodeFrameParams efparams;
+	tsk_bool_t send_idr, send_hdr;
+	unsigned long flags = 0;
+
+	cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)self;
+	tdav_codec_h264_common_t* common = (tdav_codec_h264_common_t*)self;
+
+	if(!self || !in_data || !in_size)
+	{
+		TSK_DEBUG_ERROR("Invalid parameter");
+		return 0;
+	}
+
+	if(h264->encoder.passthrough) {
+		tdav_codec_h264_rtp_encap(common, (const uint8_t*)in_data, in_size);
+		return 0;
+	}
+
+	if((h264->encoder.ctxParams.iOutputSize[1] * h264->encoder.ctxParams.iOutputSize[0] * 3) >> 1 != in_size)
+	{
+		/* guard */
+		TSK_DEBUG_ERROR("Invalid size");
+		return 0;
+	}
+
+	if(!self->opened || !h264->encoder.pInst /*|| !h264->encoder.pInst->IsReady()*/)
+	{
+		TSK_DEBUG_ERROR("Encoder not opened or not ready");
+		return 0;
+	}
+
+	if(h264->encoder.passthrough)
+	{
+		tdav_codec_h264_rtp_encap(TDAV_CODEC_H264_COMMON(h264), (const uint8_t*)in_data, in_size);
+		return 0;
+	}
+
+	HRESULT hr = S_OK;
+
+	efparams.Width = h264->encoder.ctxParams.iOutputSize[0];
+    efparams.Height = h264->encoder.ctxParams.iOutputSize[1];  
+	efparams.Pitch = (h264->encoder.ctxParams.nDeviceMemPitch ? h264->encoder.ctxParams.nDeviceMemPitch : h264->encoder.ctxParams.iOutputSize[0]);
+    efparams.PictureStruc = (NVVE_PicStruct)h264->encoder.ctxParams.iPictureType; 
+    efparams.SurfFmt = (NVVE_SurfaceFormat)h264->encoder.ctxParams.iSurfaceFormat;
+	efparams.progressiveFrame = (h264->encoder.ctxParams.iSurfaceFormat == 3) ? 1 : 0;
+    efparams.repeatFirstField = 0;
+	efparams.topfieldfirst = (h264->encoder.ctxParams.iSurfaceFormat == 1) ? 1 : 0;
+	efparams.picBuf = (unsigned char *)in_data;
+	efparams.bLast = 0;
+
+	// send IDR for:
+	//	- the first frame
+	//  - remote peer requested an IDR
+	//	- every second within the first 4seconds
+	send_idr = (
+		h264->encoder.frame_count++ == 0
+		|| h264 ->encoder.force_idr
+		|| ( (h264->encoder.frame_count < h264->encoder.neg_fps * 4) && ((h264->encoder.frame_count % h264->encoder.neg_fps)==0) )
+	   );
+
+	if(send_idr)
+	{
+		flags |= 0x04;  // FORCE IDR
+	}
+
+	// send SPS and PPS headers for:
+	//  - IDR frames (not required but it's the easiest way to deal with pkt loss)
+	//  - every 5 seconds after the first 4seconds
+	send_hdr = (
+		send_idr
+		|| ( (h264->encoder.frame_count % (h264->encoder.neg_fps * 5))==0 )
+		);
+	if(send_hdr)
+	{
+		if(h264->encoder.ctxParams.iDisableSPSPPS)
+		{
+			unsigned char SPSPPSBuff[1024];
+			int SPSPPSBuffSize = sizeof(SPSPPSBuff);
+			hr = NVGetSPSPPS(h264->encoder.pInst, SPSPPSBuff, SPSPPSBuffSize, &SPSPPSBuffSize);
+			if(SUCCEEDED(hr))
+			{
+				int size = 0;
+				while(size < SPSPPSBuffSize - 2)
+				{
+					int16_t next_size = ((int16_t)SPSPPSBuff[size])<<1 | ((int16_t)SPSPPSBuff[size + 1]);
+					tdav_codec_h264_rtp_encap(common, &SPSPPSBuff[size + 2], next_size);
+					size += next_size + 2;
+				}
+			}
+			else
+			{
+				TSK_DEBUG_ERROR("NVGetSPSPPS failed with error code = %08x", hr)
+			}
+		}
+	}
+
+	// Encode data
+	CHECK_HR(hr = NVEncodeFrame(h264->encoder.pInst, &efparams, flags, NULL));
+
+	// reset
+	h264->encoder.force_idr = tsk_false;
+
+bail:
+	return 0;
+}
+
+static tsk_size_t cuda_codec_h264_decode(tmedia_codec_t* self, const void* in_data, tsk_size_t in_size, void** out_data, tsk_size_t* out_max_size, const tsk_object_t* proto_hdr)
+{
+	cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)self;
+	const trtp_rtp_header_t* rtp_hdr = (const trtp_rtp_header_t*)proto_hdr;
+	
+	const uint8_t* pay_ptr = tsk_null;
+	tsk_size_t pay_size = 0;
+	int ret;
+	tsk_bool_t append_scp;
+	tsk_bool_t sps_or_pps;
+	tsk_size_t retsize = 0, size_to_copy = 0;
+	static const tsk_size_t xmax_size = (3840 * 2160 * 3) >> 3; // >>3 instead of >>1 (not an error)
+	static tsk_size_t start_code_prefix_size = sizeof(H264_START_CODE_PREFIX);
+
+	if(!h264 || !in_data || !in_size || !out_data)
+	{
+		TSK_DEBUG_ERROR("Invalid parameter");
+		return 0;
+	}
+
+	if(!self->opened || !h264->encoder.pInst)
+	{
+		TSK_DEBUG_ERROR("Decoder not opened or not ready");
+		return 0;
+	}
+	
+	HRESULT hr = S_OK;
+
+	/* Packet lost? */
+	if((h264->decoder.last_seq + 1) != rtp_hdr->seq_num && h264->decoder.last_seq)
+	{
+		TSK_DEBUG_INFO("[H.264] Packet loss, seq_num=%d", (h264->decoder.last_seq + 1));
+	}
+	h264->decoder.last_seq = rtp_hdr->seq_num;
+
+
+	/* 5.3. NAL Unit Octet Usage
+	  +---------------+
+      |0|1|2|3|4|5|6|7|
+      +-+-+-+-+-+-+-+-+
+      |F|NRI|  Type   |
+      +---------------+
+	*/
+	if(*((uint8_t*)in_data) & 0x80)
+	{
+		TSK_DEBUG_WARN("F=1");
+		/* reset accumulator */
+		h264->decoder.accumulator = 0;
+		return 0;
+	}
+
+	/* get payload */
+	if((ret = tdav_codec_h264_get_pay(in_data, in_size, (const void**)&pay_ptr, &pay_size, &append_scp)) || !pay_ptr || !pay_size)
+	{
+		TSK_DEBUG_ERROR("Depayloader failed to get H.264 content");
+		return 0;
+	}
+	//append_scp = tsk_true;
+	size_to_copy = pay_size + (append_scp ? start_code_prefix_size : 0);
+	// whether it's SPS or PPS (append_scp is false for subsequent FUA chuncks)
+	sps_or_pps = append_scp && pay_ptr && ((pay_ptr[0] & 0x1F) == 7 || (pay_ptr[0] & 0x1F) == 8);
+	
+	// start-accumulator
+	if(!h264->decoder.accumulator)
+	{
+		if(size_to_copy > xmax_size)
+		{
+			TSK_DEBUG_ERROR("%u too big to contain valid encoded data. xmax_size=%u", size_to_copy, xmax_size);
+			return 0;
+		}
+		if(!(h264->decoder.accumulator = tsk_calloc(size_to_copy, sizeof(uint8_t))))
+		{
+			TSK_DEBUG_ERROR("Failed to allocated new buffer");
+			return 0;
+		}
+		h264->decoder.accumulator_size = size_to_copy;
+	}
+	if((h264->decoder.accumulator_pos + size_to_copy) >= xmax_size)
+	{
+		TSK_DEBUG_ERROR("BufferOverflow");
+		h264->decoder.accumulator_pos = 0;
+		return 0;
+	}
+	if((h264->decoder.accumulator_pos + size_to_copy) > h264->decoder.accumulator_size)
+	{
+		if(!(h264->decoder.accumulator = tsk_realloc(h264->decoder.accumulator, (h264->decoder.accumulator_pos + size_to_copy))))
+		{
+			TSK_DEBUG_ERROR("Failed to reallocated new buffer");
+			h264->decoder.accumulator_pos = 0;
+			h264->decoder.accumulator_size = 0;
+			return 0;
+		}
+		h264->decoder.accumulator_size = (h264->decoder.accumulator_pos + size_to_copy);
+	}
+
+	if(append_scp)
+	{
+		memcpy(&((uint8_t*)h264->decoder.accumulator)[h264->decoder.accumulator_pos], H264_START_CODE_PREFIX, start_code_prefix_size);
+		h264->decoder.accumulator_pos += start_code_prefix_size;
+	}
+	memcpy(&((uint8_t*)h264->decoder.accumulator)[h264->decoder.accumulator_pos], pay_ptr, pay_size);
+	h264->decoder.accumulator_pos += pay_size;
+	// end-accumulator
+
+
+	if(sps_or_pps)
+	{
+		// http://libav-users.943685.n4.nabble.com/Decode-H264-streams-how-to-fill-AVCodecContext-from-SPS-PPS-td2484472.html
+		// SPS and PPS should be bundled with IDR
+		TSK_DEBUG_INFO("Receiving SPS or PPS ...to be tied to an IDR");
+	}
+	else if(rtp_hdr->marker)
+	{
+		if(h264->decoder.passthrough)
+		{
+			if(*out_max_size < h264->decoder.accumulator_pos)
+			{
+				if((*out_data = tsk_realloc(*out_data, h264->decoder.accumulator_pos)))
+				{
+					*out_max_size = h264->decoder.accumulator_pos;
+				}
+				else
+				{
+					*out_max_size = 0;
+					return 0;
+				}
+			}
+			memcpy(*out_data, h264->decoder.accumulator, h264->decoder.accumulator_pos);
+			retsize = h264->decoder.accumulator_pos;
+		}
+		else
+		{ 
+			// !h264->decoder.passthrough
+			CUVIDSOURCEDATAPACKET pkt;
+			CUresult cuResult;
+			pkt.flags = 0;
+			pkt.payload_size = (unsigned long) h264->decoder.accumulator_pos;
+			pkt.payload = (unsigned char *)h264->decoder.accumulator;
+			pkt.timestamp = 0;
+
+			// reset accumulator
+			h264->decoder.accumulator_pos = 0;
+			cuResult = cuvidParseVideoData(h264->decoder.cuParser, &pkt);
+			if(cuResult != CUDA_SUCCESS)
+			{
+				TSK_DEBUG_ERROR("cuvidParseVideoData() failed with error code = %d", (int)cuResult);
+				CHECK_HR(hr = E_FAIL);
+			}
+			
+			if(h264->decoder.cuBuffer.bAvail)
+			{
+				h264->decoder.cuBuffer.bAvail = tsk_false;
+				if((retsize = _cuda_codec_h264_pict_layout(h264, out_data, out_max_size)) == 0)
+				{
+					TSK_DEBUG_ERROR("_cuda_codec_h264_pict_layout failed");
+					CHECK_HR(hr = E_FAIL);
+				}
+			}
+		}// else(!h264->decoder.passthrough)
+	} // else if(rtp_hdr->marker)
+
+bail:
+	if(FAILED(hr))
+	{
+		TSK_DEBUG_INFO("Failed to decode the buffer with error code =%d, size=%u, append=%s", ret, h264->decoder.accumulator_pos, append_scp ? "yes" : "no");
+		if(TMEDIA_CODEC_VIDEO(self)->in.callback)
+		{
+			TMEDIA_CODEC_VIDEO(self)->in.result.type = tmedia_video_decode_result_type_error;
+			TMEDIA_CODEC_VIDEO(self)->in.result.proto_hdr = proto_hdr;
+			TMEDIA_CODEC_VIDEO(self)->in.callback(&TMEDIA_CODEC_VIDEO(self)->in.result);
+		}
+	}
+	return retsize;
+}
+
+static tsk_bool_t cuda_codec_h264_sdp_att_match(const tmedia_codec_t* self, const char* att_name, const char* att_value)
+{
+	return tdav_codec_h264_common_sdp_att_match((tdav_codec_h264_common_t*)self, att_name, att_value);
+}
+
+static char* cuda_codec_h264_sdp_att_get(const tmedia_codec_t* self, const char* att_name)
+{
+	char* att = tdav_codec_h264_common_sdp_att_get((const tdav_codec_h264_common_t*)self, att_name);
+	if(att && tsk_striequals(att_name, "fmtp")) {
+		tsk_strcat(&att, "; impl=CUDA");
+	}
+	return att;
+}
+
+
+
+
+/* ============ H.264 Base Profile Plugin interface ================= */
+
+/* constructor */
+static tsk_object_t* cuda_codec_h264_base_ctor(tsk_object_t * self, va_list * app)
+{
+	cuda_codec_h264_t *h264 = (cuda_codec_h264_t*)self;
+	if(h264){
+		/* init base: called by tmedia_codec_create() */
+		/* init self */
+		if(cuda_codec_h264_init(h264, profile_idc_baseline) != 0){
+			return tsk_null;
+		}
+	}
+	return self;
+}
+/* destructor */
+static tsk_object_t* cuda_codec_h264_base_dtor(tsk_object_t * self)
+{ 
+	cuda_codec_h264_t *h264 = (cuda_codec_h264_t*)self;
+	if(h264){
+		/* deinit base */
+		tdav_codec_h264_common_deinit(TDAV_CODEC_H264_COMMON(self));
+		/* deinit self */
+		cuda_codec_h264_deinit(h264);
+	}
+
+	return self;
+}
+/* object definition */
+static const tsk_object_def_t cuda_codec_h264_base_def_s = 
+{
+	sizeof(cuda_codec_h264_t),
+	cuda_codec_h264_base_ctor, 
+	cuda_codec_h264_base_dtor,
+	tmedia_codec_cmp, 
+};
+/* plugin definition*/
+static const tmedia_codec_plugin_def_t cuda_codec_h264_base_plugin_def_s = 
+{
+	&cuda_codec_h264_base_def_s,
+
+	tmedia_video,
+	tmedia_codec_id_h264_bp,
+	"H264",
+	"H264 Base Profile (NVIDIA CUDA)",
+	TMEDIA_CODEC_FORMAT_H264_BP,
+	tsk_true,
+	90000, // rate
+	
+	/* audio */
+	{ 0 },
+
+	/* video (width, height, fps) */
+	{176, 144, 0}, // fps is @deprecated
+
+	cuda_codec_h264_set,
+	cuda_codec_h264_open,
+	cuda_codec_h264_close,
+	cuda_codec_h264_encode,
+	cuda_codec_h264_decode,
+	cuda_codec_h264_sdp_att_match,
+	cuda_codec_h264_sdp_att_get
+};
+const tmedia_codec_plugin_def_t *cuda_codec_h264_base_plugin_def_t = &cuda_codec_h264_base_plugin_def_s;
+
+/* ============ H.264 Main Profile Plugin interface ================= */
+
+/* constructor */
+static tsk_object_t* cuda_codec_h264_main_ctor(tsk_object_t * self, va_list * app)
+{
+	cuda_codec_h264_t *h264 = (cuda_codec_h264_t*)self;
+	if(h264){
+		/* init base: called by tmedia_codec_create() */
+		/* init self */
+		if(cuda_codec_h264_init(h264, profile_idc_main) != 0){
+			return tsk_null;
+		}
+	}
+	return self;
+}
+/* destructor */
+static tsk_object_t* cuda_codec_h264_main_dtor(tsk_object_t * self)
+{ 
+	cuda_codec_h264_t *h264 = (cuda_codec_h264_t*)self;
+	if(h264){
+		/* deinit base */
+		tdav_codec_h264_common_deinit(TDAV_CODEC_H264_COMMON(self));
+		/* deinit self */
+		cuda_codec_h264_deinit(h264);
+		
+	}
+
+	return self;
+}
+/* object definition */
+static const tsk_object_def_t cuda_codec_h264_main_def_s = 
+{
+	sizeof(cuda_codec_h264_t),
+	cuda_codec_h264_main_ctor, 
+	cuda_codec_h264_main_dtor,
+	tmedia_codec_cmp, 
+};
+/* plugin definition*/
+static const tmedia_codec_plugin_def_t cuda_codec_h264_main_plugin_def_s = 
+{
+	&cuda_codec_h264_main_def_s,
+
+	tmedia_video,
+	tmedia_codec_id_h264_mp,
+	"H264",
+	"H264 Main Profile (NVIDIA CUDA)",
+	TMEDIA_CODEC_FORMAT_H264_MP,
+	tsk_true,
+	90000, // rate
+	
+	/* audio */
+	{ 0 },
+
+	/* video (width, height, fps)*/
+	{176, 144, 0},// fps is @deprecated
+
+	cuda_codec_h264_set,
+	cuda_codec_h264_open,
+	cuda_codec_h264_close,
+	cuda_codec_h264_encode,
+	cuda_codec_h264_decode,
+	cuda_codec_h264_sdp_att_match,
+	cuda_codec_h264_sdp_att_get
+};
+const tmedia_codec_plugin_def_t *cuda_codec_h264_main_plugin_def_t = &cuda_codec_h264_main_plugin_def_s;
+
+
+
+
+
+/* ============ Common To all H264 codecs ================= */
+
+int cuda_codec_h264_open_encoder(cuda_codec_h264_t* self)
+{
+	HRESULT hr = S_OK;
+	int32_t max_bw_kpbs;
+	int bestGPU = 0, gpuPerf = 0;
+	static int low_latency = 1;
+	tdav_codec_h264_common_t* common = (tdav_codec_h264_common_t*)self;
+
+	if(self->encoder.pInst)
+	{
+		TSK_DEBUG_ERROR("Encoder already initialized");
+#if defined(E_ILLEGAL_METHOD_CALL)
+		CHECK_HR(hr = E_ILLEGAL_METHOD_CALL);
+#else
+		CHECK_HR(hr = 0x8000000EL);
+#endif
+	}
+
+	memset(&self->encoder.clbParams, 0, sizeof(self->encoder.clbParams));
+	memset(&self->encoder.ctxParams, 0, sizeof(self->encoder.ctxParams));
+
+	// create encoder
+	CHECK_HR(hr = NVCreateEncoder(&self->encoder.pInst));
+	CHECK_HR(hr = NVSetCodec(self->encoder.pInst, NV_CODEC_TYPE_H264));
+	CHECK_HR(hr = NVSetDefaultParam(self->encoder.pInst));
+
+	CHECK_HR(hr = NVGetParamValue(self->encoder.pInst, NVVE_GET_GPU_COUNT, &self->encoder.ctxParams.GPU_count));
+	{
+		int temp = 0, deviceCount;
+		for (deviceCount=0; deviceCount < self->encoder.ctxParams.GPU_count; deviceCount++)
+		{
+			NVVE_GPUAttributes GPUAttributes = {0};
+
+			GPUAttributes.iGpuOrdinal = deviceCount;
+			hr = NVGetParamValue(self->encoder.pInst, NVVE_GET_GPU_ATTRIBUTES,  &GPUAttributes);
+			if(FAILED(hr))
+			{
+				TSK_DEBUG_ERROR("NVGetParamValue(NVVE_GET_GPU_ATTRIBUTES) failed with error code = %08x", hr);
+				continue;
+			}
+
+			temp = GPUAttributes.iClockRate * GPUAttributes.iMultiProcessorCount;
+			temp = temp * CudaUtils::ConvertSMVer2Cores(GPUAttributes.iMajor, GPUAttributes.iMinor);
+
+			if(temp > gpuPerf)
+			{
+				gpuPerf = temp;
+				bestGPU = deviceCount;
+			}
+		}
+	}
+	
+	self->encoder.neg_width = (self->encoder.rotation == 90 || self->encoder.rotation == 270) ? TMEDIA_CODEC_VIDEO(self)->out.height : TMEDIA_CODEC_VIDEO(self)->out.width;
+	self->encoder.neg_height = (self->encoder.rotation == 90 || self->encoder.rotation == 270) ? TMEDIA_CODEC_VIDEO(self)->out.width : TMEDIA_CODEC_VIDEO(self)->out.height;
+	self->encoder.neg_fps = TMEDIA_CODEC_VIDEO(self)->out.fps;
+	max_bw_kpbs = TSK_CLAMP(
+		0,
+		tmedia_get_video_bandwidth_kbps_2(self->encoder.neg_width, self->encoder.neg_height, self->encoder.neg_fps), 
+		self->encoder.max_bw_kpbs
+	);
+	self->encoder.max_bitrate_bps = (max_bw_kpbs * 1024);
+
+	TSK_DEBUG_INFO("[H.264 CUDA Encoder] neg_width=%d, neg_height=%d, neg_fps=%d, max_bitrate_bps=%d",
+		self->encoder.neg_width,
+		self->encoder.neg_height,
+		self->encoder.neg_fps,
+		self->encoder.max_bitrate_bps
+		);
+
+	self->encoder.ctxParams.iForcedGPU = bestGPU;
+	self->encoder.ctxParams.iInputSize[0] = self->encoder.neg_width;
+	self->encoder.ctxParams.iInputSize[1] = self->encoder.neg_height;
+	self->encoder.ctxParams.iOutputSize[0] = self->encoder.neg_width;
+	self->encoder.ctxParams.iOutputSize[1] = self->encoder.neg_height;
+	self->encoder.ctxParams.GPUOffloadLevel= NVVE_GPU_OFFLOAD_ALL;
+	self->encoder.ctxParams.iSurfaceFormat = (int)IYUV;
+	self->encoder.ctxParams.iPictureType   = (int)FRAME_PICTURE;
+	self->encoder.ctxParams.Fieldmode = MODE_FRAME;
+	self->encoder.ctxParams.Presets = (NVVE_PRESETS_TARGET)-1;//Should be iPod, Zune ...
+	// self->encoder.ctxParams.iP_Interval = 1;
+	self->encoder.ctxParams.iAspectRatio[0] = 1;
+	self->encoder.ctxParams.iAspectRatio[1] = 1;
+	self->encoder.ctxParams.iAspectRatio[2] = 0;
+	self->encoder.ctxParams.iIDR_Period = (self->encoder.neg_fps * PLUGIN_CUDA_H264_GOP_SIZE_IN_SECONDS);
+	self->encoder.ctxParams.iUseDeviceMem = 0;
+	self->encoder.ctxParams.iDynamicGOP = 0;
+	self->encoder.ctxParams.RCType = RC_CBR;
+	self->encoder.ctxParams.iAvgBitrate = self->encoder.max_bitrate_bps;
+	self->encoder.ctxParams.iPeakBitrate = self->encoder.max_bitrate_bps;
+	self->encoder.ctxParams.iQP_Level_Intra = 25;
+	self->encoder.ctxParams.iQP_Level_InterP = 28;
+	self->encoder.ctxParams.iQP_Level_InterB = 31;
+	self->encoder.ctxParams.iFrameRate[0] = self->encoder.neg_fps;
+	self->encoder.ctxParams.iFrameRate[1] = 1;
+	self->encoder.ctxParams.iDeblockMode = 1;
+	self->encoder.ctxParams.iForceIntra = 0;
+	self->encoder.ctxParams.iForceIDR = 0;
+	self->encoder.ctxParams.iClearStat = 0;
+	self->encoder.ctxParams.DIMode = DI_MEDIAN;
+	self->encoder.ctxParams.iDisableSPSPPS = 1; // Do not include SPS/PPS frames
+	self->encoder.ctxParams.iNaluFramingType = 0; // StartCodes
+	self->encoder.ctxParams.iMultiGPU = 1;
+	switch(common->profile)
+	{
+		case profile_idc_baseline:
+			{
+				self->encoder.ctxParams.iDisableCabac = 1;
+				self->encoder.ctxParams.iProfileLevel = 0xff42; // 0xff -> autoselect level
+				break;
+			}
+		case profile_idc_main:
+			{
+				self->encoder.ctxParams.iDisableCabac = 0;
+				self->encoder.ctxParams.iProfileLevel = 0xff4d; // 0xff -> autoselect level
+				break;
+			}
+		default:
+			{
+				CHECK_HR(hr = E_NOTIMPL);
+				break;
+			}
+	}
+
+	//
+	// Allocate memory
+	//
+	self->encoder.nBufferSize = (self->encoder.ctxParams.iOutputSize[1] * self->encoder.ctxParams.iOutputSize[0] * 3) >> 4;
+	if(!self->encoder.pBufferPtr && !(self->encoder.pBufferPtr = tsk_realloc(self->encoder.pBufferPtr, self->encoder.nBufferSize)))
+	{
+		self->encoder.nBufferSize = 0;
+		CHECK_HR(hr = E_OUTOFMEMORY);
+	}
+
+	//
+	// Set parameters
+	//
+	hr = NVSetParamValue(self->encoder.pInst, NVVE_FORCE_GPU_SELECTION, &self->encoder.ctxParams.iForcedGPU);
+	if(FAILED(hr))
+	{
+		TSK_DEBUG_WARN("NVSetParamValue(NVVE_FORCE_GPU_SELECTION) failed with error code = %08x", hr);
+	}
+	CHECK_HR(hr = NVSetParamValue(self->encoder.pInst, NVVE_DEVICE_MEMORY_INPUT, &(self->encoder.ctxParams.iUseDeviceMem)));
+	hr = NVSetParamValue(self->encoder.pInst,NVVE_OUT_SIZE,		&(self->encoder.ctxParams.iOutputSize)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_OUT_SIZE) failed with error code = %08x", hr); }
+	hr = NVSetParamValue(self->encoder.pInst,NVVE_IN_SIZE,		&(self->encoder.ctxParams.iInputSize)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_IN_SIZE) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_MULTI_GPU,	&(self->encoder.ctxParams.iMultiGPU)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_MULTI_GPU) failed with error code = %08x", hr); }
+	hr = NVSetParamValue(self->encoder.pInst,NVVE_ASPECT_RATIO,	&(self->encoder.ctxParams.iAspectRatio));if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_ASPECT_RATIO) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_FIELD_ENC_MODE,	&(self->encoder.ctxParams.Fieldmode)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_FIELD_ENC_MODE) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_P_INTERVAL,		&(self->encoder.ctxParams.iP_Interval)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_P_INTERVAL) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_IDR_PERIOD,		&(self->encoder.ctxParams.iIDR_Period)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_IDR_PERIOD) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_DYNAMIC_GOP,	&(self->encoder.ctxParams.iDynamicGOP)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_DYNAMIC_GOP) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_RC_TYPE,		&(self->encoder.ctxParams.RCType)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_RC_TYPE) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_AVG_BITRATE,	&(self->encoder.ctxParams.iAvgBitrate)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_AVG_BITRATE) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_PEAK_BITRATE,	&(self->encoder.ctxParams.iPeakBitrate)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_PEAK_BITRATE) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_QP_LEVEL_INTRA,	&(self->encoder.ctxParams.iQP_Level_Intra)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_OUT_SIZE) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_QP_LEVEL_INTER_P,&(self->encoder.ctxParams.iQP_Level_InterP)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_QP_LEVEL_INTER_P) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_QP_LEVEL_INTER_B,&(self->encoder.ctxParams.iQP_Level_InterB)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_QP_LEVEL_INTER_B) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_FRAME_RATE,		&(self->encoder.ctxParams.iFrameRate)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_FRAME_RATE) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_DEBLOCK_MODE,	&(self->encoder.ctxParams.iDeblockMode)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_DEBLOCK_MODE) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_PROFILE_LEVEL,	&(self->encoder.ctxParams.iProfileLevel)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_PROFILE_LEVEL) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_FORCE_INTRA,	&(self->encoder.ctxParams.iForceIntra)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_FORCE_INTRA) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_FORCE_IDR,		&(self->encoder.ctxParams.iForceIDR)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_FORCE_IDR) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_CLEAR_STAT,		&(self->encoder.ctxParams.iClearStat)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_CLEAR_STAT) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_SET_DEINTERLACE,&(self->encoder.ctxParams.DIMode)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_SET_DEINTERLACE) failed with error code = %08x", hr); }
+    if (self->encoder.ctxParams.Presets != -1)
+	{
+        hr = NVSetParamValue(self->encoder.pInst,NVVE_PRESETS,	&(self->encoder.ctxParams.Presets)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_PRESETS) failed with error code = %08x", hr); }
+	}
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_DISABLE_CABAC,	&(self->encoder.ctxParams.iDisableCabac)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_DISABLE_CABAC) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_CONFIGURE_NALU_FRAMING_TYPE, &(self->encoder.ctxParams.iNaluFramingType)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_CONFIGURE_NALU_FRAMING_TYPE) failed with error code = %08x", hr); }
+    hr = NVSetParamValue(self->encoder.pInst,NVVE_DISABLE_SPS_PPS,&(self->encoder.ctxParams.iDisableSPSPPS)); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_DISABLE_SPS_PPS) failed with error code = %08x", hr); }
+	hr = NVSetParamValue(self->encoder.pInst,NVVE_LOW_LATENCY,&low_latency); if (hr!=S_OK) { TSK_DEBUG_WARN("NVSetParamValue(NVVE_LOW_LATENCY) failed with error code = %08x", hr); }
+
+	self->encoder.clbParams.pfnacquirebitstream = _NVCallback_HandleAcquireBitStream;
+	self->encoder.clbParams.pfnonbeginframe     = _NVCallback_HandleOnBeginFrame;
+	self->encoder.clbParams.pfnonendframe       = _NVCallback_HandleOnEndFrame;
+	self->encoder.clbParams.pfnreleasebitstream = _NVCallback_HandleReleaseBitStream;
+	NVRegisterCB(self->encoder.pInst, self->encoder.clbParams, self);
+	
+	
+	CHECK_HR(hr = NVCreateHWEncoder(self->encoder.pInst));
+	
+bail:
+	return SUCCEEDED(hr) ? 0 : -1;
+}
+
+int cuda_codec_h264_close_encoder(cuda_codec_h264_t* self)
+{
+	if(self)
+	{
+		if(self->encoder.pInst)
+		{
+			NVDestroyEncoder(self->encoder.pInst);
+			self->encoder.pInst = NULL;
+		}
+		if(self->encoder.pBufferPtr)
+		{
+			TSK_FREE(self->encoder.pBufferPtr);
+			self->encoder.nBufferSize = 0;
+		}
+		self->encoder.frame_count = 0;
+	}
+
+	return 0;
+}
+
+int cuda_codec_h264_open_decoder(cuda_codec_h264_t* self)
+{
+	HRESULT hr = S_OK;
+	tdav_codec_h264_common_t* common = (tdav_codec_h264_common_t*)self;
+	int i, adapterCount;
+	CUresult cuResult;
+	D3DPRESENT_PARAMETERS d3dpp;
+
+	if(self->decoder.pInst || self->decoder.cuDevice || self->decoder.cuContext || self->decoder.pD3D9 || self->decoder.pD3D9Device) 
+	{
+		TSK_DEBUG_ERROR("Decoder already initialized");
+#if defined(E_ILLEGAL_METHOD_CALL)
+		CHECK_HR(hr = E_ILLEGAL_METHOD_CALL);
+#else
+		CHECK_HR(hr = 0x8000000EL);
+#endif
+	}
+
+	TSK_DEBUG_INFO("[H.264 MF Decoder] neg_width=%d, neg_height=%d, neg_fps=%d",
+		TMEDIA_CODEC_VIDEO(self)->in.width,
+		TMEDIA_CODEC_VIDEO(self)->in.height,
+		TMEDIA_CODEC_VIDEO(self)->in.fps
+		);
+
+	memset(&self->decoder.cuInfo, 0, sizeof(self->decoder.cuInfo));
+	self->decoder.cuInfo.ulCreationFlags = cudaVideoCreate_PreferCUDA;
+	self->decoder.cuInfo.CodecType = cudaVideoCodec_H264;
+	self->decoder.cuInfo.ulWidth = TMEDIA_CODEC_VIDEO(self)->in.width;
+	self->decoder.cuInfo.ulTargetWidth = TMEDIA_CODEC_VIDEO(self)->in.width;
+	self->decoder.cuInfo.ulHeight = TMEDIA_CODEC_VIDEO(self)->in.height;
+	self->decoder.cuInfo.ulTargetHeight = TMEDIA_CODEC_VIDEO(self)->in.height;
+	self->decoder.cuInfo.ulNumDecodeSurfaces = PLUGIN_CUDA_H264_MAX_FRM_CNT;
+	self->decoder.cuInfo.ulNumOutputSurfaces = 1;
+	self->decoder.cuInfo.ChromaFormat = cudaVideoChromaFormat_420;
+	self->decoder.cuInfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
+	self->decoder.cuInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
+
+	self->decoder.cuDevice = CudaUtils::GetMaxGflopsDeviceId();
+
+#if _DEBUG || DEBUG
+	{
+		int major, minor;
+		size_t totalGlobalMem;
+		char deviceName[256];
+		cuDeviceComputeCapability(&major, &minor, self->decoder.cuDevice);
+		cuDeviceGetName(deviceName, sizeof(deviceName), self->decoder.cuDevice);
+		TSK_DEBUG_INFO("[CUDA H.264 decoder] Using GPU Device %d: %s has SM %d.%d compute capability", self->decoder.cuDevice, deviceName, major, minor);
+ 
+		/*cutilDrvSafeCallNoSync(*/cuDeviceTotalMem(&totalGlobalMem, self->decoder.cuDevice)/*)*/;
+		TSK_DEBUG_INFO("[CUDA H.264 decoder] Total amount of global memory in GPU device: %4.4f MB", (float)totalGlobalMem/(1024*1024));
+	}
+#endif
+
+	// create Direct3D instance
+	self->decoder.pD3D9 = Direct3DCreate9(D3D_SDK_VERSION);
+	if(!self->decoder.pD3D9)
+	{
+		CHECK_HR(hr = E_OUTOFMEMORY);
+	}
+	adapterCount = self->decoder.pD3D9->GetAdapterCount();
+	for(i = 0; i < adapterCount; ++i)
+	{
+		ZeroMemory(&d3dpp, sizeof(d3dpp));
+        d3dpp.Windowed = TRUE;
+        d3dpp.BackBufferFormat = D3DFMT_X8R8G8B8;
+        d3dpp.BackBufferWidth = self->decoder.cuInfo.ulTargetWidth;
+        d3dpp.BackBufferHeight = self->decoder.cuInfo.ulTargetHeight;
+        d3dpp.BackBufferCount = 1;
+        d3dpp.SwapEffect = D3DSWAPEFFECT_COPY;
+        d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;
+        d3dpp.Flags = D3DPRESENTFLAG_VIDEO;
+        hr = self->decoder.pD3D9->CreateDevice(i,
+                                  D3DDEVTYPE_HAL,
+                                  GetDesktopWindow(),
+                                  D3DCREATE_FPU_PRESERVE | D3DCREATE_MULTITHREADED | D3DCREATE_HARDWARE_VERTEXPROCESSING,
+                                  &d3dpp,
+                                  &self->decoder.pD3D9Device);
+        if(hr == S_OK)
+		{
+            cuResult = cuD3D9CtxCreate(&self->decoder.cuContext, &self->decoder.cuDevice, 0, self->decoder.pD3D9Device);
+			if(cuResult == CUDA_SUCCESS)
+			{
+                break;
+			}
+			SafeRelease(&self->decoder.pD3D9Device);
+			if(self->decoder.cuContext)
+			{
+				cuCtxDestroy(self->decoder.cuContext);
+				self->decoder.cuContext = NULL;
+			}
+        }
+	}
+
+	if(!self->decoder.pD3D9Device)
+	{
+		TSK_DEBUG_ERROR("Failed to create D3D9 device");
+		CHECK_HR(hr = E_FAIL);
+	}
+
+
+	memset(&self->decoder.cuPaserParams, 0, sizeof(self->decoder.cuPaserParams));
+	self->decoder.cuPaserParams.CodecType = cudaVideoCodec_H264;
+    self->decoder.cuPaserParams.ulMaxNumDecodeSurfaces = PLUGIN_CUDA_H264_MAX_FRM_CNT;
+    self->decoder.cuPaserParams.pUserData = self;
+    self->decoder.cuPaserParams.pfnSequenceCallback = _NVCallback_HandleVideoSequence;
+    self->decoder.cuPaserParams.pfnDecodePicture = _NVCallback_HandlePictureDecode;
+    self->decoder.cuPaserParams.pfnDisplayPicture = _NVCallback_HandlePictureDisplay;
+    cuResult = cuvidCreateVideoParser(&self->decoder.cuParser, &self->decoder.cuPaserParams);
+	if(cuResult != CUDA_SUCCESS)
+	{
+		TSK_DEBUG_ERROR("cuvidCreateVideoParser(0) failed with error code = %d", (int)cuResult);
+		CHECK_HR(hr = E_FAIL);
+	}
+
+	cuResult = cuvidCreateDecoder(&self->decoder.pInst, &self->decoder.cuInfo);
+	if(CUDA_SUCCESS != cuResult)
+	{
+		TSK_DEBUG_ERROR("cuvidCreateDecoder failed with error code=%d", (int)cuResult);
+		CHECK_HR(hr = E_FAIL);
+	}
+
+	if(!self->decoder.phMutex && !(self->decoder.phMutex = tsk_mutex_create()))
+	{
+		TSK_DEBUG_ERROR("Failed to create mutex");
+		CHECK_HR(hr = E_FAIL);
+	}
+	
+bail:
+	return SUCCEEDED(hr) ? 0 : -1;
+}
+
+int cuda_codec_h264_close_decoder(cuda_codec_h264_t* self)
+{
+	if(self)
+	{
+		if(self->decoder.pInst)
+		{
+			cuvidDestroyDecoder(self->decoder.pInst);
+			self->decoder.pInst = NULL;
+		}
+		if(self->decoder.cuContext)
+		{
+			cuCtxDestroy(self->decoder.cuContext);
+			self->decoder.cuContext = NULL;
+		}
+		SafeRelease(&self->decoder.pD3D9Device);
+		SafeRelease(&self->decoder.pD3D9);
+		if(self->decoder.cuParser)
+		{
+			cuvidDestroyVideoParser(self->decoder.cuParser);
+			self->decoder.cuParser = NULL;
+		}
+		{/* cuBuffer.XXX */
+			if(self->decoder.cuBuffer.pcuPtr)
+			{
+				cuMemFreeHost(self->decoder.cuBuffer.pcuPtr);
+				self->decoder.cuBuffer.pcuPtr = NULL;
+			}
+			self->decoder.cuBuffer.nSize = self->decoder.cuBuffer.nPitch = 0;
+			self->decoder.cuBuffer.bAvail = tsk_false;
+		}
+		
+		if(self->decoder.phMutex)
+		{
+			tsk_mutex_destroy(&self->decoder.phMutex);
+		}
+		
+		TSK_FREE(self->decoder.accumulator);
+		self->decoder.accumulator_pos = 0;
+	}
+
+	return 0;
+}
+
+int cuda_codec_h264_init(cuda_codec_h264_t* self, profile_idc_t profile)
+{
+	int ret = 0;
+	level_idc_t level;
+	tdav_codec_h264_common_t* common = (tdav_codec_h264_common_t*)self;
+
+	if(!self)
+	{
+		TSK_DEBUG_ERROR("Invalid parameter");
+		return -1;
+	}
+
+	CudaUtils::Startup();
+
+	if((ret = tdav_codec_h264_common_init(common)))
+	{
+		TSK_DEBUG_ERROR("cuda_codec_h264_common_init() faile with error code=%d", ret);
+		return ret;
+	}
+
+	if((ret = tdav_codec_h264_common_level_from_size(TMEDIA_CODEC_VIDEO(self)->out.width, TMEDIA_CODEC_VIDEO(self)->out.height, &level)))
+	{
+		TSK_DEBUG_ERROR("Failed to find level for size=[%u, %u]", TMEDIA_CODEC_VIDEO(self)->out.width, TMEDIA_CODEC_VIDEO(self)->out.height);
+		return ret;
+	}
+
+	(self)->encoder.max_bw_kpbs = tmedia_defaults_get_bandwidth_video_upload_max();
+	common->pack_mode = H264_PACKETIZATION_MODE;
+	common->profile = profile;
+	common->level = level;
+	TMEDIA_CODEC_VIDEO(self)->in.max_mbps = TMEDIA_CODEC_VIDEO(self)->out.max_mbps = H264_MAX_MBPS*1000;
+	TMEDIA_CODEC_VIDEO(self)->in.max_br = TMEDIA_CODEC_VIDEO(self)->out.max_br = H264_MAX_BR*1000;
+	
+	TMEDIA_CODEC_VIDEO(self)->in.chroma = tmedia_chroma_nv12; // decoder
+	TMEDIA_CODEC_VIDEO(self)->out.chroma = tmedia_chroma_yuv420p; // encoder
+
+	self->encoder.quality = 1;
+	
+	return ret;
+}
+
+int cuda_codec_h264_deinit(cuda_codec_h264_t* self)
+{
+	if(!self)
+	{
+		TSK_DEBUG_ERROR("Invalid parameter");
+		return -1;
+	}
+
+	cuda_codec_h264_close((tmedia_codec_t*)self);
+
+	return 0;
+}
+
+static inline tsk_size_t _cuda_codec_h264_pict_layout(cuda_codec_h264_t* self, void**output, tsk_size_t *output_size)
+{
+	if(self && self->decoder.cuBuffer.pcuPtr && self->decoder.cuBuffer.nSize)
+	{
+		const unsigned int w = TMEDIA_CODEC_VIDEO(self)->in.width;
+		const unsigned int w_div_2 = (w >> 1);
+		const unsigned int h = TMEDIA_CODEC_VIDEO(self)->in.height;
+		const unsigned int h_div_2 = (h >> 1);
+		const unsigned int pitch = self->decoder.cuBuffer.nPitch;
+		const unsigned int pitch_div_2 = (pitch >> 1);
+		const tsk_size_t xsize = (w * h * 3) >> 1;
+		// resize if too short
+		if(*output_size < xsize)
+		{
+			if((*output = tsk_realloc(*output, xsize)))
+			{
+				*output_size = xsize;
+			}
+			else
+			{
+				*output_size = 0;
+				return 0;
+			}
+		}
+		
+
+		register unsigned int y;
+        const unsigned char *p = (const unsigned char *)self->decoder.cuBuffer.pcuPtr, *q = p + (h * pitch);
+        register unsigned char *i = (unsigned char *)*output, *j = i + (h * w);
+
+        for (y = 0; y < h; y++)
+		{
+			 // luma
+            memcpy(i, p, w);
+			i += w;
+			p += pitch;
+
+			 // chroma
+			memcpy(j, &q[(y&1) ? w_div_2 : 0], w_div_2);
+			j += w_div_2;
+			if(y&1)
+			{
+				q += pitch;
+			}
+        }
+		
+		return xsize;
+	}
+	return 0;
+}
+
+
+
+static int CUDAAPI _NVCallback_HandleVideoSequence(void *pvUserData, CUVIDEOFORMAT *pFormat)
+{
+	cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)pvUserData;
+	CUresult cuResult;
+
+	if(!h264 || !pFormat)
+	{
+		TSK_DEBUG_ERROR("Invalid parameter");
+		return 0;//error
+	}
+
+	tsk_mutex_lock(h264->decoder.phMutex);
+
+	int ret = 1;
+	// http://corecodec.com/products/coreavc/guide
+	// CROP 1088 to 1080
+	// H.264 encoded video size is always a multiple of 16, and sequences that are 1080 pixels high are encoded as 1088 padded at the bottom. 
+	// Also H.264 specifications provides a set of cropping parameters to signal that parts of the encoded picture are not important and should not be displayed. 
+	// Some H.264 encoders fail to specify cropping parameters when encoding 1080 video.
+	int newWidth = pFormat->coded_width;//pFormat->display_area.right - pFormat->display_area.left;
+	int newHeight = pFormat->coded_height;//pFormat->display_area.bottom - pFormat->display_area.top;
+
+	if(newWidth != TMEDIA_CODEC_VIDEO(h264)->in.width || pFormat->coded_height != newHeight)
+	{
+		TSK_DEBUG_INFO("[H.264 CUDA decoder] display area = left:%d, right:%d, bottom:%d, top:%d",
+				pFormat->display_area.left,
+				pFormat->display_area.right,
+				pFormat->display_area.bottom,
+				pFormat->display_area.top
+			);
+
+		h264->decoder.cuInfo.ulWidth = newWidth;
+		h264->decoder.cuInfo.ulTargetWidth = newWidth;
+		h264->decoder.cuInfo.ulHeight = newHeight;
+		h264->decoder.cuInfo.ulTargetHeight = newHeight;
+
+		CUresult cuResult = cuCtxPushCurrent(h264->decoder.cuContext);
+		if(cuResult != CUDA_SUCCESS)
+		{
+			TSK_DEBUG_ERROR("cuCtxPushCurrent failed with error code=%d", (int)cuResult);
+			ret = 0; //error
+			goto bail;
+		}
+
+		if(h264->decoder.pInst)
+		{
+			cuvidDestroyDecoder(h264->decoder.pInst);
+			h264->decoder.pInst = NULL;
+		}
+		cuResult = cuvidCreateDecoder(&h264->decoder.pInst, &h264->decoder.cuInfo);
+		if(CUDA_SUCCESS != cuResult)
+		{
+			TSK_DEBUG_ERROR("cuvidCreateDecoder failed with error code=%d", (int)cuResult);
+			ret = 0; //error
+			goto bail;
+		}
+		else
+		{
+			TMEDIA_CODEC_VIDEO(h264)->in.width = /*pFormat->coded_width*/newWidth;
+			TMEDIA_CODEC_VIDEO(h264)->in.height = /*pFormat->coded_height*/newHeight;
+			ret = 1; //success
+		}
+	}
+bail:
+	cuResult = cuCtxPopCurrent(NULL);
+	tsk_mutex_unlock(h264->decoder.phMutex);
+	return ret;//success
+}
+
+static int CUDAAPI _NVCallback_HandlePictureDecode(void *pvUserData, CUVIDPICPARAMS *pPicParams)
+{
+	cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)pvUserData;
+	if(!h264 || !pPicParams)
+	{
+		TSK_DEBUG_ERROR("Invalid parameter");
+		return 0;//error
+	}
+
+	tsk_mutex_lock(h264->decoder.phMutex);
+	CUresult cuResult = cuvidDecodePicture(h264->decoder.pInst, pPicParams);
+	tsk_mutex_unlock(h264->decoder.phMutex);
+
+    if(cuResult != CUDA_SUCCESS)
+	{
+		TSK_DEBUG_ERROR("cuvidDecodePicture failed with error code= %d", cuResult);
+		return 0;//error
+    }
+
+	return 1;//success
+}
+
+static int CUDAAPI _NVCallback_HandlePictureDisplay(void *pvUserData, CUVIDPARSERDISPINFO *pPicParams)
+{
+	cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)pvUserData;
+	CUVIDPROCPARAMS vpp = {0};
+	CUdeviceptr devPtr;
+	CUresult cuResult;
+	tsk_size_t nv12_size;
+	tsk_bool_t mapped = tsk_false;
+	int ret = 1;//success
+	
+	if(!h264 || !pPicParams)
+	{
+		TSK_DEBUG_ERROR("Invalid parameter");
+		return 0;//error
+	}
+
+	cuResult = cuCtxPushCurrent(h264->decoder.cuContext);
+	if(cuResult != CUDA_SUCCESS)
+	{
+		TSK_DEBUG_ERROR("cuCtxPushCurrent failed with error code = %d", (int)cuResult);
+		ret = 0;//error
+		goto bail;
+	}
+	
+	vpp.progressive_frame = pPicParams->progressive_frame;
+	vpp.top_field_first = pPicParams->top_field_first;
+	cuResult = cuvidMapVideoFrame(h264->decoder.pInst, pPicParams->picture_index, &devPtr, &h264->decoder.cuBuffer.nPitch, &vpp);
+	
+	if(cuResult != CUDA_SUCCESS)
+	{
+		TSK_DEBUG_ERROR("cuvidMapVideoFrame failed with error code = %d", (int)cuResult);
+		ret = 0;//error
+		goto bail;
+	}
+	mapped = tsk_true;
+	nv12_size = ((h264->decoder.cuBuffer.nPitch * TMEDIA_CODEC_VIDEO(h264)->in.height) * 3) >> 1;
+	if ((!h264->decoder.cuBuffer.pcuPtr) || (nv12_size > h264->decoder.cuBuffer.nSize))
+	{
+		h264->decoder.cuBuffer.nSize = 0;
+		if (h264->decoder.cuBuffer.pcuPtr)
+		{
+			cuResult = cuMemFreeHost(h264->decoder.cuBuffer.pcuPtr);
+			h264->decoder.cuBuffer.pcuPtr = NULL;
+		}
+		cuResult = cuMemAllocHost((void**)&h264->decoder.cuBuffer.pcuPtr, nv12_size);
+		if (cuResult != CUDA_SUCCESS)
+		{
+			TSK_DEBUG_ERROR("cuMemAllocHost failed to allocate %d bytes (error code=%d)", nv12_size, (int)cuResult);
+			h264->decoder.cuBuffer.pcuPtr = tsk_null;
+			h264->decoder.cuBuffer.nSize = 0;
+			ret = 0;//error
+		}
+		else
+		{
+			h264->decoder.cuBuffer.nSize = nv12_size;
+		}
+	}
+	if(h264->decoder.cuBuffer.pcuPtr)
+	{
+		cuResult = cuMemcpyDtoH(h264->decoder.cuBuffer.pcuPtr, devPtr, nv12_size);
+	}
+
+bail:
+	if(mapped)
+	{
+		cuResult = cuvidUnmapVideoFrame(h264->decoder.pInst, devPtr);
+	}
+	cuResult = cuCtxPopCurrent(NULL);
+	
+	h264->decoder.cuBuffer.bAvail = (ret == 1);
+	return ret;
+}
+
+static unsigned char* CUDAAPI _NVCallback_HandleAcquireBitStream(int *pBufferSize, void *pUserdata)
+{
+	cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)pUserdata;
+	if(!h264 || !pBufferSize)
+	{
+		TSK_DEBUG_ERROR("Invalid parameter");
+		return tsk_null;
+	}
+	
+	*pBufferSize = (int)h264->encoder.nBufferSize;
+	return (unsigned char*)h264->encoder.pBufferPtr;
+}
+
+static void CUDAAPI _NVCallback_HandleReleaseBitStream(int nBytesInBuffer, unsigned char *cb, void *pUserdata)
+{
+	tdav_codec_h264_common_t* common = (tdav_codec_h264_common_t*)pUserdata;
+	if(!common || !cb || !nBytesInBuffer)
+	{
+		TSK_DEBUG_ERROR("Invalid parameter");
+		return;
+	}
+	tdav_codec_h264_rtp_encap(common, (const uint8_t*)cb, (tsk_size_t)nBytesInBuffer);
+}
+
+static void CUDAAPI _NVCallback_HandleOnBeginFrame(const NVVE_BeginFrameInfo *pbfi, void *pUserdata)
+{
+    return;
+}
+
+static void CUDAAPI _NVCallback_HandleOnEndFrame(const NVVE_EndFrameInfo *pefi, void *pUserdata)
+{
+    return;
+}
+\ No newline at end of file
diff --git a/plugins/pluginCUDA/plugin_cuda_config.h b/plugins/pluginCUDA/plugin_cuda_config.h
new file mode 100644
index 0000000..4fceebb
--- /dev/null
+++ b/plugins/pluginCUDA/plugin_cuda_config.h
@@ -0,0 +1,75 @@
+/* Copyright (C) 2013 Mamadou DIOP
+* Copyright (C) 2013 Doubango Telecom <http://www.doubango.org>
+*	
+* This file is part of Open Source Doubango Framework.
+*
+* DOUBANGO is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*	
+* DOUBANGO is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*	
+* You should have received a copy of the GNU General Public License
+* along with DOUBANGO.
+*/
+#ifndef PLUGIN_CUDA_CONFIG_H
+#define PLUGIN_CUDA_CONFIG_H
+
+#ifdef __SYMBIAN32__
+#undef _WIN32 /* Because of WINSCW */
+#endif
+
+
+// Windows (XP/Vista/7/CE and Windows Mobile) macro definition
+#if defined(WIN32)|| defined(_WIN32) || defined(_WIN32_WCE)
+#	define PLUGIN_CUDA_UNDER_WINDOWS	1
+#	if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP || WINAPI_FAMILY == WINAPI_FAMILY_APP)
+#		define PLUGIN_CUDA_UNDER_WINDOWS_RT		1
+#	endif
+#endif
+
+#if (PLUGIN_CUDA_UNDER_WINDOWS || defined(__SYMBIAN32__)) && defined(PLUGIN_CUDA_EXPORTS)
+# 	define PLUGIN_CUDA_API		__declspec(dllexport)
+# 	define PLUGIN_CUDA_GEXTERN extern __declspec(dllexport)
+#elif (PLUGIN_CUDA_UNDER_WINDOWS || defined(__SYMBIAN32__)) && !defined(PLUGIN_CUDA_IMPORTS_IGNORE)
+# 	define PLUGIN_CUDA_API __declspec(dllimport)
+# 	define PLUGIN_CUDA_GEXTERN __declspec(dllimport)
+#else
+#	define PLUGIN_CUDA_API
+#	define PLUGIN_CUDA_GEXTERN	extern
+#endif
+
+// x86
+#if defined(__x86_64__) || defined(__x86__) || defined(__i386__)
+#	define PLUGIN_CUDA_UNDER_X86				1
+#endif
+
+// Guards against C++ name mangling 
+#ifdef __cplusplus
+#	define PLUGIN_CUDA_BEGIN_DECLS extern "C" {
+#	define PLUGIN_CUDA_END_DECLS }
+#else
+#	define PLUGIN_CUDA_BEGIN_DECLS 
+#	define PLUGIN_CUDA_END_DECLS
+#endif
+
+#ifdef _MSC_VER
+#	define inline __inline
+#	define _CRT_SECURE_NO_WARNINGS
+#	define _ALLOW_KEYWORD_MACROS
+#endif
+
+#include <stdint.h>
+#ifdef __SYMBIAN32__
+#include <stdlib.h>
+#endif
+
+#if HAVE_CONFIG_H
+	#include <config.h>
+#endif
+
+#endif // PLUGIN_CUDA_CONFIG_H
diff --git a/plugins/pluginCUDA/plugin_cuda_tdav.cxx b/plugins/pluginCUDA/plugin_cuda_tdav.cxx
new file mode 100644
index 0000000..2d16b72
--- /dev/null
+++ b/plugins/pluginCUDA/plugin_cuda_tdav.cxx
@@ -0,0 +1,20 @@
+/* Copyright (C) 2013 Mamadou DIOP
+* Copyright (C) 2013 Doubango Telecom <http://www.doubango.org>
+*	
+* This file is part of Open Source Doubango Framework.
+*
+* DOUBANGO is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*	
+* DOUBANGO is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*	
+* You should have received a copy of the GNU General Public License
+* along with DOUBANGO.
+*/
+// This file is used to avoid duplication for the .obj files
+#include "../../tinyDAV/src/codecs/h264/tdav_codec_h264_rtp.c"
diff --git a/plugins/pluginCUDA/plugin_cuda_utils.cxx b/plugins/pluginCUDA/plugin_cuda_utils.cxx
new file mode 100644
index 0000000..94c7baf
--- /dev/null
+++ b/plugins/pluginCUDA/plugin_cuda_utils.cxx
@@ -0,0 +1,168 @@
+/* Copyright (C) 2013 Mamadou DIOP
+* Copyright (C) 2013 Doubango Telecom <http://www.doubango.org>
+*	
+* This file is part of Open Source Doubango Framework.
+*
+* DOUBANGO is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*	
+* DOUBANGO is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*	
+* You should have received a copy of the GNU General Public License
+* along with DOUBANGO.
+*/
+#include "plugin_cuda_utils.h"
+
+#include "tsk_debug.h"
+
+#include <NVEncoderAPI.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+bool CudaUtils::g_bStarted = false;
+bool CudaUtils::g_bH264Checked = false;
+bool CudaUtils::g_bH264Supported = false;
+int CudaUtils::g_nCores = 0;
+
+HRESULT CudaUtils::Startup()
+{
+	if(!g_bStarted)
+	{
+		CUresult cuResult = CUDA_SUCCESS;
+		HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
+		if(SUCCEEDED(hr) || hr == 0x80010106) // 0x80010106 when called from managed code (e.g. Boghe) - More info: http://support.microsoft.com/kb/824480
+		{
+			if((cuResult = cuInit(0)) != CUDA_SUCCESS)
+			{
+				TSK_DEBUG_ERROR("cuInit() failed with error code = %08x", cuResult);
+				hr = E_FAIL;
+			}
+			else
+			{	
+				hr = S_OK;
+			}
+		}
+		g_bStarted = true;
+		return hr;
+	}
+	return S_OK;
+}
+
+HRESULT CudaUtils::Shutdown()
+{
+	// cuDeinit();
+	return S_OK;
+}
+
+bool CudaUtils::IsH264Supported()
+{
+	if(g_bH264Checked)
+	{
+		return g_bH264Supported;
+	}
+
+	HRESULT hr = S_OK;
+
+	CHECK_HR(hr = Startup());
+
+	g_bH264Checked = true;
+	
+	NVEncoder pEncoder = NULL;
+
+	CHECK_HR(hr = NVGetHWEncodeCaps());
+	CHECK_HR(hr = NVCreateEncoder(&pEncoder));
+	// Both Base and Main profiles *must* be supported
+	CHECK_HR(hr = NVIsSupportedCodecProfile(pEncoder, NV_CODEC_TYPE_H264, NVVE_H264_PROFILE_BASELINE));
+	CHECK_HR(hr = NVIsSupportedCodecProfile(pEncoder, NV_CODEC_TYPE_H264, NVVE_H264_PROFILE_MAIN));
+	
+	g_bH264Supported = true;
+	
+bail:
+	if(pEncoder)
+	{
+		NVDestroyEncoder(pEncoder);
+		pEncoder = NULL;
+	}
+
+	return g_bH264Supported;
+}
+
+int CudaUtils::ConvertSMVer2Cores(int nMajor, int nMinor)
+{
+	if(g_nCores != 0)
+	{
+		return g_nCores;
+	}
+
+	// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
+    typedef struct
+    {
+        int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
+        int Cores;
+    } sSMtoCores;
+
+    sSMtoCores nGpuArchCoresPerSM[] =
+    {
+        { 0x10,  8 }, // Tesla Generation (SM 1.0) G80 class
+        { 0x11,  8 }, // Tesla Generation (SM 1.1) G8x class
+        { 0x12,  8 }, // Tesla Generation (SM 1.2) G9x class
+        { 0x13,  8 }, // Tesla Generation (SM 1.3) GT200 class
+        { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
+        { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
+        { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
+        { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
+    };
+
+    int index = 0;
+
+    while (nGpuArchCoresPerSM[index].SM != -1)
+    {
+        if (nGpuArchCoresPerSM[index].SM == ((nMajor << 4) + nMinor))
+        {
+            g_nCores = nGpuArchCoresPerSM[index].Cores;
+			break;
+        }
+
+        index++;
+    }
+
+    // If we don't find the values, we default use the previous one to run properly
+    TSK_DEBUG_INFO("MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM", nMajor, nMinor, nGpuArchCoresPerSM[7].Cores);
+    g_nCores = nGpuArchCoresPerSM[7].Cores;
+
+	return g_nCores;
+}
+
+int CudaUtils::GetMaxGflopsDeviceId()
+{
+	int device_count = 0;
+	cudaGetDeviceCount( &device_count );
+
+	cudaDeviceProp device_properties;
+	int max_gflops_device = 0;
+	int max_gflops = 0;
+	
+	int current_device = 0;
+	cudaGetDeviceProperties( &device_properties, current_device );
+	max_gflops = device_properties.multiProcessorCount * device_properties.clockRate;
+	++current_device;
+
+	while( current_device < device_count )
+	{
+		cudaGetDeviceProperties( &device_properties, current_device );
+		int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
+		if( gflops > max_gflops )
+		{
+			max_gflops        = gflops;
+			max_gflops_device = current_device;
+		}
+		++current_device;
+	}
+
+	return max_gflops_device;
+}
+\ No newline at end of file
diff --git a/plugins/pluginCUDA/plugin_cuda_utils.h b/plugins/pluginCUDA/plugin_cuda_utils.h
new file mode 100644
index 0000000..4829275
--- /dev/null
+++ b/plugins/pluginCUDA/plugin_cuda_utils.h
@@ -0,0 +1,56 @@
+/* Copyright (C) 2013 Mamadou DIOP
+* Copyright (C) 2013 Doubango Telecom <http://www.doubango.org>
+*	
+* This file is part of Open Source Doubango Framework.
+*
+* DOUBANGO is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*	
+* DOUBANGO is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*	
+* You should have received a copy of the GNU General Public License
+* along with DOUBANGO.
+*/
+#ifndef PLUGIN_CUDA_UTILS_H
+#define PLUGIN_CUDA_UTILS_H
+
+#include "plugin_cuda_config.h"
+
+#include <Windows.h>
+
+#undef CHECK_HR
+// In CHECK_HR(x) When (x) is a function it will be executed twice when used in "TSK_DEBUG_ERROR(x)" and "If(x)"
+#define CHECK_HR(x) { HRESULT __hr__ = (x); if (FAILED(__hr__)) { TSK_DEBUG_ERROR("Operation Failed (%08x)", __hr__); goto bail; } }
+
+#undef SafeRelease
+#define SafeRelease(ppT) \
+{ \
+    if (*ppT) \
+    { \
+        (*ppT)->Release(); \
+        *ppT = NULL; \
+    } \
+} 
+
+class CudaUtils
+{
+public:
+	static HRESULT Startup();
+	static HRESULT Shutdown();
+	static bool IsH264Supported();
+	static int ConvertSMVer2Cores(int nMajor, int nMinor);
+	static int GetMaxGflopsDeviceId();
+
+private:
+	static bool g_bStarted;
+	static bool g_bH264Checked;
+	static bool g_bH264Supported;
+	static int g_nCores;
+};
+
+#endif/* PLUGIN_CUDA_UTILS_H */
diff --git a/plugins/pluginCUDA/version.rc b/plugins/pluginCUDA/version.rc
new file mode 100644
index 0000000..bd81664
--- /dev/null
+++ b/plugins/pluginCUDA/version.rc
@@ -0,0 +1,102 @@
+// Microsoft Visual C++ generated resource script.
+//
+// #include "resource.h"
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#include "afxres.h"
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// English (U.S.) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+#ifdef _WIN32
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+#pragma code_page(1252)
+#endif //_WIN32
+
+#ifdef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// TEXTINCLUDE
+//
+
+1 TEXTINCLUDE 
+BEGIN
+    "resource.h\0"
+END
+
+2 TEXTINCLUDE 
+BEGIN
+    "#include ""afxres.h""\r\n"
+    "\0"
+END
+
+3 TEXTINCLUDE 
+BEGIN
+    "\r\n"
+    "\0"
+END
+
+#endif    // APSTUDIO_INVOKED
+
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION 2.0.0.1156
+ PRODUCTVERSION 2.0.0.1156
+ FILEFLAGSMASK 0x17L
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x4L
+ FILETYPE 0x2L
+ FILESUBTYPE 0x0L
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904b0"
+        BEGIN
+            VALUE "CompanyName", "Doubango Telecom"
+            VALUE "FileDescription", "Doubango IMS Framework NVIDIA CUDA Plugin"
+            VALUE "FileVersion", "2.0.0.1156"
+            VALUE "InternalName", "pluginCUDA.dll"
+            VALUE "LegalCopyright", "(c) 2010-2013 Doubango Telecom.  All rights reserved."
+            VALUE "OriginalFilename", "pluginCUDA.dll"
+            VALUE "ProductName", "Doubango IMS Framework NVIDIA CUDA Plugin"
+            VALUE "ProductVersion", "2.0.0.1156"
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x409, 1200
+    END
+END
+
+#endif    // English (U.S.) resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 3 resource.
+//
+
+
+/////////////////////////////////////////////////////////////////////////////
+#endif    // not APSTUDIO_INVOKED
+