summaryrefslogtreecommitdiffstats
path: root/tinyDAV/src/video/directx/tdav_producer_screencast_ddraw.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'tinyDAV/src/video/directx/tdav_producer_screencast_ddraw.cxx')
-rw-r--r--tinyDAV/src/video/directx/tdav_producer_screencast_ddraw.cxx1542
1 files changed, 1542 insertions, 0 deletions
diff --git a/tinyDAV/src/video/directx/tdav_producer_screencast_ddraw.cxx b/tinyDAV/src/video/directx/tdav_producer_screencast_ddraw.cxx
new file mode 100644
index 0000000..13507db
--- /dev/null
+++ b/tinyDAV/src/video/directx/tdav_producer_screencast_ddraw.cxx
@@ -0,0 +1,1542 @@
+/* Copyright (C) 2015 Mamadou DIOP.
+* Copyright (C) 2015 Doubango Telecom.
+*
+* This file is part of Open Source Doubango Framework.
+*
+* DOUBANGO is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* DOUBANGO is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with DOUBANGO.
+*/
+#include "tinydav/video/directx/tdav_producer_screencast_ddraw.h"
+
+#if TDAV_UNDER_WINDOWS && !TDAV_UNDER_WINDOWS_RT
+
+#include <windows.h>
+#include <ddraw.h>
+
+#include "tsk_thread.h"
+#include "tsk_memory.h"
+#include "tsk_safeobj.h"
+#include "tsk_timer.h"
+#include "tsk_time.h"
+#include "tsk_string.h"
+#include "tsk_debug.h"
+
+#if defined(_MSC_VER)
+# define DDRAW_HAVE_RGB32_TO_I420 1
+# if !TDAV_UNDER_WINDOWS_CE
+# define DDRAW_HAVE_RGB32_TO_I420_INTRIN 1
+# include <intrin.h>
+# endif /* TDAV_UNDER_WINDOWS_CE */
+# if !defined(_M_X64) /*|| _MSC_VER <= 1500*/ // https://msdn.microsoft.com/en-us/library/4ks26t93.aspx: Inline assembly is not supported on the ARM and x64 processors (1500 = VS2008)
+# define DDRAW_HAVE_RGB32_TO_I420_ASM 1
+# endif
+#endif /* _MSC_VER */
+
+#if !defined(DDRAW_MEM_ALIGNMENT)
+# define DDRAW_MEM_ALIGNMENT 16 // SSE = 16, AVX = 32. Should be 16.
+#endif /* DDRAW_MEM_ALIGNMENT */
+
+#if !defined(DDRAW_IS_ALIGNED)
+# define DDRAW_IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
+#endif /* DDRAW_IS_ALIGNED */
+
+#if !defined(DDRAW_HIGH_PRIO_MEMCPY)
+# define DDRAW_HIGH_PRIO_MEMCPY 0 // BOOL
+#endif /* DDRAW_HIGH_PRIO_MEMCPY */
+
+#if !defined(DDRAW_CPU_MONITOR)
+# define DDRAW_CPU_MONITOR 0 // BOOL
+#endif /* DDRAW_CPU_MONITOR */
+
+#if !defined(DDRAW_CPU_THROTTLING)
+# define DDRAW_CPU_THROTTLING 0 // BOOL
+#endif /* DDRAW_CPU_THROTTLING */
+
+#if (DDRAW_CPU_MONITOR || DDRAW_CPU_THROTTLING) && !defined(DDRAW_CPU_SCHEDULE_TIMEOUT)
+# define DDRAW_CPU_SCHEDULE_TIMEOUT 800 // millis
+#endif /* DDRAW_CPU_MONITOR */
+
+#if defined(DDRAW_CPU_THROTTLING) && !defined(DDRAW_CPU_THROTTLING_FPS_MIN)
+# define DDRAW_CPU_THROTTLING_FPS_MIN 1 // frames per second
+#endif /* DDRAW_CPU_THROTTLING_FPS_MIN */
+
+#if defined(DDRAW_CPU_THROTTLING) && !defined(DDRAW_CPU_THROTTLING_THRESHOLD)
+# define DDRAW_CPU_THROTTLING_THRESHOLD 70 // percent
+#endif /* DDRAW_CPU_THROTTLING_THRESHOLD */
+
+#if defined(DDRAW_CPU_THROTTLING) && !defined(DDRAW_CPU_THROTTLING_THRESHOLD_MARGIN)
+# define DDRAW_CPU_THROTTLING_THRESHOLD_MARGIN 5 // percent
+#endif /* DDRAW_CPU_THROTTLING_THRESHOLD_MARGIN */
+
+#if !defined(DDRAW_MT)
+# define DDRAW_MT 1 // BOOL: Multi-threading
+#endif /* DDRAW_MT */
+
+#if defined (DDRAW_MT) && !defined(DDRAW_MT_COUNT)
+# define DDRAW_MT_COUNT 3 // Number of buffers to use
+#endif /* DDRAW_MT_COUNT */
+
+#if defined(DDRAW_MT_COUNT)
+# define DDRAW_MT_EVENT_SHUTDOWN_INDEX DDRAW_MT_COUNT
+#endif
+
+#if !defined(DDRAW_MEM_SURFACE_DIRECT_ACCESS)
+# define DDRAW_MEM_SURFACE_DIRECT_ACCESS 0 // direct access to "ddsd.lpSurface" is very slow even if the memory is correctly aligned: to be investigated
+#endif /* DDRAW_MEM_SURFACE_DIRECT_ACCESS */
+
+#if !defined(DDRAW_PREVIEW)
+# if TDAV_UNDER_WINDOWS_CE && (BUILD_TYPE_GE || SIN_CITY)
+# define DDRAW_PREVIEW 0 // Do not waste time displaying the preview on "WEC7 + (GE | SINCITY)"
+# else
+# define DDRAW_PREVIEW 1
+# endif
+#endif
+
+#define DDRAW_DEBUG_INFO(FMT, ...) TSK_DEBUG_INFO("[DDRAW Producer] " FMT, ##__VA_ARGS__)
+#define DDRAW_DEBUG_WARN(FMT, ...) TSK_DEBUG_WARN("[DDRAW Producer] " FMT, ##__VA_ARGS__)
+#define DDRAW_DEBUG_ERROR(FMT, ...) TSK_DEBUG_ERROR("[DDRAW Producer] " FMT, ##__VA_ARGS__)
+#define DDRAW_DEBUG_FATAL(FMT, ...) TSK_DEBUG_FATAL("[DDRAW Producer] " FMT, ##__VA_ARGS__)
+
+#define DDRAW_SAFE_RELEASE(pp) if ((pp) && *(pp)) (*(pp))->Release(), *(pp) = NULL
+#define DDRAW_CHECK_HR(x) { HRESULT __hr__ = (x); if (FAILED(__hr__)) { DDRAW_DEBUG_ERROR("Operation Failed (%08x)", __hr__); goto bail; } }
+
+typedef struct DDrawModule {
+ LPDIRECTDRAW lpDD;
+ HMODULE hDLL;
+}DDrawModule;
+typedef struct DDrawModule FAR *LPDDrawModule;
+#define DDrawModuleSafeFree(module) DDRAW_SAFE_RELEASE(&module.lpDD); if (module.hDLL) { FreeLibrary(module.hDLL), module.hDLL = NULL; }
+
+typedef struct tdav_producer_screencast_ddraw_s
+{
+ TMEDIA_DECLARE_PRODUCER;
+
+ HWND hwnd_preview;
+ HWND hwnd_src;
+#if DDRAW_PREVIEW
+ BITMAPINFO bi_preview;
+#endif /* DDRAW_PREVIEW */
+
+#if DDRAW_CPU_MONITOR || DDRAW_CPU_THROTTLING
+ tsk_timer_manager_handle_t *p_timer_mgr;
+ struct {
+ tsk_timer_id_t id_timer;
+ int fps_target;
+ } cpu;
+#endif /* DDRAW_CPU_MONITOR || DDRAW_CPU_THROTTLING */
+
+#if DDRAW_MT
+ struct{
+ tsk_thread_handle_t* tid[1];
+ void* p_buff_yuv_aligned_array[DDRAW_MT_COUNT];
+ BOOL b_flags_array[DDRAW_MT_COUNT];
+ HANDLE h_events[DDRAW_MT_COUNT + 1]; // #DDRAW_MT_COUNT events for each buffer plus #1 for the shutdown/stop
+ } mt;
+#endif /* DDRAW_MT */
+
+ DDrawModule ddrawModule;
+ IDirectDrawSurface* p_surf_primary;
+
+ tsk_thread_handle_t* tid[1];
+
+ void* p_buff_rgb_aligned;
+ tsk_size_t n_buff_rgb;
+ tsk_size_t n_buff_rgb_bitscount;
+
+ void* p_buff_yuv_aligned;
+ tsk_size_t n_buff_yuv;
+
+ BOOL b_have_rgb32_conv; // support for RGB32 -> I420 and primary screen format is RGB32
+
+ tsk_bool_t b_started;
+ tsk_bool_t b_paused;
+ tsk_bool_t b_muted;
+
+ TSK_DECLARE_SAFEOBJ;
+}
+tdav_producer_screencast_ddraw_t;
+
+static BOOL _tdav_producer_screencast_have_ssse3();
+static tmedia_chroma_t _tdav_producer_screencast_get_chroma(const DDPIXELFORMAT* pixelFormat);
+static void* TSK_STDCALL _tdav_producer_screencast_grap_thread(void *arg);
+#if DDRAW_MT
+static void* TSK_STDCALL _tdav_producer_screencast_mt_encode_thread(void *arg);
+#endif /* DDRAW_MT */
+static int _tdav_producer_screencast_timer_cb(const void* arg, tsk_timer_id_t timer_id);
+static int _tdav_producer_screencast_grab(tdav_producer_screencast_ddraw_t* p_self);
+static HRESULT _tdav_producer_screencast_create_module(LPDDrawModule lpModule);
+static HRESULT _tdav_producer_screencast_alloc_rgb_buff(tdav_producer_screencast_ddraw_t* p_self, DWORD w, DWORD h, DWORD bitsCount);
+static HRESULT _tdav_producer_screencast_alloc_yuv_buff(tdav_producer_screencast_ddraw_t* p_self, DWORD w, DWORD h);
+
+#if DDRAW_HAVE_RGB32_TO_I420_INTRIN || DDRAW_HAVE_RGB32_TO_I420_ASM
+static __declspec(align(DDRAW_MEM_ALIGNMENT)) const int8_t kYCoeffs[16] = {
+ 13, 65, 33, 0,
+ 13, 65, 33, 0,
+ 13, 65, 33, 0,
+ 13, 65, 33, 0,
+ };
+ static __declspec(align(DDRAW_MEM_ALIGNMENT)) const int8_t kUCoeffs[16] = {
+ 112, -74, -38, 0,
+ 112, -74, -38, 0,
+ 112, -74, -38, 0,
+ 112, -74, -38, 0,
+ };
+ static __declspec(align(DDRAW_MEM_ALIGNMENT)) const int8_t kVCoeffs[16] = {
+ -18, -94, 112, 0,
+ -18, -94, 112, 0,
+ -18, -94, 112, 0,
+ -18, -94, 112, 0,
+ };
+ static __declspec(align(DDRAW_MEM_ALIGNMENT)) const int32_t kRGBAShuffleDuplicate[4] = { 0x03020100, 0x0b0a0908, 0x03020100, 0x0b0a0908 }; // RGBA(X) || RGBA(X + 2) || RGBA(X) || RGBA(X + 2) = 2U || 2V
+ static __declspec(align(DDRAW_MEM_ALIGNMENT)) const uint16_t kY16[8] = {
+ 16, 16, 16, 16,
+ 16, 16, 16, 16
+ };
+ static __declspec(align(DDRAW_MEM_ALIGNMENT)) const uint16_t kUV128[8] = {
+ 128, 128, 128, 128,
+ 128, 128, 128, 128
+ };
+#endif /* DDRAW_HAVE_RGB32_TO_I420_INTRIN || DDRAW_HAVE_RGB32_TO_I420_ASM */
+
+// public function used to check that we can use DDRAW plugin before loading it
+tsk_bool_t tdav_producer_screencast_ddraw_plugin_is_supported()
+{
+ static tsk_bool_t __checked = tsk_false; // static guard to avoid checking more than once
+ static tsk_bool_t __supported = tsk_false;
+
+ HRESULT hr = DD_OK;
+ DDSURFACEDESC ddsd;
+ DDPIXELFORMAT DDPixelFormat;
+ LPDIRECTDRAWSURFACE lpDDS = NULL;
+ DDrawModule ddrawModule = { 0 };
+
+ if (__checked) {
+ goto bail;
+ }
+
+ __checked = tsk_true;
+
+ DDRAW_CHECK_HR(hr = _tdav_producer_screencast_create_module(&ddrawModule));
+ DDRAW_CHECK_HR(hr = ddrawModule.lpDD->SetCooperativeLevel(NULL, DDSCL_NORMAL));
+
+ ZeroMemory(&ddsd, sizeof(ddsd));
+ ddsd.dwSize = sizeof(ddsd);
+ ddsd.dwFlags = DDSD_CAPS;
+ ddsd.ddsCaps.dwCaps = DDSCAPS_PRIMARYSURFACE;
+
+ DDRAW_CHECK_HR(hr = ddrawModule.lpDD->CreateSurface(&ddsd, &lpDDS, NULL));
+
+ ZeroMemory(&DDPixelFormat, sizeof(DDPixelFormat));
+ DDPixelFormat.dwSize = sizeof(DDPixelFormat);
+ DDRAW_CHECK_HR(hr = lpDDS->GetPixelFormat(&DDPixelFormat));
+ DDRAW_DEBUG_INFO("dwRGBBitCount:%d, dwRBitMask:%x, dwGBitMask:%x, dwBBitMask:%x, dwRGBAlphaBitMask:%x",
+ DDPixelFormat.dwRGBBitCount, DDPixelFormat.dwRBitMask, DDPixelFormat.dwGBitMask, DDPixelFormat.dwBBitMask, DDPixelFormat.dwRGBAlphaBitMask);
+ if (_tdav_producer_screencast_get_chroma(&DDPixelFormat) == tmedia_chroma_none) {
+ DDRAW_CHECK_HR(hr = DDERR_INVALIDCAPS);
+ }
+
+ __supported = SUCCEEDED(hr);
+
+bail:
+ DDRAW_SAFE_RELEASE(&lpDDS);
+ DDrawModuleSafeFree(ddrawModule);
+ return __supported;
+}
+
+static BOOL _tdav_producer_screencast_have_ssse3()
+{
+ static BOOL __checked = FALSE; // static guard to avoid checking more than once
+ static BOOL __supported = FALSE;
+
+ if (__checked) {
+ return __supported;
+ }
+ __checked = TRUE;
+
+#ifndef BIT
+# define BIT(n) (1<<n)
+#endif /*BIT*/
+#if DDRAW_HAVE_RGB32_TO_I420_ASM
+ #define cpuid(func, func2, a, b, c, d)\
+ __asm mov eax, func\
+ __asm mov ecx, func2\
+ __asm cpuid\
+ __asm mov a, eax\
+ __asm mov b, ebx\
+ __asm mov c, ecx\
+ __asm mov d, edx
+
+#define HAS_MMX 0x01
+#define HAS_SSE 0x02
+#define HAS_SSE2 0x04
+#define HAS_SSE3 0x08
+#define HAS_SSSE3 0x10
+#define HAS_SSE4_1 0x20
+#define HAS_AVX 0x40
+#define HAS_AVX2 0x80
+
+ unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx;
+ cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
+ if (reg_eax < 1) {
+ DDRAW_DEBUG_ERROR("reg_eax < 1");
+ return FALSE;
+ }
+ cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
+ __supported = (reg_ecx & BIT(9)) ? TRUE : FALSE;
+#elif DDRAW_HAVE_RGB32_TO_I420_INTRIN
+ int cpu_info[4] = { 0 }, num_ids;
+ __cpuid(cpu_info, 0);
+ num_ids = cpu_info[0];
+ __cpuid(cpu_info, 0x80000000);
+ if (num_ids > 0) {
+ __cpuid(cpu_info, 0x00000001);
+ __supported = (cpu_info[2] & BIT(9)) ? TRUE : FALSE;
+ }
+#endif /* DDRAW_HAVE_RGB32_TO_I420_ASM */
+
+ DDRAW_DEBUG_INFO("SSSE3 supported = %s", __supported ? "YES" : "NO");
+
+ return __supported;
+}
+
+#if DDRAW_HAVE_RGB32_TO_I420_INTRIN
+
+#define DDRAW_COPY16_INTRIN(dst, src) \
+ _mm_store_si128((__m128i*)dst, _mm_load_si128((__m128i*)src))
+#define DDRAW_COPY64_INTRIN(dst, src) \
+ _mm_store_si128((__m128i*)dst, _mm_load_si128((__m128i*)src)); \
+ _mm_store_si128((__m128i*)&dst[16], _mm_load_si128((__m128i*)&src[16])); \
+ _mm_store_si128((__m128i*)&dst[32], _mm_load_si128((__m128i*)&src[32])); \
+ _mm_store_si128((__m128i*)&dst[48], _mm_load_si128((__m128i*)&src[48]))
+#define DDRAW_COPY128_INTRIN(dst, src) \
+ DDRAW_COPY64_INTRIN(dst, src); \
+ _mm_store_si128((__m128i*)&dst[64], _mm_load_si128((__m128i*)&src[64])); \
+ _mm_store_si128((__m128i*)&dst[80], _mm_load_si128((__m128i*)&src[80])); \
+ _mm_store_si128((__m128i*)&dst[96], _mm_load_si128((__m128i*)&src[96])); \
+ _mm_store_si128((__m128i*)&dst[112], _mm_load_si128((__m128i*)&src[112]))
+
+static void _tdav_producer_screencast_rgb32_to_yuv420_intrin_ssse3(uint8_t *yuvPtr, const uint8_t *rgbPtr, int width, int height)
+{
+ // rgbPtr contains (samplesCount * 16) bytes
+ // yPtr contains samplesCount bytes
+ const int samplesCount = (width * height); // "width" and "height" are in samples
+ const uint8_t *rgbPtr_;
+ uint8_t* yPtr_ = yuvPtr, *uPtr_ = (yPtr_ + samplesCount), *vPtr_ = uPtr_ + (samplesCount >> 2);
+ __m128i mmRgb0, mmRgb1, mmRgb2, mmRgb3, mmY0, mmY1, mmY;
+ __m128i mmRgbU0, mmRgbU1, mmRgbV0, mmRgbV1;
+
+ // Convert 16 RGBA samples to 16 Y samples
+ rgbPtr_ = rgbPtr;
+ /* const */__m128i yCoeffs = _mm_load_si128((__m128i*)kYCoeffs);
+ /* const */__m128i y16 = _mm_load_si128((__m128i*)kY16);
+ for(int i = 0; i < samplesCount; i += 16)
+ {
+ // load 16 RGBA samples
+ _mm_store_si128(&mmRgb0, _mm_load_si128((__m128i*)rgbPtr_)); // 4 RGBA samples
+ _mm_store_si128(&mmRgb1, _mm_load_si128((__m128i*)&rgbPtr_[16])); // 4 RGBA samples
+ _mm_store_si128(&mmRgb2, _mm_load_si128((__m128i*)&rgbPtr_[32])); // 4 RGBA samples
+ _mm_store_si128(&mmRgb3, _mm_load_si128((__m128i*)&rgbPtr_[48])); // 4 RGBA samples
+
+ _mm_store_si128(&mmRgb0, _mm_maddubs_epi16(mmRgb0/*unsigned*/, yCoeffs/*signed*/)); // mmRgb0 = ((yCoeffs[j] * mmRgb0[j]) + (yCoeffs[j + 1] * mmRgb0[j + 1]))
+ _mm_store_si128(&mmRgb1, _mm_maddubs_epi16(mmRgb1/*unsigned*/, yCoeffs/*signed*/));
+ _mm_store_si128(&mmRgb2, _mm_maddubs_epi16(mmRgb2/*unsigned*/, yCoeffs/*signed*/));
+ _mm_store_si128(&mmRgb3, _mm_maddubs_epi16(mmRgb3/*unsigned*/, yCoeffs/*signed*/));
+
+ _mm_store_si128(&mmY0, _mm_hadd_epi16(mmRgb0, mmRgb1)); // horizontal add
+ _mm_store_si128(&mmY1, _mm_hadd_epi16(mmRgb2, mmRgb3));
+
+ _mm_store_si128(&mmY0, _mm_srai_epi16(mmY0, 7)); // >> 7
+ _mm_store_si128(&mmY1, _mm_srai_epi16(mmY1, 7));
+
+ _mm_store_si128(&mmY0, _mm_add_epi16(mmY0, y16)); // + 16
+ _mm_store_si128(&mmY1, _mm_add_epi16(mmY1, y16));
+
+ _mm_store_si128(&mmY, _mm_packus_epi16(mmY0, mmY1)); // Saturate(I16 -> U8)
+
+ _mm_store_si128((__m128i*)yPtr_, mmY);
+
+ rgbPtr_ += 64; // 16samples * 4bytes
+ yPtr_ += 16; // 16samples * 1byte
+ }
+
+ // U+V planes
+ /* const */__m128i uCoeffs = _mm_load_si128((__m128i*)kUCoeffs);
+ /* const */__m128i vCoeffs = _mm_load_si128((__m128i*)kVCoeffs);
+ /* const */__m128i rgbaShuffleDuplicate = _mm_load_si128((__m128i*)kRGBAShuffleDuplicate);
+ /* const */__m128i uv128 = _mm_load_si128((__m128i*)kUV128);
+ rgbPtr_ = rgbPtr;
+ for(int i = 0; i < samplesCount; )
+ {
+ // load 16 RGBA samples
+ _mm_store_si128(&mmRgb0, _mm_load_si128((__m128i*)rgbPtr_)); // 4 RGBA samples
+ _mm_store_si128(&mmRgb1, _mm_load_si128((__m128i*)&rgbPtr_[16])); // 4 RGBA samples
+ _mm_store_si128(&mmRgb2, _mm_load_si128((__m128i*)&rgbPtr_[32])); // 4 RGBA samples
+ _mm_store_si128(&mmRgb3, _mm_load_si128((__m128i*)&rgbPtr_[48])); // 4 RGBA samples
+
+ _mm_store_si128(&mmRgb0, _mm_shuffle_epi8(mmRgb0, rgbaShuffleDuplicate));
+ _mm_store_si128(&mmRgb1, _mm_shuffle_epi8(mmRgb1, rgbaShuffleDuplicate));
+ _mm_store_si128(&mmRgb2, _mm_shuffle_epi8(mmRgb2, rgbaShuffleDuplicate));
+ _mm_store_si128(&mmRgb3, _mm_shuffle_epi8(mmRgb3, rgbaShuffleDuplicate));
+
+ _mm_store_si128(&mmRgbU0, _mm_unpacklo_epi64(mmRgb0, mmRgb1));
+ _mm_store_si128(&mmRgbV0, _mm_unpackhi_epi64(mmRgb0, mmRgb1)); // same as mmRgbU0: Use _mm_store_si128??
+ _mm_store_si128(&mmRgbU1, _mm_unpacklo_epi64(mmRgb2, mmRgb3));
+ _mm_store_si128(&mmRgbV1, _mm_unpackhi_epi64(mmRgb2, mmRgb3)); // same as mmRgbU0: Use _mm_store_si128??
+
+ _mm_store_si128(&mmRgbU0, _mm_maddubs_epi16(mmRgbU0/*unsigned*/, uCoeffs/*signed*/));
+ _mm_store_si128(&mmRgbV0, _mm_maddubs_epi16(mmRgbV0/*unsigned*/, vCoeffs/*signed*/));
+ _mm_store_si128(&mmRgbU1, _mm_maddubs_epi16(mmRgbU1/*unsigned*/, uCoeffs/*signed*/));
+ _mm_store_si128(&mmRgbV1, _mm_maddubs_epi16(mmRgbV1/*unsigned*/, vCoeffs/*signed*/));
+
+ _mm_store_si128(&mmY0, _mm_hadd_epi16(mmRgbU0, mmRgbU1)); // horizontal add
+ _mm_store_si128(&mmY1, _mm_hadd_epi16(mmRgbV0, mmRgbV1));
+
+ _mm_store_si128(&mmY0, _mm_srai_epi16(mmY0, 8)); // >> 8
+ _mm_store_si128(&mmY1, _mm_srai_epi16(mmY1, 8));
+
+ _mm_store_si128(&mmY0, _mm_add_epi16(mmY0, uv128)); // + 128
+ _mm_store_si128(&mmY1, _mm_add_epi16(mmY1, uv128));
+
+ // Y contains 8 samples for U then 8 samples for V
+ _mm_store_si128(&mmY, _mm_packus_epi16(mmY0, mmY1)); // Saturate(I16 -> U8)
+ _mm_storel_pi((__m64*)uPtr_, _mm_load_ps((float*)&mmY));
+ _mm_storeh_pi((__m64*)vPtr_, _mm_load_ps((float*)&mmY));
+
+ uPtr_ += 8; // 8samples * 1byte
+ vPtr_ += 8; // 8samples * 1byte
+
+ // move to next 16 samples
+ i += 16;
+ rgbPtr_ += 64; // 16samples * 4bytes
+
+ if (/*i % width == 0*/ !(i & (width - 1)))
+ {
+ // skip next line
+ i += width;
+ rgbPtr_ += (width * 4);
+ }
+ }
+}
+#endif /* DDRAW_HAVE_RGB32_TO_I420_INTRIN */
+
+#if DDRAW_HAVE_RGB32_TO_I420_ASM
+
+// __asm keyword must be duplicated in macro: https://msdn.microsoft.com/en-us/library/aa293825(v=vs.60).aspx
+#define DDRAW_COPY16_ASM(dst, src) \
+ __asm { \
+ __asm mov eax, dword ptr [src] \
+ __asm mov ecx, dword ptr [dst] \
+ \
+ __asm movdqa xmm0, xmmword ptr [eax] \
+ __asm movdqa xmmword ptr [ecx], xmm0 \
+ }
+#define DDRAW_COPY64_ASM(dst, src) \
+ __asm { \
+ __asm mov eax, dword ptr [src] \
+ __asm mov ecx, dword ptr [dst] \
+ \
+ __asm movdqa xmm0, xmmword ptr [eax] \
+ __asm add eax, dword ptr 16 \
+ __asm movdqa xmm1, xmmword ptr [eax] \
+ __asm add eax, dword ptr 16 \
+ __asm movdqa xmm2, xmmword ptr [eax] \
+ __asm add eax, dword ptr 16 \
+ __asm movdqa xmm3, xmmword ptr [eax] \
+ \
+ __asm movdqa xmmword ptr [ecx], xmm0 \
+ __asm add ecx, dword ptr 16 \
+ __asm movdqa xmmword ptr [ecx], xmm1 \
+ __asm add ecx, dword ptr 16 \
+ __asm movdqa xmmword ptr [ecx], xmm2 \
+ __asm add ecx, dword ptr 16 \
+ __asm movdqa xmmword ptr [ecx], xmm3 \
+ }
+#define DDRAW_COPY128_ASM(dst, src) \
+ __asm { \
+ __asm mov eax, dword ptr [src] \
+ __asm mov ecx, dword ptr [dst] \
+ \
+ __asm movdqa xmm0, xmmword ptr [eax] \
+ __asm add eax, dword ptr 16 \
+ __asm movdqa xmm1, xmmword ptr [eax] \
+ __asm add eax, dword ptr 16 \
+ __asm movdqa xmm2, xmmword ptr [eax] \
+ __asm add eax, dword ptr 16 \
+ __asm movdqa xmm3, xmmword ptr [eax] \
+ __asm add eax, dword ptr 16 \
+ __asm movdqa xmm4, xmmword ptr [eax] \
+ __asm add eax, dword ptr 16 \
+ __asm movdqa xmm5, xmmword ptr [eax] \
+ __asm add eax, dword ptr 16 \
+ __asm movdqa xmm6, xmmword ptr [eax] \
+ __asm add eax, dword ptr 16 \
+ __asm movdqa xmm7, xmmword ptr [eax] \
+ \
+ __asm movdqa xmmword ptr [ecx], xmm0 \
+ __asm add ecx, dword ptr 16 \
+ __asm movdqa xmmword ptr [ecx], xmm1 \
+ __asm add ecx, dword ptr 16 \
+ __asm movdqa xmmword ptr [ecx], xmm2 \
+ __asm add ecx, dword ptr 16 \
+ __asm movdqa xmmword ptr [ecx], xmm3 \
+ __asm add ecx, dword ptr 16 \
+ __asm movdqa xmmword ptr [ecx], xmm4 \
+ __asm add ecx, dword ptr 16 \
+ __asm movdqa xmmword ptr [ecx], xmm5 \
+ __asm add ecx, dword ptr 16 \
+ __asm movdqa xmmword ptr [ecx], xmm6 \
+ __asm add ecx, dword ptr 16 \
+ __asm movdqa xmmword ptr [ecx], xmm7 \
+ }
+
+__declspec(naked) __declspec(align(DDRAW_MEM_ALIGNMENT))
+static void _tdav_producer_screencast_rgb32_to_yuv420_asm_ssse3(uint8_t *yuvPtr, const uint8_t *rgbPtr, int width, int height)
+{
+ __asm {
+ push esi
+ push edi
+ push ebx
+ /*** Y Samples ***/
+ mov edx, [esp + 12 + 4] // yuvPtr
+ mov eax, [esp + 12 + 8] // rgbPtr
+ mov ecx, [esp + 12 + 12] // width
+ imul ecx, [esp + 12 + 16] // (width * height) = samplesCount
+
+ movdqa xmm7, kYCoeffs // yCoeffs
+ movdqa xmm6, kY16 // y16
+ /* loopY start */
+loopY:
+ // load 16 RGBA samples
+ movdqa xmm0, [eax] // mmRgb0
+ movdqa xmm1, [eax + 16] // mmRgb1
+ movdqa xmm2, [eax + 32] // mmRgb2
+ movdqa xmm3, [eax + 48] // mmRgb3
+ lea eax, [eax + 64] // rgbPtr_ += 64
+ // (yCoeffs[0] * mmRgbX[0]) + (yCoeffs[1] * mmRgbX[1])
+ pmaddubsw xmm0, xmm7
+ pmaddubsw xmm1, xmm7
+ pmaddubsw xmm2, xmm7
+ pmaddubsw xmm3, xmm7
+ // horizontal add
+ phaddw xmm0, xmm1
+ phaddw xmm2, xmm3
+ // >> 7
+ psraw xmm0, 7
+ psraw xmm2, 7
+ // + 16
+ paddw xmm0, xmm6
+ paddw xmm2, xmm6
+ // Saturate(I16 -> U8) - Packs
+ packuswb xmm0, xmm2
+ // Copy to yuvPtr
+ movdqa [edx], xmm0
+ lea edx, [edx + 16] // yPtr_ += 16
+ sub ecx, 16 // samplesCount -= 16
+ jnz loopY // goto loop if (samplesCount != 0)
+
+ //==================================//
+ //=========== UV Samples ===========//
+ //==================================//
+ mov esi, [esp + 12 + 4] // yuvPtr
+ mov eax, [esp + 12 + 8] // rgbPtr
+ mov ecx, [esp + 12 + 12] // width
+ imul ecx, [esp + 12 + 16] // (width * height) = samplesCount
+ mov edx, ecx
+ shr edx, 2 // edx = samplesCount / 4
+ add esi, ecx // [[esi = uPtr_]]
+ mov edi, esi // edi = uPtr_
+ add edi, edx // [[edi = uPtr_ + edx = uPtr_ + (samplesCount / 4) = vPtr_]]
+ xor edx, edx // edx = 0 = i
+ mov ebx, [esp + 12 + 12] // ebx = width
+ sub ebx, 1 // ebx = (width - 1)
+
+ movdqa xmm7, kUCoeffs // uCoeffs
+ movdqa xmm6, kVCoeffs // vCoeffs
+ movdqa xmm5, kRGBAShuffleDuplicate // rgbaShuffleDuplicate
+ movdqa xmm4, kUV128 // uv128
+
+ /* loopUV start */
+loopUV:
+ // load 16 RGBA samples
+ movdqa xmm0, [eax] // mmRgb0
+ movdqa xmm1, [eax + 16] // mmRgb1
+ movdqa xmm2, [eax + 32] // mmRgb2
+ movdqa xmm3, [eax + 48] // mmRgb3
+ lea eax, [eax + 64] // rgbPtr_ += 64
+
+ pshufb xmm0, xmm5
+ pshufb xmm1, xmm5
+ pshufb xmm2, xmm5
+ pshufb xmm3, xmm5
+
+ punpcklqdq xmm0, xmm1 // mmRgbU0
+ punpcklqdq xmm2, xmm3 // mmRgbU1
+ movdqa xmm1, xmm0 // mmRgbV0
+ movdqa xmm3, xmm2 // mmRgbV1
+
+ pmaddubsw xmm0, xmm7 // mmRgbU0
+ pmaddubsw xmm1, xmm6 // mmRgbV0
+ pmaddubsw xmm2, xmm7 // mmRgbU1
+ pmaddubsw xmm3, xmm6 // mmRgbV1
+
+ phaddw xmm0, xmm2 // mmY0
+ phaddw xmm1, xmm3 // mmY1
+
+ psraw xmm0, 8
+ psraw xmm1, 8
+
+ paddw xmm0, xmm4
+ paddw xmm1, xmm4
+
+ packuswb xmm0, xmm1
+ movlps [esi], xmm0
+ movhps [edi], xmm0
+
+ lea esi, [esi + 8]
+ lea edi, [edi + 8]
+
+ add edx, 16 // i += 16;
+ push edx // save edx
+ and edx, ebx // edx = (ebx & ebx) = (ebx & (width - 1)) = (ebx % width)
+ cmp edx, 0 // (ebx % width) == 0 ?
+ pop edx // restore edx
+ jne loopUV_NextLine
+
+ // loopUV_EndOfLine: ((ebx % width) == 0)
+ add ebx, 1// change ebx value from width-1 to width
+ add edx, ebx // i += width
+ lea eax, [eax + 4 * ebx]// rgbPtr_ += (width * 4);
+ sub ebx, 1// change back ebx value to width - 1
+loopUV_NextLine:
+ cmp edx, ecx
+ jl loopUV
+
+ pop ebx
+ pop edi
+ pop esi
+ ret
+ }
+}
+#endif /* DDRAW_HAVE_RGB32_TO_I420_ASM */
+
+/* ============ Media Producer Interface ================= */
+static int _tdav_producer_screencast_ddraw_set(tmedia_producer_t *p_self, const tmedia_param_t* pc_param)
+{
+ int ret = 0;
+ tdav_producer_screencast_ddraw_t* p_ddraw = (tdav_producer_screencast_ddraw_t*)p_self;
+
+ if (!p_ddraw || !pc_param) {
+ DDRAW_DEBUG_ERROR("Invalid parameter");
+ return -1;
+ }
+
+ if (pc_param->value_type == tmedia_pvt_int64) {
+ if (tsk_striequals(pc_param->key, "local-hwnd") || tsk_striequals(pc_param->key, "preview-hwnd")) {
+ p_ddraw->hwnd_preview = (HWND)*((int64_t*)pc_param->value);
+ }
+ else if (tsk_striequals(pc_param->key, "src-hwnd")) {
+ p_ddraw->hwnd_src = (HWND)*((int64_t*)pc_param->value);
+ }
+ }
+ else if (pc_param->value_type == tmedia_pvt_int32) {
+ if (tsk_striequals(pc_param->key, "mute")) {
+ p_ddraw->b_muted = (TSK_TO_INT32((uint8_t*)pc_param->value) != 0);
+ }
+ }
+
+ return ret;
+}
+
+
+static int _tdav_producer_screencast_ddraw_prepare(tmedia_producer_t* p_self, const tmedia_codec_t* pc_codec)
+{
+ tdav_producer_screencast_ddraw_t* p_ddraw = (tdav_producer_screencast_ddraw_t*)p_self;
+ int ret = 0;
+ HRESULT hr = DD_OK;
+#if 0
+ DDPIXELFORMAT DDPixelFormat;
+#endif
+ DDSURFACEDESC ddsd;
+
+ if (!p_ddraw || !pc_codec) {
+ DDRAW_DEBUG_ERROR("Invalid parameter");
+ DDRAW_CHECK_HR(hr = E_INVALIDARG);
+ }
+
+ tsk_safeobj_lock(p_ddraw);
+
+ // check support for DirectDraw again
+ if (!tdav_producer_screencast_ddraw_plugin_is_supported()) {
+ DDRAW_CHECK_HR(hr = E_FAIL);
+ }
+
+ TMEDIA_PRODUCER(p_ddraw)->video.fps = TMEDIA_CODEC_VIDEO(pc_codec)->out.fps;
+ TMEDIA_PRODUCER(p_ddraw)->video.width = TMEDIA_CODEC_VIDEO(pc_codec)->out.width;
+ TMEDIA_PRODUCER(p_ddraw)->video.height = TMEDIA_CODEC_VIDEO(pc_codec)->out.height;
+
+ // Hack the codec to avoid flipping
+ TMEDIA_CODEC_VIDEO(pc_codec)->out.flip = tsk_false;
+
+ DDRAW_DEBUG_INFO("Prepare with fps:%d, width:%d; height:%d", TMEDIA_PRODUCER(p_ddraw)->video.fps, TMEDIA_PRODUCER(p_ddraw)->video.width, TMEDIA_PRODUCER(p_ddraw)->video.height);
+
+ if (!p_ddraw->ddrawModule.lpDD || !p_ddraw->ddrawModule.hDLL) {
+ DDRAW_CHECK_HR(hr = _tdav_producer_screencast_create_module(&p_ddraw->ddrawModule));
+ }
+ DDRAW_CHECK_HR(hr = p_ddraw->ddrawModule.lpDD->SetCooperativeLevel(NULL, DDSCL_NORMAL));
+
+ if (!p_ddraw->p_surf_primary) {
+ ZeroMemory(&ddsd, sizeof(ddsd));
+ ddsd.dwSize = sizeof(ddsd);
+ ddsd.dwFlags = DDSD_CAPS;
+ ddsd.ddsCaps.dwCaps = DDSCAPS_PRIMARYSURFACE;
+
+ DDRAW_CHECK_HR(hr = p_ddraw->ddrawModule.lpDD->CreateSurface(&ddsd, &p_ddraw->p_surf_primary, NULL));
+ }
+#if 0
+ ZeroMemory(&DDPixelFormat, sizeof(DDPixelFormat));
+ DDPixelFormat.dwSize = sizeof(DDPixelFormat);
+ DDRAW_CHECK_HR(hr = DDRAW_VTBL(p_ddraw->p_surf_primary)->GetPixelFormat(p_ddraw->p_surf_primary, &DDPixelFormat));
+ DDRAW_DEBUG_INFO("dwRGBBitCount:%d, dwRBitMask:%x, dwGBitMask:%x, dwBBitMask:%x, dwRGBAlphaBitMask:%x",
+ DDPixelFormat.dwRGBBitCount, DDPixelFormat.dwRBitMask, DDPixelFormat.dwGBitMask, DDPixelFormat.dwBBitMask, DDPixelFormat.dwRGBAlphaBitMask);
+ if ((TMEDIA_PRODUCER(p_ddraw)->video.chroma = _tdav_producer_screencast_get_chroma(&DDPixelFormat)) == tmedia_chroma_none) {
+ DDRAW_CHECK_HR(hr = DDERR_INVALIDCAPS);
+ }
+#else
+ ZeroMemory(&ddsd, sizeof(ddsd));
+ ddsd.dwSize = sizeof(ddsd);
+ ddsd.dwFlags = DDSD_HEIGHT | DDSD_WIDTH | DDSD_PITCH | DDSD_PIXELFORMAT;
+ DDRAW_CHECK_HR(hr = p_ddraw->p_surf_primary->GetSurfaceDesc(&ddsd));
+ DDRAW_DEBUG_INFO("Prepare with neg. width:%d, height:%d, pitch=%ld", ddsd.dwWidth, ddsd.dwHeight, ddsd.lPitch);
+ TMEDIA_PRODUCER(p_ddraw)->video.width = ddsd.dwWidth;
+ TMEDIA_PRODUCER(p_ddraw)->video.height = ddsd.dwHeight;
+ p_ddraw->n_buff_rgb_bitscount = ddsd.ddpfPixelFormat.dwRGBBitCount;
+ DDRAW_DEBUG_INFO("Prepare with dwRGBBitCount:%d, dwRBitMask:%x, dwGBitMask:%x, dwBBitMask:%x, dwRGBAlphaBitMask:%x",
+ ddsd.ddpfPixelFormat.dwRGBBitCount, ddsd.ddpfPixelFormat.dwRBitMask, ddsd.ddpfPixelFormat.dwGBitMask, ddsd.ddpfPixelFormat.dwBBitMask, ddsd.ddpfPixelFormat.dwRGBAlphaBitMask);
+ if ((TMEDIA_PRODUCER(p_ddraw)->video.chroma = _tdav_producer_screencast_get_chroma(&ddsd.ddpfPixelFormat)) == tmedia_chroma_none) {
+ DDRAW_CHECK_HR(hr = DDERR_INVALIDCAPS);
+ }
+#endif
+ // allocate RGB buffer
+ DDRAW_CHECK_HR(hr = _tdav_producer_screencast_alloc_rgb_buff(p_ddraw, ddsd.dwWidth, ddsd.dwHeight, ddsd.ddpfPixelFormat.dwRGBBitCount));
+
+ // Check if we can use built-in chroma conversion
+#if DDRAW_HAVE_RGB32_TO_I420_INTRIN || DDRAW_HAVE_RGB32_TO_I420_ASM
+ p_ddraw->b_have_rgb32_conv =
+ _tdav_producer_screencast_have_ssse3() // SSSE3 supported
+ && DDRAW_IS_ALIGNED(TMEDIA_PRODUCER(p_ddraw)->video.width, DDRAW_MEM_ALIGNMENT) // width multiple of 16
+ /* && DDRAW_IS_ALIGNED(TMEDIA_PRODUCER(p_ddraw)->video.height, DDRAW_MEM_ALIGNMENT) // height multiple of 16 */
+ && TMEDIA_PRODUCER(p_ddraw)->video.chroma == tmedia_chroma_rgb32; // Primary screen RGB32
+ if (p_ddraw->b_have_rgb32_conv) {
+ TMEDIA_PRODUCER(p_ddraw)->video.chroma = tmedia_chroma_yuv420p;
+ }
+#endif
+ DDRAW_DEBUG_INFO("RGB32 -> I420 conversion supported: %s", p_ddraw->b_have_rgb32_conv ? "YES" : "NO");
+
+ // allocate YUV buffer
+ if (p_ddraw->b_have_rgb32_conv) {
+ DDRAW_CHECK_HR(hr = _tdav_producer_screencast_alloc_yuv_buff(p_ddraw, (DWORD)TMEDIA_PRODUCER(p_ddraw)->video.width, (DWORD)TMEDIA_PRODUCER(p_ddraw)->video.height));
+ }
+
+ // BitmapInfo for preview
+#if DDRAW_PREVIEW
+ ZeroMemory(&p_ddraw->bi_preview, sizeof(p_ddraw->bi_preview));
+ p_ddraw->bi_preview.bmiHeader.biSize = (DWORD)sizeof(BITMAPINFOHEADER);
+ p_ddraw->bi_preview.bmiHeader.biCompression = BI_RGB;
+ p_ddraw->bi_preview.bmiHeader.biPlanes = 1;
+ p_ddraw->bi_preview.bmiHeader.biWidth = ddsd.dwWidth;
+ p_ddraw->bi_preview.bmiHeader.biHeight = ddsd.dwHeight;
+ p_ddraw->bi_preview.bmiHeader.biBitCount = (WORD)ddsd.ddpfPixelFormat.dwRGBBitCount;
+ p_ddraw->bi_preview.bmiHeader.biSizeImage = (p_ddraw->bi_preview.bmiHeader.biWidth * p_ddraw->bi_preview.bmiHeader.biHeight * (p_ddraw->bi_preview.bmiHeader.biBitCount >> 3));
+#endif /* DDRAW_PREVIEW */
+
+#if DDRAW_CPU_MONITOR || DDRAW_CPU_THROTTLING
+ if (!p_ddraw->p_timer_mgr) {
+ p_ddraw->p_timer_mgr = tsk_timer_manager_create();
+ }
+#endif /* DDRAW_CPU_MONITOR ||DDRAW_CPU_THROTTLING */
+
+#if DDRAW_CPU_THROTTLING
+ p_ddraw->cpu.fps_target = (TMEDIA_PRODUCER(p_ddraw)->video.fps + DDRAW_CPU_THROTTLING_FPS_MIN) >> 1; // start with minimum fps and increase the value based on the fps
+#endif /* DDRAW_CPU_THROTTLING */
+
+bail:
+ tsk_safeobj_unlock(p_ddraw);
+ return SUCCEEDED(hr) ? 0 : -1;
+}
+
+static int _tdav_producer_screencast_ddraw_start(tmedia_producer_t* p_self)
+{
+ tdav_producer_screencast_ddraw_t* p_ddraw = (tdav_producer_screencast_ddraw_t*)p_self;
+ int ret = 0;
+
+ if (!p_ddraw) {
+ DDRAW_DEBUG_ERROR("Invalid parameter");
+ return -1;
+ }
+
+ ret = tsk_safeobj_lock(p_ddraw);
+
+ p_ddraw->b_paused = tsk_false;
+
+ if (p_ddraw->b_started) {
+ DDRAW_DEBUG_INFO("Already started");
+ goto bail;
+ }
+
+ p_ddraw->b_started = tsk_true;
+
+ // Create notify events (must be done here before starting the grabber thread)
+#if DDRAW_MT
+ for (int i = 0; i < sizeof(p_ddraw->mt.h_events) / sizeof(p_ddraw->mt.h_events[0]); ++i) {
+ if (!p_ddraw->mt.h_events[i] && !(p_ddraw->mt.h_events[i] = CreateEvent(NULL, FALSE, FALSE, NULL))) {
+ DDRAW_DEBUG_ERROR("Failed to create event at %d", i);
+ ret = -1;
+ goto bail;
+ }
+ }
+#endif /* DDRAW_MT */
+
+ ret = tsk_thread_create(&p_ddraw->tid[0], _tdav_producer_screencast_grap_thread, p_ddraw);
+ if (ret != 0) {
+ DDRAW_DEBUG_ERROR("Failed to create thread");
+ goto bail;
+ }
+ //BOOL okSetTA = CeSetThreadAffinity((HANDLE)p_ddraw->tid[0], 0x01);
+#if DDRAW_MT
+ ret = tsk_thread_create(&p_ddraw->mt.tid[0], _tdav_producer_screencast_mt_encode_thread, p_ddraw);
+ if (ret != 0) {
+ DDRAW_DEBUG_ERROR("Failed to create thread");
+ goto bail;
+ }
+ //okSetTA = CeSetThreadAffinity((HANDLE)p_ddraw->mt.tid[0], 0x02);
+#endif /* DDRAW_MT */
+#if DDRAW_HIGH_PRIO_MEMCPY
+ if (p_ddraw->tid[0]) {
+ tsk_thread_set_priority(p_ddraw->tid[0], TSK_THREAD_PRIORITY_TIME_CRITICAL);
+ }
+#if DDRAW_MT
+ if (p_ddraw->mt.tid[0]) {
+ tsk_thread_set_priority(p_ddraw->mt.tid[0], TSK_THREAD_PRIORITY_TIME_CRITICAL);
+ }
+#endif /* DDRAW_MT */
+#endif /* DDRAW_HIGH_PRIO_MEMCPY */
+#if DDRAW_CPU_MONITOR || DDRAW_CPU_THROTTLING
+ ret = tsk_timer_manager_start(p_ddraw->p_timer_mgr);
+ if (ret == 0) {
+ p_ddraw->cpu.id_timer = tsk_timer_manager_schedule(p_ddraw->p_timer_mgr, DDRAW_CPU_SCHEDULE_TIMEOUT, _tdav_producer_screencast_timer_cb, p_ddraw);
+ }
+ else {
+ ret = 0; // not fatal error
+ DDRAW_DEBUG_WARN("Failed to start CPU timer");
+ }
+#endif /* DDRAW_CPU_MONITOR || DDRAW_CPU_THROTTLING */
+
+bail:
+ if (ret) {
+ p_ddraw->b_started = tsk_false;
+ if (p_ddraw->tid[0]) {
+ tsk_thread_join(&(p_ddraw->tid[0]));
+ }
+#if DDRAW_MT
+ if (p_ddraw->mt.tid[0]) {
+ tsk_thread_join(&(p_ddraw->mt.tid[0]));
+ }
+#endif /* DDRAW_MT */
+ }
+ ret = tsk_safeobj_unlock(p_ddraw);
+
+ return ret;
+}
+
+static int _tdav_producer_screencast_ddraw_pause(tmedia_producer_t* p_self)
+{
+ tdav_producer_screencast_ddraw_t* p_ddraw = (tdav_producer_screencast_ddraw_t*)p_self;
+
+ if (!p_ddraw) {
+ DDRAW_DEBUG_ERROR("Invalid parameter");
+ return -1;
+ }
+
+ tsk_safeobj_lock(p_ddraw);
+
+ p_ddraw->b_paused = tsk_true;
+ goto bail;
+
+bail:
+ tsk_safeobj_unlock(p_ddraw);
+
+ return 0;
+}
+
+static int _tdav_producer_screencast_ddraw_stop(tmedia_producer_t* p_self)
+{
+ tdav_producer_screencast_ddraw_t* p_ddraw = (tdav_producer_screencast_ddraw_t*)p_self;
+
+ if (!p_ddraw) {
+ DDRAW_DEBUG_ERROR("Invalid parameter");
+ return -1;
+ }
+
+ tsk_safeobj_lock(p_ddraw);
+
+ if (!p_ddraw->b_started) {
+ DDRAW_DEBUG_INFO("Already stopped");
+ goto bail;
+ }
+
+ p_ddraw->b_started = tsk_false;
+ p_ddraw->b_paused = tsk_false;
+
+#if DDRAW_CPU_MONITOR || DDRAW_CPU_THROTTLING
+ if (p_ddraw->p_timer_mgr) {
+ tsk_timer_manager_stop(p_ddraw->p_timer_mgr);
+ }
+#endif /* DDRAW_CPU_MONITOR ||DDRAW_CPU_THROTTLING */
+
+ // stop grabber thread
+ if (p_ddraw->tid[0]) {
+ tsk_thread_join(&(p_ddraw->tid[0]));
+ }
+
+#if DDRAW_MT
+ if (p_ddraw->mt.h_events[DDRAW_MT_EVENT_SHUTDOWN_INDEX]){
+ SetEvent(p_ddraw->mt.h_events[DDRAW_MT_EVENT_SHUTDOWN_INDEX]);
+ }
+ if (p_ddraw->mt.tid[0]) {
+ tsk_thread_join(&(p_ddraw->mt.tid[0]));
+ }
+ for (int i = 0; i < sizeof(p_ddraw->mt.h_events) / sizeof(p_ddraw->mt.h_events[0]); ++i) {
+ if (p_ddraw->mt.h_events[i]) {
+ CloseHandle(p_ddraw->mt.h_events[i]);
+ p_ddraw->mt.h_events[i] = NULL;
+ }
+ }
+#endif
+
+bail:
+ tsk_safeobj_unlock(p_ddraw);
+
+ return 0;
+}
+
+static int _tdav_producer_screencast_grab(tdav_producer_screencast_ddraw_t* p_self)
+{
+ int ret = 0;
+ HRESULT hr = S_OK;
+ DDSURFACEDESC ddsd;
+ DWORD nSizeWithoutPadding, nRowLengthInBytes, lockFlags;
+ tmedia_producer_t* p_base = TMEDIA_PRODUCER(p_self);
+ LPVOID lpBuffToSend, lpBuffYUV;
+ BOOL bDirectMemSurfAccess = DDRAW_MEM_SURFACE_DIRECT_ACCESS;
+#if DDRAW_MT
+ INT iMtFreeBuffIndex = -1;
+#endif
+ //--uint64_t timeStart, timeEnd;
+
+ //--timeStart = tsk_time_now();
+
+ if (!p_self) {
+ DDRAW_CHECK_HR(hr = E_INVALIDARG);
+ }
+
+ if (!p_self->b_started) {
+#if defined(E_ILLEGAL_METHOD_CALL)
+ DDRAW_CHECK_HR(hr = E_ILLEGAL_METHOD_CALL);
+#else
+ DDRAW_CHECK_HR(hr = E_FAIL);
+#endif
+ }
+
+#if DDRAW_MT
+ {
+ INT iIndex = 0;
+ for (; (iIndex < DDRAW_MT_COUNT) && (p_self->mt.b_flags_array[iIndex] == TRUE); ++iIndex);
+ if (iIndex == DDRAW_MT_COUNT) {
+ goto bail;
+ }
+ }
+#endif /* DDRAW_MT */
+
+ if (p_self->p_surf_primary->IsLost() == DDERR_SURFACELOST) {
+ DDRAW_CHECK_HR(hr = p_self->p_surf_primary->Restore());
+ }
+
+ ddsd.dwSize = sizeof(ddsd);
+ ddsd.dwFlags = DDSD_HEIGHT | DDSD_WIDTH | DDSD_PITCH | DDSD_PIXELFORMAT;
+ lockFlags = DDLOCK_READONLY |
+#if TDAV_UNDER_WINDOWS_CE
+ // This flag has a slightly different name under Windows CE vs. Desktop, but it's the same behavior.
+ DDLOCK_WAITNOTBUSY;
+#else
+ DDLOCK_WAIT;
+#endif
+ DDRAW_CHECK_HR(hr = p_self->p_surf_primary->Lock(NULL, &ddsd, lockFlags, NULL));
+ // make sure surface size and number of bits per pixel haven't changed
+ if (TMEDIA_PRODUCER(p_self)->video.width != ddsd.dwWidth || TMEDIA_PRODUCER(p_self)->video.height != ddsd.dwHeight || p_self->n_buff_rgb_bitscount != ddsd.ddpfPixelFormat.dwRGBBitCount) {
+ tsk_size_t n_buff_rgb_new;
+ tmedia_chroma_t chroma_new;
+ DDRAW_DEBUG_WARN("surface has changed: width %d<>%d or height %d<>%d or rgb_bits_count %d<>%d",
+ p_base->video.width, ddsd.dwWidth,
+ p_base->video.height, ddsd.dwHeight,
+ p_self->n_buff_rgb_bitscount, ddsd.ddpfPixelFormat.dwRGBBitCount);
+ if ((chroma_new = _tdav_producer_screencast_get_chroma(&ddsd.ddpfPixelFormat)) == tmedia_chroma_none) {
+ DDRAW_CHECK_HR(hr = DDERR_INVALIDCAPS);
+ }
+ // allocate RGB buffer
+ n_buff_rgb_new = (ddsd.dwWidth * ddsd.dwHeight * (ddsd.ddpfPixelFormat.dwRGBBitCount >> 3));
+ if (p_self->n_buff_rgb < n_buff_rgb_new) {
+ hr = _tdav_producer_screencast_alloc_rgb_buff(p_self, ddsd.dwWidth, ddsd.dwHeight, ddsd.ddpfPixelFormat.dwRGBBitCount);
+ if (FAILED(hr)) {
+ p_self->p_surf_primary->Unlock(NULL); // unlock before going to bail
+ DDRAW_CHECK_HR(hr);
+ }
+ }
+ p_base->video.width = ddsd.dwWidth;
+ p_base->video.height = ddsd.dwHeight;
+ p_base->video.chroma = chroma_new;
+ p_self->n_buff_rgb_bitscount = ddsd.ddpfPixelFormat.dwRGBBitCount;
+ // Check if we can use built-in chroma conversion
+#if DDRAW_HAVE_RGB32_TO_I420_INTRIN || DDRAW_HAVE_RGB32_TO_I420_ASM
+ p_self->b_have_rgb32_conv =
+ _tdav_producer_screencast_have_ssse3() // SSSE3 supported
+ && DDRAW_IS_ALIGNED(p_base->video.width, DDRAW_MEM_ALIGNMENT) // width multiple of 16
+ /* && DDRAW_IS_ALIGNED(p_base->video.height, DDRAW_MEM_ALIGNMENT) // height multiple of 16 */
+ && p_base->video.chroma == tmedia_chroma_rgb32; // Primary screen RGB32
+ if (p_self->b_have_rgb32_conv) {
+ p_base->video.chroma = tmedia_chroma_yuv420p;
+ }
+#endif
+ DDRAW_DEBUG_INFO("RGB32 -> I420 conversion supported: %s", p_self->b_have_rgb32_conv ? "YES" : "NO");
+ // allocate YUV buffer
+ if (p_self->b_have_rgb32_conv) {
+ hr = _tdav_producer_screencast_alloc_yuv_buff(p_self, (DWORD)p_base->video.width, (DWORD)p_base->video.height);
+ if (FAILED(hr)) {
+ p_self->p_surf_primary->Unlock(NULL); // unlock before going to bail
+ DDRAW_CHECK_HR(hr);
+ }
+ }
+ // preview
+#if DDRAW_PREVIEW
+ p_self->bi_preview.bmiHeader.biWidth = ddsd.dwWidth;
+ p_self->bi_preview.bmiHeader.biHeight = ddsd.dwHeight;
+ p_self->bi_preview.bmiHeader.biBitCount = (WORD)ddsd.ddpfPixelFormat.dwRGBBitCount;
+ p_self->bi_preview.bmiHeader.biSizeImage = (p_self->bi_preview.bmiHeader.biWidth * p_self->bi_preview.bmiHeader.biHeight * (p_self->bi_preview.bmiHeader.biBitCount >> 3));
+#endif /* DDRAW_PREVIEW */
+ }
+ nRowLengthInBytes = ddsd.dwWidth * (ddsd.ddpfPixelFormat.dwRGBBitCount >> 3);
+ nSizeWithoutPadding = ddsd.dwHeight * nRowLengthInBytes;
+
+ // init lpBuffToSend
+ if (DDRAW_MEM_SURFACE_DIRECT_ACCESS && ddsd.lPitch == nRowLengthInBytes && (!p_self->b_have_rgb32_conv || DDRAW_IS_ALIGNED(ddsd.lpSurface, DDRAW_MEM_ALIGNMENT))) {
+ // no padding
+ lpBuffToSend = ddsd.lpSurface;
+ bDirectMemSurfAccess = TRUE;
+ }
+ else {
+ // with padding or copy requested
+ UINT8 *pSurfBuff = (UINT8 *)ddsd.lpSurface, *pNegBuff = (UINT8 *)p_self->p_buff_rgb_aligned;
+ DWORD y;
+ bDirectMemSurfAccess = FALSE;
+ //--timeStart = tsk_time_now();
+ if (ddsd.lPitch == nRowLengthInBytes) {
+ // copy without padding padding
+ const UINT8* src = pSurfBuff;
+ UINT8* dst = (UINT8*)p_self->p_buff_rgb_aligned;
+ if (DDRAW_IS_ALIGNED(src, 16) && (nSizeWithoutPadding & 15) == 0) {
+#if DDRAW_HAVE_RGB32_TO_I420_INTRIN || DDRAW_HAVE_RGB32_TO_I420_ASM
+ if ((nSizeWithoutPadding & 127) == 0) {
+ for (DWORD i = 0; i < nSizeWithoutPadding; i += 128, src += 128, dst += 128) {
+#if defined(DDRAW_COPY128_ASM)
+ DDRAW_COPY128_ASM(dst, src);
+#else
+ DDRAW_COPY128_INTRIN(dst, src);
+#endif /* DDRAW_COPY128_ASM */
+ }
+ }
+ else if((nSizeWithoutPadding & 63) == 0) {
+ for (DWORD i = 0; i < nSizeWithoutPadding; i += 64, src += 64, dst += 64) {
+#if defined(DDRAW_COPY64_ASM)
+ DDRAW_COPY64_ASM(dst, src);
+#else
+ DDRAW_COPY64_INTRIN(dst, src);
+#endif /* DDRAW_COPY64_ASM */
+ }
+ }
+ else { // (nSizeWithoutPadding & 15) == 0
+ for (DWORD i = 0; i < nSizeWithoutPadding; i += 16, src += 16, dst += 16) {
+#if defined(DDRAW_COPY16_ASM)
+ DDRAW_COPY16_ASM(dst, src);
+#else
+ DDRAW_COPY16_INTRIN(dst, src);
+#endif /* DDRAW_COPY16_ASM */
+ }
+ }
+#else // neither ASM nor INTRINSIC support
+ CopyMemory(dst, src, nSizeWithoutPadding);
+#endif /* DDRAW_HAVE_RGB32_TO_I420_INTRIN || DDRAW_HAVE_RGB32_TO_I420_ASM */
+ }
+ else { // not 16bytes aligned
+ CopyMemory(dst, src, nSizeWithoutPadding);
+ }
+ }
+ else {
+ // copy with padding padding
+ for (y = 0; y < ddsd.dwHeight; ++y) {
+ CopyMemory(pNegBuff, pSurfBuff, nRowLengthInBytes);
+ pSurfBuff += ddsd.lPitch;
+ pNegBuff += nRowLengthInBytes;
+ }
+ }
+ lpBuffToSend = p_self->p_buff_rgb_aligned;
+ //--timeEnd = tsk_time_now();
+ //--DDRAW_DEBUG_INFO("Mem copy: start=%llu, end=%llu, duration=%llu", timeStart, timeEnd, (timeEnd - timeStart));
+ }
+ if (!bDirectMemSurfAccess) {
+ // surface buffer no longer needed, unlock
+ DDRAW_CHECK_HR(hr = p_self->p_surf_primary->Unlock(NULL));
+ }
+ // display preview
+#if DDRAW_PREVIEW
+ if (p_self->hwnd_preview) {
+ HWND hWnd; // copy for thread-safeness
+ HDC hDC = GetDC((hWnd = p_self->hwnd_preview));
+ if (hDC) {
+ RECT rcPreview;
+ if (GetWindowRect(hWnd, &rcPreview)) {
+ LONG nPreviewWidth = (rcPreview.right - rcPreview.left);
+ LONG nPreviewHeight = (rcPreview.bottom - rcPreview.top);
+ StretchDIBits(
+ hDC,
+ 0, 0, nPreviewWidth, nPreviewHeight,
+ 0, 0, p_self->bi_preview.bmiHeader.biWidth, p_self->bi_preview.bmiHeader.biHeight,
+ lpBuffToSend,
+ &p_self->bi_preview,
+ DIB_RGB_COLORS,
+ SRCCOPY);
+ }
+ ReleaseDC(hWnd, hDC);
+ }
+ }
+#endif /* DDRAW_PREVIEW */
+
+ // check we have a free buffer
+#if DDRAW_MT
+ {
+ for (INT iIndex = 0; iIndex < DDRAW_MT_COUNT; ++iIndex) {
+ if (p_self->mt.b_flags_array[iIndex] != TRUE) {
+ iMtFreeBuffIndex = iIndex;
+ lpBuffYUV = p_self->mt.p_buff_yuv_aligned_array[iIndex];
+ break;
+ }
+ }
+ if (iMtFreeBuffIndex < 0) {
+ lpBuffToSend = NULL; // do not waste time converting or encoding
+ lpBuffYUV = NULL;
+ }
+ }
+#else
+ lpBuffYUV = p_self->p_buff_yuv_aligned;
+#endif /* DDRAW_MT */
+
+ //--timeStart = tsk_time_now();
+ if (lpBuffToSend && (lpBuffYUV || !p_self->b_have_rgb32_conv)) {
+ if (p_self->b_have_rgb32_conv) {
+ // Convert from RGB32 to I420
+#if DDRAW_HAVE_RGB32_TO_I420_ASM
+ _tdav_producer_screencast_rgb32_to_yuv420_asm_ssse3((uint8_t*)lpBuffYUV, (const uint8_t*)lpBuffToSend, (int)p_base->video.width, (int)p_base->video.height);
+#elif DDRAW_HAVE_RGB32_TO_I420_INTRIN
+ _tdav_producer_screencast_rgb32_to_yuv420_intrin_ssse3((uint8_t*)lpBuffYUV, (const uint8_t*)lpBuffToSend, (int)p_base->video.width, (int)p_base->video.height);
+#else
+ DDRAW_CHECK_HR(hr = E_NOTIMPL); // never called
+#endif
+#if DDRAW_MT
+ p_self->mt.b_flags_array[iMtFreeBuffIndex] = TRUE;
+ if (!SetEvent(p_self->mt.h_events[iMtFreeBuffIndex])) {
+ DDRAW_CHECK_HR(hr = E_FAIL);
+ }
+#else
+ p_base->enc_cb.callback(p_base->enc_cb.callback_data, lpBuffYUV, p_self->n_buff_yuv);
+#endif
+ }
+ else {
+ // Send RGB32 buffer to the encode callback and let conversion be done by libyuv
+ // do not multi-thread as we cannot perform chroma conversion and encoding in parallel
+ p_base->enc_cb.callback(p_base->enc_cb.callback_data, lpBuffToSend, nSizeWithoutPadding);
+ }
+ }
+ //--timeEnd = tsk_time_now();
+ //--DDRAW_DEBUG_INFO("Encode callback: start=%llu, end=%llu, duration=%llu", timeStart, timeEnd, (timeEnd - timeStart));
+
+ if (bDirectMemSurfAccess) {
+ // surface buffer was used in preview and encode callback, unlock now
+ DDRAW_CHECK_HR(hr = p_self->p_surf_primary->Unlock(NULL));
+ }
+
+bail:
+ if (hr == DDERR_SURFACELOST) {
+ /*hr = */p_self->p_surf_primary->Restore();
+ hr = S_OK;
+ }
+
+ //--timeEnd = tsk_time_now();
+ //--DDRAW_DEBUG_INFO("Grab and encode duration=%llu", (timeEnd - timeStart));
+
+ return SUCCEEDED(hr) ? 0 : -1;
+}
+
+static tmedia_chroma_t _tdav_producer_screencast_get_chroma(const DDPIXELFORMAT* pixelFormat)
+{
+ HRESULT hr = DD_OK;
+ if (pixelFormat->dwFlags != DDPF_RGB) {
+ DDRAW_DEBUG_ERROR("dwFlags(%d) != DDPF_RGB", pixelFormat->dwFlags);
+ DDRAW_CHECK_HR(hr = DDERR_INVALIDCAPS);
+ }
+ switch (pixelFormat->dwRGBBitCount) {
+ case 32: // RGB32
+ case 24: // RGB24
+ // pixels must be aligned for fast copy
+ if (pixelFormat->dwRBitMask != 0xff0000 || pixelFormat->dwGBitMask != 0xff00 || pixelFormat->dwBBitMask != 0xff || pixelFormat->dwRGBAlphaBitMask != 0) {
+ DDRAW_DEBUG_ERROR("Pixels not aligned");
+ }
+ return pixelFormat->dwRGBBitCount == 24 ? tmedia_chroma_bgr24 : tmedia_chroma_rgb32;
+ case 16: // RGB565
+ // pixels must be aligned for fast copy
+ if (pixelFormat->dwRBitMask != 0xF800 || pixelFormat->dwGBitMask != 0x7E0 || pixelFormat->dwBBitMask != 0x1F) {
+ DDRAW_DEBUG_ERROR("Pixels not aligned");
+ }
+ return tmedia_chroma_rgb565le;
+ default:
+ DDRAW_DEBUG_ERROR("dwRGBBitCount(%d) != 24 and 32", pixelFormat->dwRGBBitCount);
+ DDRAW_CHECK_HR(hr = DDERR_INVALIDCAPS);
+ break;
+ }
+
+bail:
+ return tmedia_chroma_none;
+}
+
+static HRESULT _tdav_producer_screencast_create_module(LPDDrawModule lpModule)
+{
+ typedef HRESULT (WINAPI *pDirectDrawCreateFunc)(_In_ GUID FAR *lpGUID,
+ _Out_ LPDIRECTDRAW FAR *lplpDD,
+ _In_ IUnknown FAR *pUnkOuter);
+ HRESULT hr = S_OK;
+ pDirectDrawCreateFunc DirectDrawCreate_ = NULL;
+
+ if (!lpModule) {
+ DDRAW_CHECK_HR(hr = E_INVALIDARG);
+ }
+
+ if (!lpModule->hDLL && !(lpModule->hDLL = LoadLibrary(TEXT("ddraw.dll")))) {
+ DDRAW_DEBUG_ERROR("Failed to load ddraw.dll: %d", GetLastError());
+ DDRAW_CHECK_HR(hr = E_FAIL);
+ }
+ if (!lpModule->lpDD) {
+ // Hum, "GetProcAddressA" is missing but ""GetProcAddressW" exists on CE
+#if TDAV_UNDER_WINDOWS_CE
+# define DirectDrawCreateName TEXT("DirectDrawCreate")
+#else
+# define DirectDrawCreateName "DirectDrawCreate"
+#endif
+ if (!(DirectDrawCreate_ = (pDirectDrawCreateFunc)GetProcAddress(lpModule->hDLL, DirectDrawCreateName))) {
+ DDRAW_DEBUG_ERROR("Failed to find DirectDrawCreate in ddraw.dll: %d", GetLastError());
+ DDRAW_CHECK_HR(hr = E_FAIL);
+ }
+ DDRAW_CHECK_HR(hr = DirectDrawCreate_(NULL, &lpModule->lpDD, NULL));
+ }
+
+bail:
+ return hr;
+}
+
+static HRESULT _tdav_producer_screencast_alloc_rgb_buff(tdav_producer_screencast_ddraw_t* p_ddraw, DWORD w, DWORD h, DWORD bitsCount)
+{
+ HRESULT hr = S_OK;
+ DWORD n_buff_rgb_new = (w * h * (bitsCount >> 3));
+
+ if (p_ddraw->n_buff_rgb < n_buff_rgb_new) {
+ p_ddraw->p_buff_rgb_aligned = tsk_realloc_aligned(p_ddraw->p_buff_rgb_aligned, n_buff_rgb_new, DDRAW_MEM_ALIGNMENT);
+ if (!p_ddraw->p_buff_rgb_aligned) {
+ p_ddraw->n_buff_rgb = 0;
+ DDRAW_CHECK_HR(hr = DDERR_OUTOFMEMORY);
+ }
+ p_ddraw->n_buff_rgb = n_buff_rgb_new;
+ }
+
+bail:
+ return hr;
+}
+
+static HRESULT _tdav_producer_screencast_alloc_yuv_buff(tdav_producer_screencast_ddraw_t* p_ddraw, DWORD w, DWORD h)
+{
+ HRESULT hr = S_OK;
+ void** pp_buff_yuv_aligned;
+ int n_buff_yuv_aligned_count;
+
+#if DDRAW_MT
+ pp_buff_yuv_aligned = p_ddraw->mt.p_buff_yuv_aligned_array;
+ n_buff_yuv_aligned_count = sizeof(p_ddraw->mt.p_buff_yuv_aligned_array)/sizeof(p_ddraw->mt.p_buff_yuv_aligned_array[0]);
+#else
+ pp_buff_yuv_aligned = &p_ddraw->p_buff_yuv_aligned;
+ n_buff_yuv_aligned_count = 1;
+#endif /* DDRAW_MT */
+
+ p_ddraw->n_buff_yuv = (w * h * 3) >> 1;
+ for (int i = 0; i < n_buff_yuv_aligned_count; ++i) {
+ pp_buff_yuv_aligned[i] = tsk_realloc_aligned(pp_buff_yuv_aligned[i], p_ddraw->n_buff_yuv, DDRAW_MEM_ALIGNMENT);
+ if (!pp_buff_yuv_aligned[i]) {
+ p_ddraw->n_buff_yuv = 0;
+ DDRAW_CHECK_HR(hr = DDERR_OUTOFMEMORY);
+ }
+ }
+
+bail:
+ return hr;
+}
+
+static void* TSK_STDCALL _tdav_producer_screencast_grap_thread(void *arg)
+{
+ tdav_producer_screencast_ddraw_t* p_ddraw = (tdav_producer_screencast_ddraw_t*)arg;
+ tmedia_producer_t* p_base = TMEDIA_PRODUCER(arg);
+ int ret = 0;
+
+ // FPS manager
+ uint64_t TimeNow, TimeLastFrame = 0;
+ uint64_t TimeFrameDuration = (1000 / p_base->video.fps);
+
+ DDRAW_DEBUG_INFO("Grab thread -- START");
+
+ while (ret == 0 && p_ddraw->b_started) {
+#if DDRAW_CPU_THROTTLING
+ TimeFrameDuration = (1000 / p_ddraw->cpu.fps_target);
+#endif /* DDRAW_CPU_THROTTLING */
+ TimeNow = tsk_time_now();
+ if ((TimeNow - TimeLastFrame) > TimeFrameDuration) {
+ if (!p_ddraw->b_muted && !p_ddraw->b_paused) {
+ if (ret = _tdav_producer_screencast_grab(p_ddraw)) {
+ goto next;
+ }
+ }
+ TimeLastFrame = TimeNow;
+ }
+ else {
+ tsk_thread_sleep(1);
+#if 0
+ DDRAW_DEBUG_INFO("Skip frame");
+#endif
+ }
+ next:
+ ;
+ }
+ DDRAW_DEBUG_INFO("Grab thread -- STOP");
+ return tsk_null;
+}
+
+#if DDRAW_MT
+static void* TSK_STDCALL _tdav_producer_screencast_mt_encode_thread(void *arg)
+{
+ tdav_producer_screencast_ddraw_t* p_ddraw = (tdav_producer_screencast_ddraw_t*)arg;
+ tmedia_producer_t* p_base = TMEDIA_PRODUCER(arg);
+ DWORD dwEvent, dwIndex;
+ int ret = 0;
+ DWORD events_count = sizeof(p_ddraw->mt.h_events) / sizeof(p_ddraw->mt.h_events[0]);
+
+ DDRAW_DEBUG_INFO("Encode MT thread -- START");
+
+ while (ret == 0 && p_ddraw->b_started) {
+ dwEvent = WaitForMultipleObjects(events_count, p_ddraw->mt.h_events, FALSE, INFINITE);
+ if (!p_ddraw->b_started) {
+ break;
+ }
+ if (dwEvent < WAIT_OBJECT_0 || dwEvent >(WAIT_OBJECT_0 + events_count)) {
+ DDRAW_DEBUG_ERROR("Invalid dwEvent(%d)", dwEvent);
+ break;
+ }
+ dwIndex = (dwEvent - WAIT_OBJECT_0);
+ if (p_ddraw->mt.b_flags_array[dwIndex] != TRUE) {
+ // must never happen
+ DDRAW_DEBUG_ERROR("Invalid b_flags_array(%d)", dwIndex);
+ break;
+ }
+
+ p_base->enc_cb.callback(p_base->enc_cb.callback_data, p_ddraw->mt.p_buff_yuv_aligned_array[dwIndex], p_ddraw->n_buff_yuv);
+ p_ddraw->mt.b_flags_array[dwIndex] = FALSE;
+ }
+ DDRAW_DEBUG_INFO("Encode MT -- STOP");
+ return tsk_null;
+}
+#endif /* DDRAW_MT */
+
+#if DDRAW_CPU_MONITOR || DDRAW_CPU_THROTTLING
+static unsigned long long FileTimeToInt64(const FILETIME & ft)
+{
+ return (((unsigned long long)(ft.dwHighDateTime))<<32) | ((unsigned long long)ft.dwLowDateTime);
+}
+static BOOL GetCpuPercents(unsigned long long* PercentIdle, unsigned long long* PercentUsage)
+{
+ static unsigned long long _prevTicks = 0;
+ static unsigned long long _prevIdleTime = 0;
+ unsigned long long ticks, idleTime;
+ BOOL bSaveValues = FALSE, bSet = FALSE;
+#if TDAV_UNDER_WINDOWS_CE
+ bSaveValues = TRUE;
+ ticks = GetTickCount();
+ idleTime = GetIdleTime();
+#else
+ {
+ FILETIME _idleTime, _kernelTime, _userTime;
+ if (GetSystemTimes(&_idleTime, &_kernelTime, &_userTime)) {
+ idleTime = FileTimeToInt64(_idleTime);
+ ticks = FileTimeToInt64(_kernelTime) + FileTimeToInt64(_userTime);
+ bSaveValues = TRUE;
+ }
+ }
+#endif
+ if (_prevTicks > 0) {
+ *PercentIdle = ((100 * (idleTime - _prevIdleTime)) / (ticks - _prevTicks));
+ *PercentUsage = 100 - *PercentIdle;
+ bSet = TRUE;
+ }
+ if (bSaveValues) {
+ _prevTicks = ticks;
+ _prevIdleTime = idleTime;
+ }
+
+ return bSet;
+}
+
+static int _tdav_producer_screencast_timer_cb(const void* arg, tsk_timer_id_t timer_id)
+{
+ tdav_producer_screencast_ddraw_t* p_ddraw = (tdav_producer_screencast_ddraw_t*)arg;
+ int ret = 0;
+
+ if (!p_ddraw->b_started) {
+ return 0;
+ }
+
+ if (p_ddraw->cpu.id_timer == timer_id) {
+ unsigned long long PercentIdle, PercentUsage;
+ if (GetCpuPercents(&PercentIdle, &PercentUsage) == TRUE) {
+ TSK_DEBUG_INFO("\n\n****\n\nCPU Usage = %lld\n\n***", PercentUsage);
+#if DDRAW_CPU_THROTTLING
+ {
+ if ((PercentUsage + DDRAW_CPU_THROTTLING_THRESHOLD_MARGIN) > DDRAW_CPU_THROTTLING_THRESHOLD) {
+ unsigned long long NewTargetPercentUsage = TSK_CLAMP(DDRAW_CPU_THROTTLING_THRESHOLD_MARGIN, DDRAW_CPU_THROTTLING_THRESHOLD - DDRAW_CPU_THROTTLING_THRESHOLD_MARGIN, INT_MAX);
+ int NewTargetFps = (int)((NewTargetPercentUsage * p_ddraw->cpu.fps_target) / PercentUsage);
+ NewTargetFps = TSK_CLAMP(DDRAW_CPU_THROTTLING_FPS_MIN, NewTargetFps, TMEDIA_PRODUCER(p_ddraw)->video.fps);
+ TSK_DEBUG_INFO("\n\n****\n\nCPU throttling = (%lld+%d)>%d, NewTargetPercentUsage=%lld, NewTargetFps=%d\n\n***",
+ PercentUsage, DDRAW_CPU_THROTTLING_THRESHOLD_MARGIN, DDRAW_CPU_THROTTLING_THRESHOLD, NewTargetPercentUsage, NewTargetFps);
+ p_ddraw->cpu.fps_target = NewTargetFps;
+ }
+ else if (PercentUsage < DDRAW_CPU_THROTTLING_THRESHOLD) {
+ if ((p_ddraw->cpu.fps_target + DDRAW_CPU_THROTTLING_THRESHOLD_MARGIN) < TMEDIA_PRODUCER(p_ddraw)->video.fps) { // not honoring the negotiated fps yet?
+ p_ddraw->cpu.fps_target += 1; // TODO: this is ok only if the timer timeout is set to 1s or less
+ }
+ }
+ }
+#endif /* DDRAW_CPU_THROTTLING */
+ }
+
+ if (p_ddraw->b_started) {
+ p_ddraw->cpu.id_timer = tsk_timer_manager_schedule(p_ddraw->p_timer_mgr, DDRAW_CPU_SCHEDULE_TIMEOUT, _tdav_producer_screencast_timer_cb, p_ddraw);
+ }
+ }
+ return 0;
+}
+
+#endif /* DDRAW_CPU_MONITOR || DDRAW_CPU_THROTTLING */
+
+//
+// ddraw screencast producer object definition
+//
+/* constructor */
+static tsk_object_t* _tdav_producer_screencast_ddraw_ctor(tsk_object_t *self, va_list * app)
+{
+ tdav_producer_screencast_ddraw_t *p_ddraw = (tdav_producer_screencast_ddraw_t *)self;
+ if (p_ddraw) {
+ /* init base */
+ tmedia_producer_init(TMEDIA_PRODUCER(p_ddraw));
+ TMEDIA_PRODUCER(p_ddraw)->video.chroma = tmedia_chroma_bgr24; // RGB24 on x86 (little endians) stored as BGR24
+ /* init self with default values*/
+ TMEDIA_PRODUCER(p_ddraw)->video.fps = 15;
+ TMEDIA_PRODUCER(p_ddraw)->video.width = 352;
+ TMEDIA_PRODUCER(p_ddraw)->video.height = 288;
+
+ tsk_safeobj_init(p_ddraw);
+ }
+ return self;
+}
+/* destructor */
+static tsk_object_t* _tdav_producer_screencast_ddraw_dtor(tsk_object_t * self)
+{
+ tdav_producer_screencast_ddraw_t *p_ddraw = (tdav_producer_screencast_ddraw_t *)self;
+ if (p_ddraw) {
+ /* stop */
+ if (p_ddraw->b_started) {
+ _tdav_producer_screencast_ddraw_stop((tmedia_producer_t*)p_ddraw);
+ }
+
+ /* deinit base */
+ tmedia_producer_deinit(TMEDIA_PRODUCER(p_ddraw));
+ /* deinit self */
+#if DDRAW_CPU_MONITOR || DDRAW_CPU_THROTTLING
+ if (p_ddraw->p_timer_mgr) {
+ tsk_timer_manager_destroy(&p_ddraw->p_timer_mgr);
+ }
+#endif /* DDRAW_CPU_MONITOR || DDRAW_CPU_THROTTLING */
+#if DDRAW_MT
+ for (int i = 0; i < sizeof(p_ddraw->mt.p_buff_yuv_aligned_array) / sizeof(p_ddraw->mt.p_buff_yuv_aligned_array[0]); ++i) {
+ TSK_FREE_ALIGNED(p_ddraw->mt.p_buff_yuv_aligned_array[i]);
+ }
+ for (int i = 0; i < sizeof(p_ddraw->mt.h_events) / sizeof(p_ddraw->mt.h_events[0]); ++i) {
+ if (p_ddraw->mt.h_events[i]) {
+ CloseHandle(p_ddraw->mt.h_events[i]);
+ p_ddraw->mt.h_events[i] = NULL;
+ }
+ }
+#endif /* DDRAW_MT */
+ TSK_FREE_ALIGNED(p_ddraw->p_buff_rgb_aligned);
+ TSK_FREE_ALIGNED(p_ddraw->p_buff_yuv_aligned);
+ DDRAW_SAFE_RELEASE(&p_ddraw->p_surf_primary);
+ DDrawModuleSafeFree(p_ddraw->ddrawModule);
+ tsk_safeobj_deinit(p_ddraw);
+
+ DDRAW_DEBUG_INFO("*** destroyed ***");
+ }
+
+ return self;
+}
+/* object definition */
+static const tsk_object_def_t tdav_producer_screencast_ddraw_def_s =
+{
+ sizeof(tdav_producer_screencast_ddraw_t),
+ _tdav_producer_screencast_ddraw_ctor,
+ _tdav_producer_screencast_ddraw_dtor,
+ tsk_null,
+};
+/* plugin definition*/
+static const tmedia_producer_plugin_def_t tdav_producer_screencast_ddraw_plugin_def_s =
+{
+ &tdav_producer_screencast_ddraw_def_s,
+ tmedia_bfcp_video,
+ "Microsoft DirectDraw screencast producer",
+
+ _tdav_producer_screencast_ddraw_set,
+ _tdav_producer_screencast_ddraw_prepare,
+ _tdav_producer_screencast_ddraw_start,
+ _tdav_producer_screencast_ddraw_pause,
+ _tdav_producer_screencast_ddraw_stop
+};
+const tmedia_producer_plugin_def_t *tdav_producer_screencast_ddraw_plugin_def_t = &tdav_producer_screencast_ddraw_plugin_def_s;
+
+#endif /* TDAV_UNDER_WINDOWS && !TDAV_UNDER_WINDOWS_RT */
OpenPOWER on IntegriCloud