summary refs log tree commit diff stats
path: root/defs.h
diff options
context:
space:
mode:
Diffstat (limited to 'defs.h')
-rwxr-xr-x defs.h 147
1 files changed, 147 insertions, 0 deletions
diff --git a/defs.h b/defs.h
new file mode 100755
index 0000000..176dbd1
--- /dev/null
+++ b/defs.h
@@ -0,0 +1,147 @@
+/*============================================================================
+ bandwidth, a benchmark to estimate memory transfer bandwidth.
+ Copyright (C) 2005-2014 by Zack T Smith.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+ The author may be reached at 1@zsmith.co.
+ *===========================================================================*/
+
+//---------------------------------------------------------------------------
+// Change log
+// 0.18 Grand unified version supports x86/intel64/arm, linux/win32/winmo.
+// 0.19 Now have 128-bit writer that goes to cache AND one that bypasses.
+// 0.20 Added my bmplib and graphing of output. Also added --slow option.
+// 0.21 Adds random testing. Min chunk size = 256 B. Allows non-2^n chunks.
+// 0.22 Adds register-to-register and register-to/from-stack transfers.
+// 0.23 Adds vector-to-vector and register-to-vector transfers, & Mac support.
+// 0.24 Adds network bandwidth tests from this PC to specified others.
+// 0.25 Made network tests bidirectional to test asymmetric networks.
+// 0.26 Fixes to prevent certain vector instructions being used w/AMD chips.
+// 0.27 Added 128-byte tests for greater precision.
+// 0.28 Added use of CPUID.
+// 0.29 Added more 128-byte tests.
+// 0.30 Adds cache identification for Intel CPUs in 64-bit mode.
+// 0.31 Adds cache identification for Intel CPUs in 32-bit mode.
+// 0.32 Added AVX support.
+// 1.0 Moved graphing logic into BMPGraphing. Added LODS support.
+// 1.1 Switched to larger font in graphing module.
+//---------------------------------------------------------------------------
+
+#ifndef _DEFS_H // include guard, closed by the final #endif. NOTE(review): underscore + uppercase names are reserved for the implementation in C
+#define _DEFS_H
+
+#define RELEASE "1.1" // version string; equals the newest change-log entry (1.1)
+
+#if !defined(bool) && !defined(__cplusplus) && !(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) // skip when bool is a macro (<stdbool.h>) or a keyword (C++, C23)
+typedef char bool; // fallback boolean type; kept as char so existing object layouts do not change
+enum { true = 1, false = 0 }; // fallback boolean constants
+#endif
+
+#define NETWORK_DEFAULT_PORTNUM (49000) // NOTE(review): presumably the port used by the network tests when none is given -- confirm at the use site
+#define NETSIZE_MIN (15) // used below as a shift count: 1<<15 = 32768
+#define NETSIZE_MAX (28) // NOTE(review): presumably the largest shift count (1<<28 = 268435456) -- confirm at the use site
+#define NETWORK_CHUNK_SIZE (1<<NETSIZE_MIN) // expands to (1<<(15)) = 32768
+
+#define DOING_LODS // lodsq and lodsd; not tested by any #if/#ifdef inside this header
+
+extern int Reader (void *ptr, unsigned long size, unsigned long loops); // declaration only, defined outside this header. NOTE(review): size is presumably in bytes -- confirm
+
+extern int ReaderLODSQ (void *ptr, unsigned long size, unsigned long loops); // the four LODS variants take the same parameters as Reader
+extern int ReaderLODSD (void *ptr, unsigned long size, unsigned long loops);
+extern int ReaderLODSW (void *ptr, unsigned long size, unsigned long loops);
+extern int ReaderLODSB (void *ptr, unsigned long size, unsigned long loops);
+
+extern int Reader_128bytes (void *ptr, unsigned long size, unsigned long loops);
+extern int RandomReader (void *ptr, unsigned long n_chunks, unsigned long loops); // second parameter is named n_chunks here, whereas RandomWriter below names it size
+
+extern int Writer (void *ptr, unsigned long size, unsigned long loops, unsigned long value); // writers add a fourth parameter, value. NOTE(review): presumably the pattern to store -- confirm
+extern int Writer_128bytes (void *ptr, unsigned long size, unsigned long loops, unsigned long value);
+extern int RandomWriter (void *ptr, unsigned long size, unsigned long loops, unsigned long value);
+
+extern int RegisterToRegister (unsigned long); // the single parameter is unnamed throughout this group. NOTE(review): presumably a loop count -- confirm in the definitions
+
+extern int StackReader (unsigned long);
+extern int StackWriter (unsigned long);
+
+extern int RegisterToVector (unsigned long); // SSE2
+extern int Register8ToVector (unsigned long); // SSE2
+extern int Register16ToVector (unsigned long); // SSE2
+extern int Register32ToVector (unsigned long); // SSE2
+extern int Register64ToVector (unsigned long); // SSE2
+
+extern int VectorToVector (unsigned long); // SSE2
+
+extern int VectorToVectorAVX (unsigned long); // AVX
+
+extern int VectorToRegister (unsigned long); // SSE2
+extern int Vector8ToRegister (unsigned long); // SSE2
+extern int Vector16ToRegister (unsigned long); // SSE2
+extern int Vector32ToRegister (unsigned long); // SSE2
+extern int Vector64ToRegister (unsigned long); // SSE2
+
+extern int Copy (void*, void*, unsigned long, unsigned long); // parameter names omitted. NOTE(review): which pointer is destination and which is source must be confirmed in the definition
+extern int CopySSE (void*, void*, unsigned long, unsigned long);
+extern int CopyAVX (void*, void*, unsigned long, unsigned long);
+extern int CopySSE_128bytes (void*, void*, unsigned long, unsigned long);
+
+extern int ReaderAVX (void *ptr, unsigned long, unsigned long); // same parameter types as Reader; the two counts are unnamed here
+extern int ReaderSSE2 (void *ptr, unsigned long, unsigned long);
+extern int ReaderSSE2_bypass (void *ptr, unsigned long, unsigned long);
+extern int RandomReaderSSE2 (unsigned long **ptr, unsigned long, unsigned long); // the random SSE2 variants take unsigned long **ptr, whereas RandomReader takes void *ptr
+extern int RandomReaderSSE2_bypass (unsigned long **ptr, unsigned long, unsigned long);
+
+extern int WriterAVX (void *ptr, unsigned long, unsigned long, unsigned long); // same parameter types as Writer
+extern int WriterSSE2 (void *ptr, unsigned long, unsigned long, unsigned long);
+extern int RandomWriterSSE2(unsigned long **ptr, unsigned long, unsigned long, unsigned long);
+
+extern int ReaderSSE2_128bytes(void *ptr, unsigned long, unsigned long);
+extern int WriterSSE2_128bytes(void *ptr, unsigned long, unsigned long, unsigned long);
+
+extern int ReaderSSE2_128bytes_bypass (void *ptr, unsigned long, unsigned long);
+extern int WriterSSE2_128bytes_bypass (void *ptr, unsigned long, unsigned long, unsigned long);
+
+extern int WriterAVX_bypass (void *ptr, unsigned long, unsigned long, unsigned long);
+extern int WriterSSE2_bypass (void *ptr, unsigned long, unsigned long, unsigned long);
+extern int RandomWriterSSE2_bypass (unsigned long **ptr, unsigned long, unsigned long, unsigned long);
+
+extern void get_cpuid_family (char *family_return); // NOTE(review): presumably writes its result through family_return; required buffer size is not visible here
+extern void get_cpuid_cache_info (uint32_t *array, int index); // uint32_t: this header includes nothing itself, so <stdint.h> must be included before it
+extern unsigned get_cpuid1_ecx (void); // (void) instead of () makes these five getters real prototypes, so calls with arguments are diagnosed
+extern unsigned get_cpuid1_edx (void);
+extern unsigned get_cpuid7_ebx (void);
+extern unsigned get_cpuid_80000001_ecx (void);
+extern unsigned get_cpuid_80000001_edx (void);
+
+#define CPUID_EDX_MMX (1<<23) // CPUID leaf 1, EDX bit 23
+#define CPUID_EDX_SSE (1<<25) // CPUID leaf 1, EDX bit 25
+#define CPUID_EDX_SSE2 (1<<26) // CPUID leaf 1, EDX bit 26
+#define CPUID_EDX_INTEL64 (1<<29) // "Long Mode" on AMD. Extended leaf 0x80000001, EDX bit 29
+#define CPUID_EDX_XD (1<<20) // Execute-disable (NX) bit. Extended leaf 0x80000001, EDX bit 20
+#define CPUID_ECX_SSE3 (1) // CPUID leaf 1, ECX bit 0
+#define CPUID_ECX_SSSE3 (1<<9) // CPUID leaf 1, ECX bit 9
+#define CPUID_ECX_SSE4A (1<<6) // AMD extension. Extended leaf 0x80000001, ECX bit 6 (not leaf 1)
+#define CPUID_ECX_SSE41 (1<<19) // CPUID leaf 1, ECX bit 19
+#define CPUID_ECX_SSE42 (1<<20) // CPUID leaf 1, ECX bit 20
+#define CPUID_ECX_AES (1<<25) // Encryption. CPUID leaf 1, ECX bit 25
+#define CPUID_ECX_AVX (1<<28) // 256-bit YMM registers. CPUID leaf 1, ECX bit 28
+#define CPUID_EBX_AVX2 (0x20) // CPUID leaf 7 (subleaf 0), EBX bit 5
+
+#define FBLOOPS_R 400 // NOTE(review): presumably the loop count for framebuffer reads -- confirm at the use site
+#define FBLOOPS_W 800 // NOTE(review): presumably the loop count for framebuffer writes -- confirm at the use site
+#define FB_SIZE (640*480*2) // = 614400. NOTE(review): looks like 640x480 pixels at 2 bytes each -- confirm
+
+#endif // _DEFS_H
+
OpenPOWER on IntegriCloud