summaryrefslogtreecommitdiffstats
path: root/sys/amd64
diff options
context:
space:
mode:
author: peter <peter@FreeBSD.org> 1999-08-19 14:54:40 +0000
committer: peter <peter@FreeBSD.org> 1999-08-19 14:54:40 +0000
commit: 8ffe59d0bfda567fe8d56ea92c83e8b9aa261da8 (patch)
tree: 6cb3778e79a4a5b9d129b9fcea4085c1e041a1d5 /sys/amd64
parent: d4c0c0bd4aa267c169eeebee5cb975271791f325 (diff)
downloadFreeBSD-src-8ffe59d0bfda567fe8d56ea92c83e8b9aa261da8.zip
FreeBSD-src-8ffe59d0bfda567fe8d56ea92c83e8b9aa261da8.tar.gz
Undo my previous commit and do it differently. Break the ffs() etc. macros into two parts: one to do the bsfl, and the other to convert the result (base 0) to an ffs()-like result (base 1) in inline C. This enables the optimizer to be a lot smarter in certain cases, such as where it knows that the argument is non-zero and we want ffs(known non-zero arg) - 1. This appears to produce identical code to the old inline when the argument is unknown.
Diffstat (limited to 'sys/amd64')
-rw-r--r-- sys/amd64/include/cpufunc.h | 41
1 file changed, 27 insertions, 14 deletions
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index 60928b8..9c72d0d 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -30,7 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: cpufunc.h,v 1.88 1999/07/23 23:45:19 alc Exp $
+ * $Id: cpufunc.h,v 1.89 1999/08/19 00:32:48 peter Exp $
*/
/*
@@ -82,40 +82,53 @@ enable_intr(void)
__asm __volatile("sti");
}
-#define HAVE_INLINE_FFS
-#if __GNUC__ == 2 && __GNUC_MINOR__ > 8
-#define ffs(mask) __builtin_ffs(mask)
-#else
+#define HAVE_INLINE__BSFL
+
static __inline int
-ffs(int mask)
+__bsfl(int mask)
{
int result;
+
/*
* bsfl turns out to be not all that slow on 486's. It can beaten
* using a binary search to reduce to 4 bits and then a table lookup,
* but only if the code is inlined and in the cache, and the code
* is quite large so inlining it probably busts the cache.
- *
+ */
+ __asm __volatile("bsfl %0,%0" : "=r" (result) : "0" (mask));
+ return (result);
+}
+
+#define HAVE_INLINE_FFS
+
+static __inline int
+ffs(int mask)
+{
+ /*
* Note that gcc-2's builtin ffs would be used if we didn't declare
* this inline or turn off the builtin. The builtin is faster but
* broken in gcc-2.4.5 and slower but working in gcc-2.5 and 2.6.
*/
- __asm __volatile("testl %0,%0; je 1f; bsfl %0,%0; incl %0; 1:"
- : "=r" (result) : "0" (mask));
+ return mask == 0 ? mask : __bsfl(mask) + 1;
+}
+
+#define HAVE_INLINE__BSRL
+
+static __inline int
+__bsrl(int mask)
+{
+ int result;
+ __asm __volatile("bsrl %0,%0" : "=r" (result) : "0" (mask));
return (result);
}
-#endif
#define HAVE_INLINE_FLS
static __inline int
fls(int mask)
{
- int result;
- __asm __volatile("testl %0,%0; je 1f; bsrl %0,%0; incl %0; 1:"
- : "=r" (result) : "0" (mask));
- return (result);
+ return mask == 0 ? mask : __bsrl(mask) + 1;
}
#if __GNUC__ < 2
OpenPOWER on IntegriCloud