summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rwxr-xr-x build_iphone.sh (renamed from iphone.sh) 14
-rwxr-xr-x configure 1
-rw-r--r-- configure.ac 1
-rw-r--r-- src/macros.h 12
-rw-r--r-- src/neon.s 58
5 files changed, 71 insertions, 15 deletions
diff --git a/iphone.sh b/build_iphone.sh
index c970c92..70d7845 100755
--- a/iphone.sh
+++ b/build_iphone.sh
@@ -1,8 +1,18 @@
#/bin/sh
+INSTALL_DIR="`pwd`/build"
+
export SDKVER="6.0"
export DEVROOT="/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer"
export SDKROOT="$DEVROOT/SDKs/iPhoneOS$SDKVER.sdk"
-export CFLAGS="-O3 -Wreturn-type -Wparentheses -Wswitch -Wno-unused-parameter -Wno-unused-variable -Wunused-value -Wno-shorten-64-to-32 -Wno-trigraphs -fpascal-strings -miphoneos-version-min=5.0 -mcpu=cortex-a9 -arch armv7 -mfpu=neon -pipe -isysroot $SDKROOT -isystem $SDKROOT/usr/include -isystem $DEVROOT/usr/include -std=c11 -mno-thumb"
+export CFLAGS="-O3 -Wreturn-type -Wparentheses -Wswitch -Wno-unused-parameter -Wno-unused-variable -Wunused-value -Wno-shorten-64-to-32 -Wno-trigraphs -fpascal-strings -miphoneos-version-min=5.0 -mcpu=cortex-a9 -arch armv7 -mfpu=neon -pipe -isysroot $SDKROOT -isystem $SDKROOT/usr/include -isystem $DEVROOT/usr/include -std=c99 -mno-thumb -no-integrated-as"
export AR="$DEVROOT/usr/bin/ar"
-#export CC="$DEVROOT/usr/bin/clang"
+export CC="clang"
+
+
+mkdir -p $INSTALL_DIR
+./configure --enable-neon --build=i386-apple-darwin10.8.0 --host=arm-eabi --prefix=$INSTALL_DIR
+
+make
+make install
+
diff --git a/configure b/configure
index f985f66..2708754 100755
--- a/configure
+++ b/configure
@@ -15999,6 +15999,7 @@ $as_echo "no, using $LN_S" >&6; }
fi
+
# Checks for library functions.
#AC_FUNC_MALLOC
for ac_func in gettimeofday pow
diff --git a/configure.ac b/configure.ac
index dfd476f..2b5de20 100644
--- a/configure.ac
+++ b/configure.ac
@@ -76,6 +76,7 @@ AC_PROG_CC_STDC
AC_PROG_INSTALL
AC_PROG_LN_S
AC_PROG_LIBTOOL
+
# Checks for library functions.
#AC_FUNC_MALLOC
AC_CHECK_FUNCS([gettimeofday pow])
diff --git a/src/macros.h b/src/macros.h
index 63464f4..ade5f42 100644
--- a/src/macros.h
+++ b/src/macros.h
@@ -48,10 +48,14 @@ static inline V VLIT4(data_t f3, data_t f2, data_t f1, data_t f0) {
#define FFTS_FREE(d) (free(d))
__INLINE void STORESPR(data_t * addr, VS p) {
- __asm__ __volatile__ ("vst1.32 {%q1,%q2}, [%0, :128]\n\t"
- :
- : "r" (addr), "w" (p.val[0]), "w" (p.val[1])
- : "memory");
+
+ vst1q_f32(addr, p.val[0]);
+ vst1q_f32(addr + 4, p.val[1]);
+
+//__asm__ __volatile__ ("vst1.32 {%q1,%q2}, [%0, :128]\n\t"
+// :
+// : "r" (addr), "w" (p.val[0]), "w" (p.val[1])
+// : "memory");
}
#else
diff --git a/src/neon.s b/src/neon.s
index 2a2d107..5e93613 100644
--- a/src/neon.s
+++ b/src/neon.s
@@ -1,7 +1,13 @@
+#include "asm.s"
- .globl neon_x4
.align 4
+#ifdef __APPLE__
+ .globl _neon_x4
+_neon_x4:
+#else
+ .globl neon_x4
neon_x4:
+#endif
@ add r3, r0, #0
vld1.32 {q8,q9}, [r0, :128]
@@ -43,9 +49,14 @@ neon_x4:
vst1.32 {q6,q7}, [r6, :128]
bx lr
- .globl neon_x8
.align 4
+#ifdef __APPLE__
+ .globl _neon_x8
+_neon_x8:
+#else
+ .globl neon_x8
neon_x8:
+#endif
mov r11, #0
add r3, r0, #0 @ data0
add r5, r0, r1, lsl #1 @ data2
@@ -159,9 +170,14 @@ neon_x8_loop:
bx lr
- .globl neon_x8_t
.align 4
+#ifdef __APPLE__
+ .globl _neon_x8_t
+_neon_x8_t:
+#else
+ .globl neon_x8_t
neon_x8_t:
+#endif
mov r11, #0
add r3, r0, #0 @ data0
add r5, r0, r1, lsl #1 @ data2
@@ -281,9 +297,14 @@ neon_x8_t_loop:
@ r3-r10 = data pointers
@ r11 = loop iterations
@ r2 & lr = temps
- .globl neon_ee
.align 4
+#ifdef __APPLE__
+ .globl _neon_ee
+_neon_ee:
+#else
+ .globl neon_ee
neon_ee:
+#endif
vld1.32 {d16, d17}, [r2, :128]
_neon_ee_loop:
vld2.32 {q15}, [r10, :128]!
@@ -357,10 +378,14 @@ _neon_ee_loop:
@ r3-r10 = data pointers
@ r11 = loop iterations
@ r2 & lr = temps
- .globl neon_oo
.align 4
+#ifdef __APPLE__
+ .globl _neon_oo
+_neon_oo:
+#else
+ .globl neon_oo
neon_oo:
-
+#endif
_neon_oo_loop:
vld2.32 {q8}, [r6, :128]!
vld2.32 {q9}, [r5, :128]!
@@ -411,9 +436,14 @@ _neon_oo_loop:
@ r3-r10 = data pointers
@ r11 = addr of twiddle
@ r2 & lr = temps
- .globl neon_eo
.align 4
+#ifdef __APPLE__
+ .globl _neon_eo
+_neon_eo:
+#else
+ .globl neon_eo
neon_eo:
+#endif
vld2.32 {q9}, [r5, :128]! @tag2
vld2.32 {q13}, [r3, :128]! @tag0
vld2.32 {q12}, [r4, :128]! @tag1
@@ -488,9 +518,14 @@ neon_eo:
@ r3-r10 = data pointers
@ r11 = addr of twiddle
@ r2 & lr = temps
- .globl neon_oe
.align 4
+#ifdef __APPLE__
+ .globl _neon_oe
+_neon_oe:
+#else
+ .globl neon_oe
neon_oe:
+#endif
vld1.32 {q8}, [r5, :128]!
vld1.32 {q10}, [r6, :128]!
vld2.32 {q11}, [r4, :128]!
@@ -564,7 +599,12 @@ neon_oe:
vstmia lr!, {q4-q7}
+ .align 4
+#ifdef __APPLE__
+ .globl _neon_end
+_neon_end:
+#else
.globl neon_end
- .align 4
neon_end:
+#endif
bx lr
OpenPOWER on IntegriCloud