Re-implementation of the interpolation code used for sample rate conversion.

The new version has improved interpolation accuracy and maintains the timing relationship between the input and output signals exactly. Approved by: cg
author: orion <orion@FreeBSD.org> 2003-01-20 00:54:24 +0000
committer: orion <orion@FreeBSD.org> 2003-01-20 00:54:24 +0000
commit: cf409b71efe2214da293509f71f3e763de2fcde7 (patch)
tree: f39a1c28d8d70dd8ae544984a6c1a19a8d2b4755 /sys/dev/sound/pcm/feeder_rate.c
parent: 23d74a5e7f74e46583bd07e2749a60928c4fb413 (diff)
download: FreeBSD-src-cf409b71efe2214da293509f71f3e763de2fcde7.zip
FreeBSD-src-cf409b71efe2214da293509f71f3e763de2fcde7.tar.gz
1 files changed, 308 insertions, 53 deletions
diff --git a/sys/dev/sound/pcm/feeder_rate.c b/sys/dev/sound/pcm/feeder_rate.c
index fca795c..70f1058 100644
--- a/sys/dev/sound/pcm/feeder_rate.c
+++ b/sys/dev/sound/pcm/feeder_rate.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999 Cameron Grant <gandalf@vilnya.demon.co.uk>
+ * Copyright (c) 2003 Orion Hodson <orion@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -22,35 +22,149 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
+ *
+ * MAINTAINER: Orion Hodson <orion@freebsd.org>
+ *
+ * This rate conversion code uses linear interpolation without any
+ * pre- or post- interpolation filtering to combat aliasing.  This
+ * greatly limits the sound quality and should be addressed at some
+ * stage in the future.
+ * 
+ * Since the accuracy of interpolation is sensitive and examination
+ * of the algorithm output is harder from the kernel, the code is
+ * designed to be compiled in the kernel and in a userland test
+ * harness.  This is done by selectively including and excluding
+ * several portions of code based on whether _KERNEL is defined.  It's a
+ * little ugly, but very useful.  The testsuite and its revisions can
+ * be found at http://people.freebsd.org/~orion/feedrate/ 
  */
 
-#include <dev/sound/pcm/sound.h>
+#ifdef _KERNEL
 
+#include <dev/sound/pcm/sound.h>
 #include "feeder_if.h"
 
 SND_DECLARE_FILE("$FreeBSD$");
 
 MALLOC_DEFINE(M_RATEFEEDER, "ratefeed", "pcm rate feeder");
 
+#define RATE_ASSERT(x, y) /* KASSERT(x,y) */
+
+#define RATE_TRACE(x...) /* printf(x) */
+
+#else /* _KERNEL */
+
+/* #define RATE_DEBUG */
+
+#include "test_rate_head.h"
+
+#endif /* _KERNEL */
+
 #define FEEDBUFSZ	8192
-#undef FEEDER_DEBUG
+
+struct feed_rate_info;
+
+typedef int (*rate_convert_method)(struct feed_rate_info *, 
+				   uint32_t, uint32_t, int16_t *);
+
+static int 
+convert_stereo_up(struct feed_rate_info *info, 
+		  uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst);
+
+static int
+convert_stereo_down(struct feed_rate_info *info, 
+		    uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst);
+
 
 struct feed_rate_info {
-	u_int32_t src, dst;
-	int srcpos, srcinc;
-	int16_t *buffer;
-	u_int16_t alpha;
+	uint32_t src, dst;	/* source and destination rates */
+	int16_t *buffer;	/* input buffer */
+	uint16_t buffer_ticks;	/* number of available samples in buffer */
+	uint16_t buffer_pos;	/* next available sample in buffer */
+	uint16_t rounds; 	/* maximum number of cycle rounds w buffer */
+	uint16_t alpha;		/* interpolation distance */
+        uint16_t sscale;        /* src clock scale */
+        uint16_t dscale;        /* dst clock scale */
+        uint16_t mscale;        /* scale factor to avoid divide per sample */
+        uint16_t mroll;         /* roll to again avoid divide per sample */
+	uint16_t channels;	/* 1 = mono, 2 = stereo */
+
+	rate_convert_method convert;
 };
 
+#define src_ticks_per_cycle(info) (info->dscale * info->rounds)
+
+#define dst_ticks_per_cycle(info) (info->sscale * info->rounds)
+
+#define bytes_per_tick(info) (info->channels * 2)
+
+#define src_bytes_per_cycle(info) 					      \
+        (src_ticks_per_cycle(info) * bytes_per_tick(info))
+
+#define dst_bytes_per_cycle(info) 					      \
+        (dst_ticks_per_cycle(info) * bytes_per_tick(info))
+
+static uint32_t
+gcd(uint32_t n1, uint32_t n2)
+{
+	return n2 ? gcd(n2, n1 % n2) : n1;
+}
+
 static int
 feed_rate_setup(struct pcm_feeder *f)
 {
 	struct feed_rate_info *info = f->data;
+        uint32_t mscale, mroll, l, r, g;
+	
+	/* Beat sample rates down by greatest common divisor */
+	g = gcd(info->src, info->dst);
+	info->sscale = info->dst / g;
+	info->dscale = info->src / g;
 
-	info->srcinc = (info->src << 16) / info->dst;
-	/* srcinc is 16.16 fixed point increment for srcpos for each dstpos */
-	info->srcpos = 0;
-	return 0;
+	info->alpha = 0;
+	info->channels = 2;
+	info->buffer_ticks = 0; 
+	info->buffer_pos = 0;
+
+	/* Pick suitable conversion routine */
+	if (info->src > info->dst) {
+		info->convert = convert_stereo_down;
+	} else {
+		info->convert = convert_stereo_up;
+	}
+
+	/*
+	 * Determine number of conversion rounds that will fit into
+	 * buffer.  NB Must set info->rounds to one before using
+	 * src_ticks_per_cycle here since it is used by src_ticks_per_cycle.
+	 */
+	info->rounds = 1;	
+	info->rounds = FEEDBUFSZ / 
+		(src_ticks_per_cycle(info) * bytes_per_tick(info)) - 1;
+
+	/*
+	 * Find scale and roll combination that allows us to trade 
+	 * costly divide operations in the main loop for multiply-rolls.
+	 */
+        for (l = 99; l > 90; l -= 3) {
+		for (mroll = 2; mroll < 16; mroll ++) {
+			mscale = (1 << mroll) / info->sscale;
+                        r = (mscale * info->sscale * 100) >> mroll;
+                        if (r > l && r <= 100) {
+                                info->mscale = mscale;
+                                info->mroll = mroll;
+                                RATE_TRACE("Converting %d to %d with "
+					   "mscale = %d and mroll = %d "
+					   "(gain = %d / 100)\n",
+					   info->src, info->dst,
+					   info->mscale, info->mroll, r);
+                                return 0;
+                        }
+                }
+        }
+	RATE_TRACE("Failed to find a converter within 90%% gain.");
+
+        return -1;
 }
 
 static int
@@ -68,6 +182,7 @@ feed_rate_set(struct pcm_feeder *f, int what, int value)
 	default:
 		return -1;
 	}
+
 	return feed_rate_setup(f);
 }
 
@@ -102,7 +217,7 @@ feed_rate_init(struct pcm_feeder *f)
 	}
 	info->src = DSP_DEFAULT_SPEED;
 	info->dst = DSP_DEFAULT_SPEED;
-	info->alpha = 0;
+
 	f->data = info;
 	return feed_rate_setup(f);
 }
@@ -122,59 +237,195 @@ feed_rate_free(struct pcm_feeder *f)
 }
 
 static int
-feed_rate(struct pcm_feeder *f, struct pcm_channel *c, u_int8_t *b, u_int32_t count, void *source)
+convert_stereo_up(struct feed_rate_info *info, 
+		  uint32_t src_ticks, 
+		  uint32_t dst_ticks, 
+		  int16_t *dst)
 {
-	struct feed_rate_info *info = f->data;
-	int16_t *destbuf = (int16_t *)b;
-	int fetch, v, alpha, hidelta, spos, dpos;
+	uint32_t max_dst_ticks;
+	int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o;
+	int16_t *src;
+
+	sp = info->buffer_pos * 2;
+	se = sp + src_ticks * 2;
+
+	src = info->buffer;
+	alpha = info->alpha * info->mscale;
+	dalpha = info->dscale * info->mscale; /* Alpha increment */
+	malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */
+	mroll = info->mroll;
 
 	/*
-	 * at this point:
-	 * info->srcpos is 24.8 fixed offset into the fetchbuffer.  0 <= srcpos <= 0xff
-	 *
-	 * our input and output are always AFMT_S16LE stereo.  this simplifies things.
+	 * For efficiency the main conversion loop should only depend on
+	 * one variable.  We use the state to work out the maximum number
+	 * of output samples that are available and eliminate the checking of
+	 * sp from the loop.
 	 */
+	max_dst_ticks = src_ticks * info->dst / info->src - alpha / dalpha;
+	if (max_dst_ticks < dst_ticks) {
+		dst_ticks = max_dst_ticks;
+	}
 
+	dp = 0;
+	de = dst_ticks * 2;
 	/*
-	 * we start by fetching enough source data into our buffer to generate
-	 * about as much as was requested.  we put it at offset 2 in the
-	 * buffer so that we can interpolate from the last samples in the
-	 * previous iteration- when we finish we will move our last samples
-	 * to the start of the buffer.
+	 * FYI: unrolling this loop manually does not help much here because
+	 * of the alpha, malpha comparison.
 	 */
-	spos = 0;
-	dpos = 0;
-
-	/* fetch is in bytes */
-	fetch = (count * info->srcinc) >> 16;
-	fetch = min(fetch, FEEDBUFSZ - 4) & ~3;
-	if (fetch == 0)
-		return 0;
-	fetch = FEEDER_FEED(f->source, c, ((u_int8_t *)info->buffer) + 4, fetch, source);
-	fetch /= 2;
-
-	alpha = info->alpha;
-	hidelta = min(info->srcinc >> 16, 1) * 2;
-	while ((spos + hidelta + 1) < fetch) {
-		v = (info->buffer[spos] * (0xffff - alpha)) + (info->buffer[spos + hidelta] * alpha);
-		destbuf[dpos++] = v >> 16;
-
-		v = (info->buffer[spos + 1] * (0xffff - alpha)) + (info->buffer[spos + hidelta + 1] * alpha);
-		destbuf[dpos++] = v >> 16;
-
-		alpha += info->srcinc;
-		spos += (alpha >> 16) * 2;
-		alpha &= 0xffff;
+	while (dp < de) {
+		o = malpha - alpha;
+		x = alpha * src[sp + 2] + o * src[sp];
+		dst[dp++] = x >> mroll;
+		x = alpha * src[sp + 3] + o * src[sp + 1];
+		dst[dp++] = x >> mroll;
+		alpha += dalpha;
+		if (alpha >= malpha) {
+			alpha -= malpha;
+			sp += 2;
+		}
+	}
+	RATE_ASSERT(sp <= se, ("%s: Source overrun\n", __func__)); 
+
+	info->buffer_pos = sp / info->channels;
+	info->alpha = alpha / info->mscale;
+
+	return dp / info->channels;
+}
+
+static int
+convert_stereo_down(struct feed_rate_info *info, 
+		    uint32_t src_ticks, 
+		    uint32_t dst_ticks, 
+		    int16_t *dst)
+{
+	int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o, m, 
+		mdalpha, mstep;
+	int16_t *src;
+
+	sp = info->buffer_pos * 2;
+	se = sp + src_ticks * 2;
+
+	src = info->buffer;
+	alpha = info->alpha * info->mscale;
+	dalpha = info->dscale * info->mscale; /* Alpha increment */
+	malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */
+	mroll = info->mroll;
+
+	dp = 0;
+	de = dst_ticks * 2;
+
+	m = dalpha / malpha;
+	mstep = m * 2;
+	mdalpha = dalpha - m * malpha;
+
+	/*
+	 * TODO: eliminate sp or dp from this loop comparison for a few 
+	 * extra % performance.
+	 */
+	while (sp < se && dp < de) {
+		o = malpha - alpha;
+		x = alpha * src[sp + 2] + o * src[sp];
+		dst[dp++] = x >> mroll;
+		x = alpha * src[sp + 3] + o * src[sp + 1];
+		dst[dp++] = x >> mroll;
+
+		alpha += mdalpha;
+		sp += mstep;
+		if (alpha >= malpha) {
+			alpha -= malpha;
+			sp += 2;
+		}
+	}
+
+	info->buffer_pos = sp / info->channels;
+	info->alpha = alpha / info->mscale;
+
+	RATE_ASSERT(info->buffer_pos <= info->buffer_ticks, 
+		    ("%s: Source overrun\n", __func__)); 
+
+	return dp / info->channels;
+}
+
+static int
+feed_rate(struct pcm_feeder	*f, 
+	  struct pcm_channel	*c, 
+	  uint8_t		*b,
+	  uint32_t		 count, 
+	  void			*source)
+{
+	struct feed_rate_info *info = f->data;
+
+	uint32_t done, s_ticks, d_ticks;
+	done = 0;
 
+	RATE_ASSERT(info->channels == 2, 
+		    ("%s: channels (%d) != 2", __func__, info->channels));
+
+	while (done < count) {
+		/* Slurp in more data if input buffer is not full */
+		if (info->buffer_ticks < src_ticks_per_cycle(info)) {
+			uint8_t *u8b;
+			int	 fetch;
+			fetch = src_bytes_per_cycle(info) - 
+				info->buffer_ticks * bytes_per_tick(info);
+			u8b = (uint8_t*)info->buffer + 
+				(info->buffer_ticks + 1) *
+				bytes_per_tick(info);
+			fetch = FEEDER_FEED(f->source, c, u8b, fetch, source);
+			RATE_ASSERT(fetch % bytes_per_tick(info) == 0,
+				    ("%s: fetched unaligned bytes (%d)",
+				     __func__, fetch));
+			info->buffer_ticks += fetch / bytes_per_tick(info);
+			RATE_ASSERT(src_ticks_per_cycle(info) >= 
+				    info->buffer_ticks,
+				    ("%s: buffer overfilled (%d > %d).",
+				     __func__, info->buffer_ticks, 
+				 src_ticks_per_cycle(info)));
+			if (fetch == 0 && info->buffer_pos == info->buffer_ticks)
+				break;
+		}
+
+		/* Find amount of input buffer data that should be processed */
+		d_ticks = (count - done) / bytes_per_tick(info);
+		s_ticks = info->buffer_ticks - info->buffer_pos;
+
+		d_ticks = info->convert(info, s_ticks, d_ticks,
+					(uint16_t*)(b + done));
+		if (d_ticks == 0)
+			break;
+		done += d_ticks * bytes_per_tick(info);
+
+		RATE_ASSERT(info->buffer_pos <= info->buffer_ticks,
+			    ("%s: buffer_ticks too big\n", __func__));
+
+		RATE_TRACE("%s: ticks %5d pos %d\n",
+		      __func__, info->buffer_ticks, info->buffer_pos);
+
+		if (src_ticks_per_cycle(info) <= info->buffer_pos) {
+			/* End of cycle reached, copy last samples to start */
+			uint8_t *u8b;
+			u8b = (uint8_t*)info->buffer;
+			bcopy(u8b + src_bytes_per_cycle(info), u8b, 
+			      bytes_per_tick(info));
+
+			RATE_ASSERT(info->alpha == 0,
+				    ("%s: completed cycle with alpha non-zero", 
+				     __func__, info->alpha));
+			
+			info->buffer_pos = 0;
+			info->buffer_ticks = 0;
+		}
 	}
-	info->alpha = alpha & 0xffff;
-	info->buffer[0] = info->buffer[spos - hidelta];
-	info->buffer[1] = info->buffer[spos - hidelta + 1];
+	
+	RATE_ASSERT(count >= done, 
+		    ("%s: generated too many bytes of data (%d > %d).",
+		     __func__, done, count));
 
-	count = dpos * 2;
-	return count;
+	return done;
 }
 
+#ifdef _KERNEL
+
 static struct pcm_feederdesc feeder_rate_desc[] = {
 	{FEEDER_RATE, AFMT_S16_LE | AFMT_STEREO, AFMT_S16_LE | AFMT_STEREO, 0},
 	{0},
@@ -189,4 +440,8 @@ static kobj_method_t feeder_rate_methods[] = {
 };
 FEEDER_DECLARE(feeder_rate, 2, NULL);
 
+#else /* _KERNEL */
+
+#include "test_rate_tail.h"
 
+#endif /* _KERNEL */
author	orion <orion@FreeBSD.org>	2003-01-20 00:54:24 +0000
committer	orion <orion@FreeBSD.org>	2003-01-20 00:54:24 +0000
commit	cf409b71efe2214da293509f71f3e763de2fcde7 (patch)
tree	f39a1c28d8d70dd8ae544984a6c1a19a8d2b4755 /sys/dev/sound/pcm/feeder_rate.c
parent	23d74a5e7f74e46583bd07e2749a60928c4fb413 (diff)
download	FreeBSD-src-cf409b71efe2214da293509f71f3e763de2fcde7.zip FreeBSD-src-cf409b71efe2214da293509f71f3e763de2fcde7.tar.gz