drm/nva3/clk: better pll calculation when no fractional fb div available

The core/mem/shader clocks don't support the fractional feedback divider, causing our calculated clocks to be off by quite a lot in some cases. To solve this we will switch to a search-based algorithm when fN is NULL. For my NVA8 at PL3, this actually generates identical coefficients to the binary driver. Hopefully that's a good sign, and that this does not break VPLL calculation for someone.
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
author: Ben Skeggs <bskeggs@redhat.com> 2011-04-28 02:34:21 +1000
committer: Ben Skeggs <bskeggs@redhat.com> 2011-05-16 10:50:59 +1000
commit: 52eba8dd5e830a836425e92d002bc51e42d3280e (patch)
tree: 88faa691a4828e7a3ca874e4d8d45a2a6feff23f
parent: 96d1fcf8b5a3a9c66fddeaa9fb71e4e68ee2e08b (diff)
download: op-kernel-dev-52eba8dd5e830a836425e92d002bc51e42d3280e.zip
op-kernel-dev-52eba8dd5e830a836425e92d002bc51e42d3280e.tar.gz
4 files changed, 43 insertions, 37 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 444a943..9c56331 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -1353,8 +1353,8 @@ bool nv50_gpio_irq_enable(struct drm_device *, enum dcb_gpio_tag, bool on);
 /* nv50_calc. */
 int nv50_calc_pll(struct drm_device *, struct pll_lims *, int clk,
 		  int *N1, int *M1, int *N2, int *M2, int *P);
-int nv50_calc_pll2(struct drm_device *, struct pll_lims *,
-		   int clk, int *N, int *fN, int *M, int *P);
+int nva3_calc_pll(struct drm_device *, struct pll_lims *,
+		  int clk, int *N, int *fN, int *M, int *P);
 
 #ifndef ioread32_native
 #ifdef __BIG_ENDIAN
diff --git a/drivers/gpu/drm/nouveau/nv50_calc.c b/drivers/gpu/drm/nouveau/nv50_calc.c
index de81151..8cf63a8 100644
--- a/drivers/gpu/drm/nouveau/nv50_calc.c
+++ b/drivers/gpu/drm/nouveau/nv50_calc.c
@@ -23,7 +23,6 @@
  */
 
 #include "drmP.h"
-#include "drm_fixed.h"
 #include "nouveau_drv.h"
 #include "nouveau_hw.h"
 
@@ -47,45 +46,52 @@ nv50_calc_pll(struct drm_device *dev, struct pll_lims *pll, int clk,
 }
 
 int
-nv50_calc_pll2(struct drm_device *dev, struct pll_lims *pll, int clk,
-	       int *N, int *fN, int *M, int *P)
+nva3_calc_pll(struct drm_device *dev, struct pll_lims *pll, int clk,
+	      int *pN, int *pfN, int *pM, int *P)
 {
-	fixed20_12 fb_div, a, b;
-	u32 refclk = pll->refclk / 10;
-	u32 max_vco_freq = pll->vco1.maxfreq / 10;
-	u32 max_vco_inputfreq = pll->vco1.max_inputfreq / 10;
-	clk /= 10;
+	u32 best_err = ~0, err;
+	int M, lM, hM, N, fN;
 
-	*P = max_vco_freq / clk;
+	*P = pll->vco1.maxfreq / clk;
 	if (*P > pll->max_p)
 		*P = pll->max_p;
 	if (*P < pll->min_p)
 		*P = pll->min_p;
 
-	/* *M = floor((refclk + max_vco_inputfreq) / max_vco_inputfreq); */
-	a.full = dfixed_const(refclk + max_vco_inputfreq);
-	b.full = dfixed_const(max_vco_inputfreq);
-	a.full = dfixed_div(a, b);
-	a.full = dfixed_floor(a);
-	*M = dfixed_trunc(a);
+	lM = (pll->refclk + pll->vco1.max_inputfreq) / pll->vco1.max_inputfreq;
+	lM = max(lM, (int)pll->vco1.min_m);
+	hM = (pll->refclk + pll->vco1.min_inputfreq) / pll->vco1.min_inputfreq;
+	hM = min(hM, (int)pll->vco1.max_m);
 
-	/* fb_div = (vco * *M) / refclk; */
-	fb_div.full = dfixed_const(clk * *P);
-	fb_div.full = dfixed_mul(fb_div, a);
-	a.full = dfixed_const(refclk);
-	fb_div.full = dfixed_div(fb_div, a);
+	for (M = lM; M <= hM; M++) {
+		u32 tmp = clk * *P * M;
+		N  = tmp / pll->refclk;
+		fN = tmp % pll->refclk;
+		if (!pfN && fN >= pll->refclk / 2)
+			N++;
 
-	/* *N = floor(fb_div); */
-	a.full = dfixed_floor(fb_div);
-	*N = dfixed_trunc(fb_div);
+		if (N < pll->vco1.min_n)
+			continue;
+		if (N > pll->vco1.max_n)
+			break;
 
-	/* *fN = (fmod(fb_div, 1.0) * 8192) - 4096; */
-	b.full = dfixed_const(8192);
-	a.full = dfixed_mul(a, b);
-	fb_div.full = dfixed_mul(fb_div, b);
-	fb_div.full = fb_div.full - a.full;
-	*fN = dfixed_trunc(fb_div) - 4096;
-	*fN &= 0xffff;
+		err = abs(clk - (pll->refclk * N / M / *P));
+		if (err < best_err) {
+			best_err = err;
+			*pN = N;
+			*pM = M;
+		}
 
-	return clk;
+		if (pfN) {
+			*pfN = (((fN << 13) / pll->refclk) - 4096) & 0xffff;
+			return clk;
+		}
+	}
+
+	if (unlikely(best_err == ~0)) {
+		NV_ERROR(dev, "unable to find matching pll values\n");
+		return -EINVAL;
+	}
+
+	return pll->refclk * *pN / *pM / *P;
 }
diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c
index e900a51..b522a3a 100644
--- a/drivers/gpu/drm/nouveau/nv50_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv50_crtc.c
@@ -286,7 +286,7 @@ nv50_crtc_set_clock(struct drm_device *dev, int head, int pclk)
 		nv_wr32(dev, pll.reg + 8, reg2 | (P << 28) | (M2 << 16) | N2);
 	} else
 	if (dev_priv->chipset < NV_C0) {
-		ret = nv50_calc_pll2(dev, &pll, pclk, &N1, &N2, &M1, &P);
+		ret = nva3_calc_pll(dev, &pll, pclk, &N1, &N2, &M1, &P);
 		if (ret <= 0)
 			return 0;
 
@@ -298,7 +298,7 @@ nv50_crtc_set_clock(struct drm_device *dev, int head, int pclk)
 		nv_wr32(dev, pll.reg + 4, reg1 | (P << 16) | (M1 << 8) | N1);
 		nv_wr32(dev, pll.reg + 8, N2);
 	} else {
-		ret = nv50_calc_pll2(dev, &pll, pclk, &N1, &N2, &M1, &P);
+		ret = nva3_calc_pll(dev, &pll, pclk, &N1, &N2, &M1, &P);
 		if (ret <= 0)
 			return 0;
 
diff --git a/drivers/gpu/drm/nouveau/nva3_pm.c b/drivers/gpu/drm/nouveau/nva3_pm.c
index bc357c8..e4b2b9e 100644
--- a/drivers/gpu/drm/nouveau/nva3_pm.c
+++ b/drivers/gpu/drm/nouveau/nva3_pm.c
@@ -104,7 +104,7 @@ nva3_pm_clock_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl,
 {
 	struct nva3_pm_state *pll;
 	struct pll_lims limits;
-	int N, fN, M, P, diff;
+	int N, M, P, diff;
 	int ret, off;
 
 	ret = get_pll_limits(dev, id, &limits);
@@ -136,7 +136,7 @@ nva3_pm_clock_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl,
 	}
 
 	if (!pll->new_div) {
-		ret = nv50_calc_pll2(dev, &limits, khz, &N, &fN, &M, &P);
+		ret = nva3_calc_pll(dev, &limits, khz, &N, NULL, &M, &P);
 		if (ret < 0)
 			return ERR_PTR(ret);
author	Ben Skeggs <bskeggs@redhat.com>	2011-04-28 02:34:21 +1000
committer	Ben Skeggs <bskeggs@redhat.com>	2011-05-16 10:50:59 +1000
commit	52eba8dd5e830a836425e92d002bc51e42d3280e (patch)
tree	88faa691a4828e7a3ca874e4d8d45a2a6feff23f
parent	96d1fcf8b5a3a9c66fddeaa9fb71e4e68ee2e08b (diff)
download	op-kernel-dev-52eba8dd5e830a836425e92d002bc51e42d3280e.zip op-kernel-dev-52eba8dd5e830a836425e92d002bc51e42d3280e.tar.gz