diff options
Diffstat (limited to 'src/pixman')
144 files changed, 72083 insertions, 0 deletions
diff --git a/src/pixman/.gitignore b/src/pixman/.gitignore new file mode 100644 index 0000000..0f11496 --- /dev/null +++ b/src/pixman/.gitignore @@ -0,0 +1,89 @@ +Makefile +Makefile.in +.deps +.libs +.msg +*.pc +*.lo +*.la +*.a +*.o +*~ +aclocal.m4 +autom4te.cache +compile +config.guess +config.log +config.status +config.sub +configure +depcomp +install-sh +libtool +ltmain.sh +missing +stamp-h? +config.h +config.h.in +.*.swp +demos/alpha-test +demos/checkerboard +demos/clip-in +demos/clip-test +demos/composite-test +demos/conical-test +demos/convolution-test +demos/gradient-test +demos/linear-gradient +demos/quad2quad +demos/radial-test +demos/scale +demos/screen-test +demos/srgb-test +demos/srgb-trap-test +demos/trap-test +demos/tri-test +pixman/pixman-srgb.c +pixman/pixman-version.h +test/a1-trap-test +test/affine-test +test/alpha-loop +test/alphamap +test/alpha-test +test/blitters-test +test/clip-in +test/clip-test +test/combiner-test +test/composite +test/composite-test +test/composite-traps-test +test/convolution-test +test/fetch-test +test/glyph-test +test/gradient-crash-test +test/gradient-test +test/infinite-loop +test/lowlevel-blt-bench +test/oob-test +test/pdf-op-test +test/prng-test +test/radial-perf-test +test/region-contains-test +test/region-test +test/region-translate +test/region-translate-test +test/rotate-test +test/scaling-crash-test +test/scaling-helpers-test +test/scaling-test +test/screen-test +test/stress-test +test/trap-crasher +test/trap-test +test/window-test +*.pdb +*.dll +*.lib +*.ilk +*.obj +*.exe diff --git a/src/pixman/AUTHORS b/src/pixman/AUTHORS new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/pixman/AUTHORS diff --git a/src/pixman/CODING_STYLE b/src/pixman/CODING_STYLE new file mode 100644 index 0000000..9f5171d --- /dev/null +++ b/src/pixman/CODING_STYLE @@ -0,0 +1,199 @@ +Pixman coding style. +==================== + +The pixman coding style is close to cairo's with one exception: braces +go on their own line, rather than on the line of the if/while/for: + + if (condition) + { + do_something(); + do_something_else(); + } + +not + + if (condition) { + do_something(); + do_something_else(); + } + + + +Indentation +=========== + +Each new level is indented four spaces: + + if (condition) + do_something(); + +This may be achieved with space characters or with a combination of +tab characters and space characters. Tab characters are interpreted as + + Advance to the next column which is a multiple of 8. + + +Names +===== + +In all names, words are separated with underscores. Do not use +CamelCase for any names. + +Macros have ALL_CAPITAL_NAMES + +Type names are in lower case and end with "_t". For example +pixman_image_t. + +Labels, functions and variables have lower case names. + + +Braces +====== + +Braces always go on their own line: + + if (condition) + { + do_this (); + do_that (); + } + else + { + do_the_other (); + } + +Rules for braces and substatements of if/while/for/do: + +* If a substatement spans multiple lines, then there must be braces + around it. + +* If the condition of an if/while/for spans multiple lines, then + braces must be used for the substatements. + +* If one substatement of an if statement has braces, then the other + must too. + +* Otherwise, don't add braces. + + +Comments +======== + +For comments either like this: + + /* One line comment */ + +or like this: + + /* This is a multi-line comment + * + * It extends over multiple lines + */ + +Generally comments should say things that aren't clear from the code +itself. If too many comments say obvious things, then people will just +stop reading all comments, including the good ones. + + +Whitespace +========== + +* Put a single space after commas + +* Put spaces around arithmetic operators such a +, -, *, /: + + y * stride + x + + x / unit_x + +* Do not put spaces after the address-of operator, the * when used as + a pointer derefernce or the ! and ~ operators: + + &foo; + + ~0x00000000 + + !condition + + *result = 100 + +* Break up long lines (> ~80 characters) and use whitespace to align + things nicely. This is one way: + + some_very_long_function name ( + implementation, op, src, mask, dest, + src_x, src_y, mask_x, mask_y, dest_x, dest_y, + width, height); + + This is another: + + some_very_long_function_name (implementation, op, + src, mask, dest, + src_x, src_y, + mask_x, mask_y, + dest_x, dest_y, + width, height); + +* Separate logically distinct chunks with a single newline. This + obviously applies between functions, but also applies within a + function or block or structure definition. + +* Use a newline after a block of variable declarations. + +* Use a single space before a left parenthesis, except where the + standard will not allow it, (eg. when defining a parameterized macro). + +* Don't eliminate newlines just because things would still fit on one + line. This breaks the expected visual structure of the code making + it much harder to read and understand: + + if (condition) foo (); else bar (); /* Yuck! */ + + +Function Definitions +==================== + +Function definitions should take the following form: + + void + my_function (int argument) + { + do_my_things (); + } + +If all the parameters to a function fit naturally on one line, format +them that way. Otherwise, put one argument on each line, adding +whitespace so that the parameter names are aligned with each other. + +I.e., do either this: + + void + short_arguments (const char *str, int x, int y, int z) + { + } + +or this: + + void + long_arguments (const char *char_star_arg, + int int_arg, + double *double_star_arg, + double double_arg) + { + } + + +Mode lines +========== + +Given the rules above, what is the best way to simplify one's life as +a code monkey? Get your editor to do most of the tedious work of +beautifying your code! + +As a reward for reading this far, here are some mode lines for the more +popular editors: +/* + * vim:sw=4:sts=4:ts=8:tw=78:fo=tcroq:cindent:cino=\:0,(0 + * vim:isk=a-z,A-Z,48-57,_,.,-,> + */ + diff --git a/src/pixman/COPYING b/src/pixman/COPYING new file mode 100644 index 0000000..6168dea --- /dev/null +++ b/src/pixman/COPYING @@ -0,0 +1,42 @@ +The following is the MIT license, agreed upon by most contributors. +Copyright holders of new code should use this license statement where +possible. They may also add themselves to the list below. + +/* + * Copyright 1987, 1988, 1989, 1998 The Open Group + * Copyright 1987, 1988, 1989 Digital Equipment Corporation + * Copyright 1999, 2004, 2008 Keith Packard + * Copyright 2000 SuSE, Inc. + * Copyright 2000 Keith Packard, member of The XFree86 Project, Inc. + * Copyright 2004, 2005, 2007, 2008, 2009, 2010 Red Hat, Inc. + * Copyright 2004 Nicholas Miell + * Copyright 2005 Lars Knoll & Zack Rusin, Trolltech + * Copyright 2005 Trolltech AS + * Copyright 2007 Luca Barbato + * Copyright 2008 Aaron Plattner, NVIDIA Corporation + * Copyright 2008 Rodrigo Kumpera + * Copyright 2008 André Tupinambá + * Copyright 2008 Mozilla Corporation + * Copyright 2008 Frederic Plourde + * Copyright 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright 2009, 2010 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ diff --git a/src/pixman/ChangeLog b/src/pixman/ChangeLog new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/pixman/ChangeLog diff --git a/src/pixman/INSTALL b/src/pixman/INSTALL new file mode 100644 index 0000000..5458714 --- /dev/null +++ b/src/pixman/INSTALL @@ -0,0 +1,234 @@ +Installation Instructions +************************* + +Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, +2006 Free Software Foundation, Inc. + +This file is free documentation; the Free Software Foundation gives +unlimited permission to copy, distribute and modify it. + +Basic Installation +================== + +Briefly, the shell commands `./configure; make; make install' should +configure, build, and install this package. The following +more-detailed instructions are generic; see the `README' file for +instructions specific to this package. + + The `configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a `Makefile' in each directory of the package. +It may also create one or more `.h' files containing system-dependent +definitions. Finally, it creates a shell script `config.status' that +you can run in the future to recreate the current configuration, and a +file `config.log' containing compiler output (useful mainly for +debugging `configure'). + + It can also use an optional file (typically called `config.cache' +and enabled with `--cache-file=config.cache' or simply `-C') that saves +the results of its tests to speed up reconfiguring. Caching is +disabled by default to prevent problems with accidental use of stale +cache files. + + If you need to do unusual things to compile the package, please try +to figure out how `configure' could check whether to do them, and mail +diffs or instructions to the address given in the `README' so they can +be considered for the next release. If you are using the cache, and at +some point `config.cache' contains results you don't want to keep, you +may remove or edit it. + + The file `configure.ac' (or `configure.in') is used to create +`configure' by a program called `autoconf'. You need `configure.ac' if +you want to change it or regenerate `configure' using a newer version +of `autoconf'. + +The simplest way to compile this package is: + + 1. `cd' to the directory containing the package's source code and type + `./configure' to configure the package for your system. + + Running `configure' might take a while. While running, it prints + some messages telling which features it is checking for. + + 2. Type `make' to compile the package. + + 3. Optionally, type `make check' to run any self-tests that come with + the package. + + 4. Type `make install' to install the programs and any data files and + documentation. + + 5. You can remove the program binaries and object files from the + source code directory by typing `make clean'. To also remove the + files that `configure' created (so you can compile the package for + a different kind of computer), type `make distclean'. There is + also a `make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + +Compilers and Options +===================== + +Some systems require unusual options for compilation or linking that the +`configure' script does not know about. Run `./configure --help' for +details on some of the pertinent environment variables. + + You can give `configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here +is an example: + + ./configure CC=c99 CFLAGS=-g LIBS=-lposix + + *Note Defining Variables::, for more details. + +Compiling For Multiple Architectures +==================================== + +You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you can use GNU `make'. `cd' to the +directory where you want the object files and executables to go and run +the `configure' script. `configure' automatically checks for the +source code in the directory that `configure' is in and in `..'. + + With a non-GNU `make', it is safer to compile the package for one +architecture at a time in the source code directory. After you have +installed the package for one architecture, use `make distclean' before +reconfiguring for another architecture. + +Installation Names +================== + +By default, `make install' installs the package's commands under +`/usr/local/bin', include files under `/usr/local/include', etc. You +can specify an installation prefix other than `/usr/local' by giving +`configure' the option `--prefix=PREFIX'. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. If you +pass the option `--exec-prefix=PREFIX' to `configure', the package uses +PREFIX as the prefix for installing programs and libraries. +Documentation and other data files still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like `--bindir=DIR' to specify different values for particular +kinds of files. Run `configure --help' for a list of the directories +you can set and what kinds of files go in them. + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving `configure' the +option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. + +Optional Features +================= + +Some packages pay attention to `--enable-FEATURE' options to +`configure', where FEATURE indicates an optional part of the package. +They may also pay attention to `--with-PACKAGE' options, where PACKAGE +is something like `gnu-as' or `x' (for the X Window System). The +`README' should mention any `--enable-' and `--with-' options that the +package recognizes. + + For packages that use the X Window System, `configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the `configure' options `--x-includes=DIR' and +`--x-libraries=DIR' to specify their locations. + +Specifying the System Type +========================== + +There may be some features `configure' cannot figure out automatically, +but needs to determine by the type of machine the package will run on. +Usually, assuming the package is built to be run on the _same_ +architectures, `configure' can figure that out, but if it prints a +message saying it cannot guess the machine type, give it the +`--build=TYPE' option. TYPE can either be a short name for the system +type, such as `sun4', or a canonical name which has the form: + + CPU-COMPANY-SYSTEM + +where SYSTEM can have one of these forms: + + OS KERNEL-OS + + See the file `config.sub' for the possible values of each field. If +`config.sub' isn't included in this package, then this package doesn't +need to know the machine type. + + If you are _building_ compiler tools for cross-compiling, you should +use the option `--target=TYPE' to select the type of system they will +produce code for. + + If you want to _use_ a cross compiler, that generates code for a +platform different from the build platform, you should specify the +"host" platform (i.e., that on which the generated programs will +eventually be run) with `--host=TYPE'. + +Sharing Defaults +================ + +If you want to set default values for `configure' scripts to share, you +can create a site shell script called `config.site' that gives default +values for variables like `CC', `cache_file', and `prefix'. +`configure' looks for `PREFIX/share/config.site' if it exists, then +`PREFIX/etc/config.site' if it exists. Or, you can set the +`CONFIG_SITE' environment variable to the location of the site script. +A warning: not all `configure' scripts look for a site script. + +Defining Variables +================== + +Variables not defined in a site shell script can be set in the +environment passed to `configure'. However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the `configure' command line, using `VAR=value'. For example: + + ./configure CC=/usr/local2/bin/gcc + +causes the specified `gcc' to be used as the C compiler (unless it is +overridden in the site shell script). + +Unfortunately, this technique does not work for `CONFIG_SHELL' due to +an Autoconf bug. Until the bug is fixed you can use this workaround: + + CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash + +`configure' Invocation +====================== + +`configure' recognizes the following options to control how it operates. + +`--help' +`-h' + Print a summary of the options to `configure', and exit. + +`--version' +`-V' + Print the version of Autoconf used to generate the `configure' + script, and exit. + +`--cache-file=FILE' + Enable the cache: use and save the results of the tests in FILE, + traditionally `config.cache'. FILE defaults to `/dev/null' to + disable caching. + +`--config-cache' +`-C' + Alias for `--cache-file=config.cache'. + +`--quiet' +`--silent' +`-q' + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to `/dev/null' (any error + messages will still be shown). + +`--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + `configure' can determine that directory automatically. + +`configure' also accepts some other, not widely useful, options. Run +`configure --help' for more details. + diff --git a/src/pixman/Makefile.am b/src/pixman/Makefile.am new file mode 100644 index 0000000..5137c9e --- /dev/null +++ b/src/pixman/Makefile.am @@ -0,0 +1,137 @@ +SUBDIRS = pixman demos test + +pkgconfigdir=$(libdir)/pkgconfig +pkgconfig_DATA=pixman-1.pc + +$(pkgconfig_DATA): pixman-1.pc.in + +snapshot: + distdir="$(distdir)-`date '+%Y%m%d'`"; \ + test -d "$(srcdir)/.git" && distdir=$$distdir-`cd "$(srcdir)" && git rev-parse HEAD | cut -c 1-6`; \ + $(MAKE) $(AM_MAKEFLAGS) distdir="$$distdir" dist + +GPGKEY=3892336E +USERNAME=$$USER +RELEASE_OR_SNAPSHOT = $$(if test "x$(PIXMAN_VERSION_MINOR)" = "x$$(echo "$(PIXMAN_VERSION_MINOR)/2*2" | bc)" ; then echo release; else echo snapshot; fi) +RELEASE_CAIRO_HOST = $(USERNAME)@cairographics.org +RELEASE_CAIRO_DIR = /srv/cairo.freedesktop.org/www/$(RELEASE_OR_SNAPSHOT)s +RELEASE_CAIRO_URL = http://cairographics.org/$(RELEASE_OR_SNAPSHOT)s +RELEASE_XORG_URL = http://xorg.freedesktop.org/archive/individual/lib +RELEASE_XORG_HOST = $(USERNAME)@xorg.freedesktop.org +RELEASE_XORG_DIR = /srv/xorg.freedesktop.org/archive/individual/lib +RELEASE_ANNOUNCE_LIST = cairo-announce@cairographics.org, xorg-announce@lists.freedesktop.org, pixman@lists.freedesktop.org + +EXTRA_DIST = \ + Makefile.win32 \ + Makefile.win32.common + +tar_gz = $(PACKAGE)-$(VERSION).tar.gz +tar_bz2 = $(PACKAGE)-$(VERSION).tar.bz2 + +sha1_tgz = $(tar_gz).sha1 +md5_tgz = $(tar_gz).md5 + +sha1_tbz2 = $(tar_bz2).sha1 +md5_tbz2 = $(tar_bz2).md5 + +gpg_file = $(sha1_tgz).asc + +$(sha1_tgz): $(tar_gz) + sha1sum $^ > $@ + +$(md5_tgz): $(tar_gz) + md5sum $^ > $@ + +$(sha1_tbz2): $(tar_bz2) + sha1sum $^ > $@ + +$(md5_tbz2): $(tar_bz2) + md5sum $^ > $@ + +$(gpg_file): $(sha1_tgz) + @echo "Please enter your GPG password to sign the checksum." + gpg --armor --sign $^ + +HASHFILES = $(sha1_tgz) $(sha1_tbz2) $(md5_tgz) $(md5_tbz2) + +release-verify-newer: + @echo -n "Checking that no $(VERSION) release already exists at $(RELEASE_XORG_HOST)..." + @ssh $(RELEASE_XORG_HOST) test ! -e $(RELEASE_XORG_DIR)/$(tar_gz) \ + || (echo "Ouch." && echo "Found: $(RELEASE_XORG_HOST):$(RELEASE_XORG_DIR)/$(tar_gz)" \ + && echo "Refusing to try to generate a new release of the same name." \ + && false) + @ssh $(RELEASE_CAIRO_HOST) test ! -e $(RELEASE_CAIRO_DIR)/$(tar_gz) \ + || (echo "Ouch." && echo "Found: $(RELEASE_CAIRO_HOST):$(RELEASE_CAIRO_DIR)/$(tar_gz)" \ + && echo "Refusing to try to generate a new release of the same name." \ + && false) + @echo "Good." + +release-remove-old: + $(RM) $(tar_gz) $(tar_bz2) $(HASHFILES) $(gpg_file) + +ensure-prev: + @if [[ "$(PREV)" == "" ]]; then \ + echo "" && \ + echo "You must set the PREV variable on the make command line to" && \ + echo "the last version." && \ + echo "" && \ + echo "For example:" && \ + echo " make PREV=0.7.3" && \ + echo "" && \ + false; \ + fi + +release-check: ensure-prev release-verify-newer release-remove-old distcheck + +release-tag: + git tag -u $(GPGKEY) -m "$(PACKAGE) $(VERSION) release" $(PACKAGE)-$(VERSION) + +release-upload: release-check $(tar_gz) $(tar_bz2) $(sha1_tgz) $(sha1_tbz2) $(md5_tgz) $(gpg_file) + scp $(tar_gz) $(sha1_tgz) $(gpg_file) $(RELEASE_CAIRO_HOST):$(RELEASE_CAIRO_DIR) + scp $(tar_gz) $(tar_bz2) $(RELEASE_XORG_HOST):$(RELEASE_XORG_DIR) + ssh $(RELEASE_CAIRO_HOST) "rm -f $(RELEASE_CAIRO_DIR)/LATEST-$(PACKAGE)-[0-9]* && ln -s $(tar_gz) $(RELEASE_CAIRO_DIR)/LATEST-$(PACKAGE)-$(VERSION)" + +RELEASE_TYPE = $$(if test "x$(PIXMAN_VERSION_MINOR)" = "x$$(echo "$(PIXMAN_VERSION_MINOR)/2*2" | bc)" ; then echo "stable release in the" ; else echo "development snapshot leading up to a stable"; fi) + +release-publish-message: $(HASHFILES) ensure-prev + @echo "Please follow the instructions in RELEASING to push stuff out and" + @echo "send out the announcement mails. Here is the excerpt you need:" + @echo "" + @echo "Lists: $(RELEASE_ANNOUNCE_LIST)" + @echo "Subject: [ANNOUNCE] $(PACKAGE) release $(VERSION) now available" + @echo "============================== CUT HERE ==============================" + @echo "A new $(PACKAGE) release $(VERSION) is now available. This is a $(RELEASE_TYPE)" + @echo "" + @echo "tar.gz:" + @echo " $(RELEASE_CAIRO_URL)/$(tar_gz)" + @echo " $(RELEASE_XORG_URL)/$(tar_gz)" + @echo "" + @echo "tar.bz2:" + @echo " $(RELEASE_XORG_URL)/$(tar_bz2)" + @echo "" + @echo "Hashes:" + @echo -n " MD5: " + @cat $(md5_tgz) + @echo -n " MD5: " + @cat $(md5_tbz2) + @echo -n " SHA1: " + @cat $(sha1_tgz) + @echo -n " SHA1: " + @cat $(sha1_tbz2) + @echo "" + @echo "GPG signature:" + @echo " $(RELEASE_CAIRO_URL)/$(gpg_file)" + @echo " (signed by`gpg --list-keys $(GPGKEY) | grep uid | cut -b4- | tr -s " "`)" + @echo "" + @echo "Git:" + @echo " git://git.freedesktop.org/git/pixman" + @echo " tag: $(PACKAGE)-$(VERSION)" + @echo "" + @echo "Log:" + @git log --no-merges "$(PACKAGE)-$(PREV)".."$(PACKAGE)-$(VERSION)" | git shortlog | awk '{ printf "\t"; print ; }' | cut -b1-80 + @echo "============================== CUT HERE ==============================" + @echo "" + +release-publish: release-upload release-tag release-publish-message + +.PHONY: release-upload release-publish release-publish-message release-tag diff --git a/src/pixman/Makefile.win32 b/src/pixman/Makefile.win32 new file mode 100644 index 0000000..c3ca3bc --- /dev/null +++ b/src/pixman/Makefile.win32 @@ -0,0 +1,25 @@ +default: all + +top_srcdir = . +include $(top_srcdir)/Makefile.win32.common + +all: pixman test + +pixman: + @$(MAKE) -C pixman -f Makefile.win32 + +test: + @$(MAKE) -C test -f Makefile.win32 + +clean_r: + @$(MAKE) -C pixman -f Makefile.win32 clean + @$(MAKE) -C test -f Makefile.win32 clean + +check: + @$(MAKE) -C test -f Makefile.win32 check + + +clean: clean_r + + +.PHONY: all pixman test clean check diff --git a/src/pixman/Makefile.win32.common b/src/pixman/Makefile.win32.common new file mode 100644 index 0000000..777f94c --- /dev/null +++ b/src/pixman/Makefile.win32.common @@ -0,0 +1,56 @@ +LIBRARY = pixman-1 + +CC = cl +LD = link +AR = lib +PERL = perl + +ifeq ($(top_builddir),) +top_builddir = $(top_srcdir) +endif + +CFG_VAR = $(CFG) +ifeq ($(CFG_VAR),) +CFG_VAR = release +endif + +ifeq ($(CFG_VAR),debug) +CFG_CFLAGS = -MDd -Od -Zi +CFG_LDFLAGS = -DEBUG +else +CFG_CFLAGS = -MD -O2 +CFG_LDFLAGS = +endif + +# Package definitions, to be used instead of those provided in config.h +PKG_CFLAGS = -DPACKAGE=$(LIBRARY) -DPACKAGE_VERSION="" -DPACKAGE_BUGREPORT="" + +BASE_CFLAGS = -nologo -I. -I$(top_srcdir) -I$(top_srcdir)/pixman + +PIXMAN_CFLAGS = $(BASE_CFLAGS) $(PKG_CFLAGS) $(CFG_CFLAGS) $(CFLAGS) +PIXMAN_LDFLAGS = -nologo $(CFG_LDFLAGS) $(LDFLAGS) +PIXMAN_ARFLAGS = -nologo $(LDFLAGS) + + +inform: +ifneq ($(CFG),release) +ifneq ($(CFG),debug) +ifneq ($(CFG),) + @echo "Invalid specified configuration option: "$(CFG)"." + @echo + @echo "Possible choices for configuration are 'release' and 'debug'" + @exit 1 +endif + @echo "Using default RELEASE configuration... (use CFG=release or CFG=debug)" +endif +endif + + +$(CFG_VAR)/%.obj: %.c $(libpixman_headers) + @mkdir -p $(CFG_VAR) + @$(CC) -c $(PIXMAN_CFLAGS) -Fo"$@" $< + +clean: inform + @$(RM) $(CFG_VAR)/*.{exe,ilk,lib,obj,pdb} $(BUILT_SOURCES) || exit 0 + +.PHONY: inform clean diff --git a/src/pixman/NEWS b/src/pixman/NEWS new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/pixman/NEWS diff --git a/src/pixman/README b/src/pixman/README new file mode 100644 index 0000000..6d8cfd8 --- /dev/null +++ b/src/pixman/README @@ -0,0 +1,116 @@ +Pixman is a library that provides low-level pixel manipulation +features such as image compositing and trapezoid rasterization. + +Questions, bug reports and patches should be directed to the pixman +mailing list: + + http://lists.freedesktop.org/mailman/listinfo/pixman + +You can also file bugs at + + https://bugs.freedesktop.org/enter_bug.cgi?product=pixman + +For real time discussions about pixman, feel free to join the IRC +channels #cairo and #xorg-devel on the FreeNode IRC network. + + +Contributing +------------ + +In order to contribute to pixman, you will need a working knowledge of +the git version control system. For a quick getting started guide, +there is the "Everyday Git With 20 Commands Or So guide" + + http://www.kernel.org/pub/software/scm/git/docs/everyday.html + +from the Git homepage. For more in depth git documentation, see the +resources on the Git community documentation page: + + http://git-scm.com/documentation + +Pixman uses the infrastructure from the freedesktop.org umbrella +project. For instructions about how to use the git service on +freedesktop.org, see: + + http://www.freedesktop.org/wiki/Infrastructure/git/Developers + +The Pixman master repository can be found at: + + git://anongit.freedesktop.org/git/pixman + +and browsed on the web here: + + http://cgit.freedesktop.org/pixman/ + + +Sending patches +--------------- + +The general workflow for sending patches is to first make sure that +git can send mail on your system. Then, + + - create a branch off of master in your local git repository + + - make your changes as one or more commits + + - use the + + git send-email + + command to send the patch series to pixman@lists.freedesktop.org. + +In order for your patches to be accepted, please consider the +following guidelines: + + - This link: + + http://www.kernel.org/pub/software/scm/git/docs/user-manual.html#patch-series + + describes how what a good patch series is, and to create one with + git. + + - At each point in the series, pixman should compile and the test + suite should pass. + + The exception here is if you are changing the test suite to + demonstrate a bug. In this case, make one commit that makes the + test suite fail due to the bug, and then another commit that fixes + the bug. + + You can run the test suite with + + make check + + It will take around two minutes to run on a modern PC. + + - Follow the coding style described in the CODING_STYLE file + + - For bug fixes, include an update to the test suite to make sure + the bug doesn't reappear. + + - For new features, add tests of the feature to the test + suite. Also, add a program demonstrating the new feature to the + demos/ directory. + + - Write descriptive commit messages. Useful information to include: + - Benchmark results, before and after + - Description of the bug that was fixed + - Detailed rationale for any new API + - Alternative approaches that were rejected (and why they + don't work) + - If review comments were incorporated, a brief version + history describing what those changes were. + + - For big patch series, send an introductory email with an overall + description of the patch series, including benchmarks and + motivation. Each commit message should still be descriptive and + include enough information to understand why this particular commit + was necessary. + +Pixman has high standards for code quality and so almost everybody +should expect to have the first versions of their patches rejected. + +If you think that the reviewers are wrong about something, or that the +guidelines above are wrong, feel free to discuss the issue on the +list. The purpose of the guidelines and code review is to ensure high +code quality; it is not an exercise in compliance. diff --git a/src/pixman/RELEASING b/src/pixman/RELEASING new file mode 100644 index 0000000..657857d --- /dev/null +++ b/src/pixman/RELEASING @@ -0,0 +1,59 @@ +Here are the steps to follow to create a new pixman release: + +1) Ensure that there are no uncommitted changes or unpushed commits, + and that you are up to date with the latest commits in the central + repository. Here are a couple of useful commands: + + git diff (no output) + + git status (should report "nothing to commit") + + git log master...origin (no output; note: *3* dots) + +2) Increment pixman_(major|minor|micro) in configure.ac according to + the directions in that file. + +3) Make sure that new version works, including + + - make distcheck passes + + - the X server still works with the new pixman version + installed + + - the cairo test suite hasn't gained any new failures compared + to last pixman version. + +4) Use "git commit" to record the changes made in step 2 and 3. + +5) Generate and publish the tar files by running + + make PREV=<last version> GPGKEY=<your gpg key id> release-publish + + If your freedesktop user name is different from your local one, + then also set the variable USER to your freedesktop user name. + +6) Run + + make release-publish-message + + to generate a draft release announcement. Edit it as appropriate and + send it to + + cairo-announce@cairographics.org + + pixman@lists.freedesktop.org + + xorg-announce@lists.freedesktop.org + +7) Increment pixman_micro to the next larger (odd) number in + configure.ac. Commit this change, and push all commits created + during this process using + + git push + git push --tags + + You must use "--tags" here; otherwise the new tag will not + be pushed out. + +8) Change the topic of the #cairo IRC channel on freenode to advertise + the new version. diff --git a/src/pixman/autogen.sh b/src/pixman/autogen.sh new file mode 100755 index 0000000..fc34bd5 --- /dev/null +++ b/src/pixman/autogen.sh @@ -0,0 +1,14 @@ +#! /bin/sh + +srcdir=`dirname $0` +test -z "$srcdir" && srcdir=. + +ORIGDIR=`pwd` +cd $srcdir + +autoreconf -v --install || exit 1 +cd $ORIGDIR || exit $? + +if test -z "$NOCONFIGURE"; then + $srcdir/configure "$@" +fi diff --git a/src/pixman/configure.ac b/src/pixman/configure.ac new file mode 100644 index 0000000..245827b --- /dev/null +++ b/src/pixman/configure.ac @@ -0,0 +1,1131 @@ +dnl Copyright 2005 Red Hat, Inc. +dnl +dnl Permission to use, copy, modify, distribute, and sell this software and its +dnl documentation for any purpose is hereby granted without fee, provided that +dnl the above copyright notice appear in all copies and that both that +dnl copyright notice and this permission notice appear in supporting +dnl documentation, and that the name of Red Hat not be used in +dnl advertising or publicity pertaining to distribution of the software without +dnl specific, written prior permission. Red Hat makes no +dnl representations about the suitability of this software for any purpose. It +dnl is provided "as is" without express or implied warranty. +dnl +dnl RED HAT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +dnl INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO +dnl EVENT SHALL RED HAT BE LIABLE FOR ANY SPECIAL, INDIRECT OR +dnl CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +dnl DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +dnl TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +dnl PERFORMANCE OF THIS SOFTWARE. +dnl +dnl Process this file with autoconf to create configure. + +AC_PREREQ([2.57]) + +# Pixman versioning scheme +# +# - The version in git has an odd MICRO version number +# +# - Released versions, both development and stable, have an +# even MICRO version number +# +# - Released development versions have an odd MINOR number +# +# - Released stable versions have an even MINOR number +# +# - Versions that break ABI must have a new MAJOR number +# +# - If you break the ABI, then at least this must be done: +# +# - increment MAJOR +# +# - In the first development release where you break ABI, find +# all instances of "pixman-n" and change them to pixman-(n+1) +# +# This needs to be done at least in +# configure.ac +# all Makefile.am's +# pixman-n.pc.in +# +# This ensures that binary incompatible versions can be installed +# in parallel. See http://www106.pair.com/rhp/parallel.html for +# more information +# + +m4_define([pixman_major], 0) +m4_define([pixman_minor], 32) +m4_define([pixman_micro], 6) + +m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro]) + +AC_INIT(pixman, pixman_version, [pixman@lists.freedesktop.org], pixman) +AM_INIT_AUTOMAKE([foreign dist-bzip2]) + +# Suppress verbose compile lines +m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) + +AC_CONFIG_HEADERS(config.h) + +AC_CANONICAL_HOST + +test_CFLAGS=${CFLAGS+set} # We may override autoconf default CFLAGS. + +AC_PROG_CC +AM_PROG_AS +AC_PROG_LIBTOOL +AC_CHECK_FUNCS([getisax]) +AC_C_BIGENDIAN +AC_C_INLINE + +dnl PIXMAN_LINK_WITH_ENV(env-setup, program, true-action, false-action) +dnl +dnl Compiles and links the given program in the environment setup by env-setup +dnl and executes true-action on success and false-action on failure. +AC_DEFUN([PIXMAN_LINK_WITH_ENV],[dnl + save_CFLAGS="$CFLAGS" + save_LDFLAGS="$LDFLAGS" + save_LIBS="$LIBS" + CFLAGS="" + LDFLAGS="" + LIBS="" + $1 + CFLAGS="$save_CFLAGS $CFLAGS" + LDFLAGS="$save_LDFLAGS $LDFLAGS" + LIBS="$save_LIBS $LIBS" + AC_LINK_IFELSE( + [AC_LANG_SOURCE([$2])], + [pixman_cc_stderr=`test -f conftest.err && cat conftest.err` + pixman_cc_flag=yes], + [pixman_cc_stderr=`test -f conftest.err && cat conftest.err` + pixman_cc_flag=no]) + + if test "x$pixman_cc_stderr" != "x"; then + pixman_cc_flag=no + fi + + if test "x$pixman_cc_flag" = "xyes"; then + ifelse([$3], , :, [$3]) + else + ifelse([$4], , :, [$4]) + fi + CFLAGS="$save_CFLAGS" + LDFLAGS="$save_LDFLAGS" + LIBS="$save_LIBS" +]) + +dnl Find a -Werror for catching warnings. +WERROR= +for w in -Werror -errwarn; do + if test "z$WERROR" = "z"; then + AC_MSG_CHECKING([whether the compiler supports $w]) + PIXMAN_LINK_WITH_ENV( + [CFLAGS=$w], + [int main(int c, char **v) { (void)c; (void)v; return 0; }], + [WERROR=$w; yesno=yes], [yesno=no]) + AC_MSG_RESULT($yesno) + fi +done + +dnl PIXMAN_CHECK_CFLAG(flag, [program]) +dnl Adds flag to CFLAGS if the given program links without warnings or errors. +AC_DEFUN([PIXMAN_CHECK_CFLAG], [dnl + AC_MSG_CHECKING([whether the compiler supports $1]) + PIXMAN_LINK_WITH_ENV( + [CFLAGS="$WERROR $1"], + [$2 + int main(int c, char **v) { (void)c; (void)v; return 0; } + ], + [_yesno=yes], + [_yesno=no]) + if test "x$_yesno" = xyes; then + CFLAGS="$CFLAGS $1" + fi + AC_MSG_RESULT($_yesno) +]) + +AC_CHECK_SIZEOF(long) + +# Checks for Sun Studio compilers +AC_CHECK_DECL([__SUNPRO_C], [SUNCC="yes"], [SUNCC="no"]) +AC_CHECK_DECL([__amd64], [AMD64_ABI="yes"], [AMD64_ABI="no"]) + +# Default CFLAGS to -O -g rather than just the -g from AC_PROG_CC +# if we're using Sun Studio and neither the user nor a config.site +# has set CFLAGS. +if test $SUNCC = yes && \ + test "x$test_CFLAGS" = "x" && \ + test "$CFLAGS" = "-g" +then + CFLAGS="-O -g" +fi + +CFLAGS="$CFLAGS -Wno-unused-local-typedefs -Wno-unused-const-variable" +CFLAGS="$CFLAGS -Wno-attributes -Wno-deprecated -Wno-unused-but-set-variable" + +# +# We ignore pixman_major in the version here because the major version should +# always be encoded in the actual library name. Ie., the soname is: +# +# pixman-$(pixman_major).0.minor.micro +# +m4_define([lt_current], [pixman_minor]) +m4_define([lt_revision], [pixman_micro]) +m4_define([lt_age], [pixman_minor]) + +LT_VERSION_INFO="lt_current:lt_revision:lt_age" + +PIXMAN_VERSION_MAJOR=pixman_major() +AC_SUBST(PIXMAN_VERSION_MAJOR) +PIXMAN_VERSION_MINOR=pixman_minor() +AC_SUBST(PIXMAN_VERSION_MINOR) +PIXMAN_VERSION_MICRO=pixman_micro() +AC_SUBST(PIXMAN_VERSION_MICRO) + +AC_SUBST(LT_VERSION_INFO) + +# Check for dependencies + +PIXMAN_CHECK_CFLAG([-Wall]) +PIXMAN_CHECK_CFLAG([-Wdeclaration-after-statement]) +PIXMAN_CHECK_CFLAG([-fno-strict-aliasing]) + +dnl ========================================================================= +dnl OpenMP for the test suite? +dnl + +# Check for OpenMP support only when autoconf support that (require autoconf >=2.62) +OPENMP_CFLAGS= +m4_ifdef([AC_OPENMP], [AC_OPENMP]) + +if test "x$enable_openmp" = "xyes" && test "x$ac_cv_prog_c_openmp" = "xunsupported" ; then + AC_MSG_WARN([OpenMP support requested but found unsupported]) +fi + +dnl May not fail to link without -Wall -Werror added +dnl So try to link only when openmp is supported +dnl ac_cv_prog_c_openmp is not defined when --disable-openmp is used +if test "x$ac_cv_prog_c_openmp" != "xunsupported" && test "x$ac_cv_prog_c_openmp" != "x"; then + m4_define([openmp_test_program],[dnl + #include <stdio.h> + + extern unsigned int lcg_seed; + #pragma omp threadprivate(lcg_seed) + unsigned int lcg_seed; + + unsigned function(unsigned a, unsigned b) + { + lcg_seed ^= b; + return ((a + b) ^ a ) + lcg_seed; + } + + int main(int argc, char **argv) + { + int i; + int n1 = 0, n2 = argc; + unsigned checksum = 0; + int verbose = argv != NULL; + unsigned (*test_function)(unsigned, unsigned); + test_function = function; + #pragma omp parallel for reduction(+:checksum) default(none) \ + shared(n1, n2, test_function, verbose) + for (i = n1; i < n2; i++) + { + unsigned crc = test_function (i, 0); + if (verbose) + printf ("%d: %08X\n", i, crc); + checksum += crc; + } + printf("%u\n", checksum); + return 0; + } + ]) + + PIXMAN_LINK_WITH_ENV( + [CFLAGS="$OPENMP_CFLAGS" LDFLAGS="$OPENMP_CFLAGS"], + [openmp_test_program], + [have_openmp=yes], + [have_openmp=no]) + if test "x$have_openmp" = "xyes" ; then + AC_DEFINE(USE_OPENMP, 1, [use OpenMP in the test suite]) + fi +fi +AC_SUBST(OPENMP_CFLAGS) + +dnl ========================================================================= +dnl -fvisibility stuff + +PIXMAN_CHECK_CFLAG([-fvisibility=hidden], [dnl +#if defined(__GNUC__) && (__GNUC__ >= 4) +#ifdef _WIN32 +#error Have -fvisibility but it is ignored and generates a warning +#endif +#else +#error Need GCC 4.0 for visibility +#endif +]) + +PIXMAN_CHECK_CFLAG([-xldscope=hidden], [dnl +#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550) +#else +#error Need Sun Studio 8 for visibility +#endif +]) + +dnl =========================================================================== +dnl Check for Loongson Multimedia Instructions + +if test "x$LS_CFLAGS" = "x" ; then + LS_CFLAGS="-march=loongson2f" +fi + +have_loongson_mmi=no +AC_MSG_CHECKING(whether to use Loongson MMI assembler) + +xserver_save_CFLAGS=$CFLAGS +CFLAGS=" $LS_CFLAGS $CFLAGS -I$srcdir" +AC_LINK_IFELSE([AC_LANG_SOURCE([[ +#ifndef __mips_loongson_vector_rev +#error "Loongson Multimedia Instructions are only available on Loongson" +#endif +#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) +#error "Need GCC >= 4.4 for Loongson MMI compilation" +#endif +#include "pixman/loongson-mmintrin.h" +int main () { + union { + __m64 v; + char c[8]; + } a = { .c = {1, 2, 3, 4, 5, 6, 7, 8} }; + int b = 4; + __m64 c = _mm_srli_pi16 (a.v, b); + return 0; +}]])], have_loongson_mmi=yes) +CFLAGS=$xserver_save_CFLAGS + +AC_ARG_ENABLE(loongson-mmi, + [AC_HELP_STRING([--disable-loongson-mmi], + [disable Loongson MMI fast paths])], + [enable_loongson_mmi=$enableval], [enable_loongson_mmi=auto]) + +if test $enable_loongson_mmi = no ; then + have_loongson_mmi=disabled +fi + +if test $have_loongson_mmi = yes ; then + AC_DEFINE(USE_LOONGSON_MMI, 1, [use Loongson Multimedia Instructions]) +else + LS_CFLAGS= +fi + +AC_MSG_RESULT($have_loongson_mmi) +if test $enable_loongson_mmi = yes && test $have_loongson_mmi = no ; then + AC_MSG_ERROR([Loongson MMI not detected]) +fi + +AM_CONDITIONAL(USE_LOONGSON_MMI, test $have_loongson_mmi = yes) + +dnl =========================================================================== +dnl Check for MMX + +if test "x$MMX_CFLAGS" = "x" ; then + if test "x$SUNCC" = "xyes"; then + # Sun Studio doesn't have an -xarch=mmx flag, so we have to use sse + # but if we're building 64-bit, mmx & sse support is on by default and + # -xarch=sse throws an error instead + if test "$AMD64_ABI" = "no" ; then + MMX_CFLAGS="-xarch=sse" + fi + else + MMX_CFLAGS="-mmmx -Winline" + fi +fi + +have_mmx_intrinsics=no +AC_MSG_CHECKING(whether to use MMX intrinsics) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="$MMX_CFLAGS $CFLAGS" +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4)) +#error "Need GCC >= 3.4 for MMX intrinsics" +#endif +#include <mmintrin.h> +int main () { + __m64 v = _mm_cvtsi32_si64 (1); + __m64 w; + + /* Some versions of clang will choke on K */ + asm ("pshufw %2, %1, %0\n\t" + : "=y" (w) + : "y" (v), "K" (5) + ); + + /* Some versions of clang will choke on this */ + asm ("pmulhuw %1, %0\n\t" + : "+y" (w) + : "y" (v) + ); + + return _mm_cvtsi64_si32 (v); +}]])], have_mmx_intrinsics=yes) +CFLAGS=$xserver_save_CFLAGS + +AC_ARG_ENABLE(mmx, + [AC_HELP_STRING([--disable-mmx], + [disable x86 MMX fast paths])], + [enable_mmx=$enableval], [enable_mmx=auto]) + +if test $enable_mmx = no ; then + have_mmx_intrinsics=disabled +fi + +if test $have_mmx_intrinsics = yes ; then + AC_DEFINE(USE_X86_MMX, 1, [use x86 MMX compiler intrinsics]) +else + MMX_CFLAGS= +fi + +AC_MSG_RESULT($have_mmx_intrinsics) +if test $enable_mmx = yes && test $have_mmx_intrinsics = no ; then + AC_MSG_ERROR([x86 MMX intrinsics not detected]) +fi + +AM_CONDITIONAL(USE_X86_MMX, test $have_mmx_intrinsics = yes) + +dnl =========================================================================== +dnl Check for SSE2 + +if test "x$SSE2_CFLAGS" = "x" ; then + if test "x$SUNCC" = "xyes"; then + # SSE2 is enabled by default in the Sun Studio 64-bit environment + if test "$AMD64_ABI" = "no" ; then + SSE2_CFLAGS="-xarch=sse2" + fi + else + SSE2_CFLAGS="-msse2 -Winline" + fi +fi + +have_sse2_intrinsics=no +AC_MSG_CHECKING(whether to use SSE2 intrinsics) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="$SSE2_CFLAGS $CFLAGS" + +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2)) +# if !defined(__amd64__) && !defined(__x86_64__) +# error "Need GCC >= 4.2 for SSE2 intrinsics on x86" +# endif +#endif +#include <mmintrin.h> +#include <xmmintrin.h> +#include <emmintrin.h> +int main () { + __m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c; + c = _mm_xor_si128 (a, b); + return 0; +}]])], have_sse2_intrinsics=yes) +CFLAGS=$xserver_save_CFLAGS + +AC_ARG_ENABLE(sse2, + [AC_HELP_STRING([--disable-sse2], + [disable SSE2 fast paths])], + [enable_sse2=$enableval], [enable_sse2=auto]) + +if test $enable_sse2 = no ; then + have_sse2_intrinsics=disabled +fi + +if test $have_sse2_intrinsics = yes ; then + AC_DEFINE(USE_SSE2, 1, [use SSE2 compiler intrinsics]) +fi + +AC_MSG_RESULT($have_sse2_intrinsics) +if test $enable_sse2 = yes && test $have_sse2_intrinsics = no ; then + AC_MSG_ERROR([SSE2 intrinsics not detected]) +fi + +AM_CONDITIONAL(USE_SSE2, test $have_sse2_intrinsics = yes) + +dnl =========================================================================== +dnl Check for SSSE3 + +if test "x$SSSE3_CFLAGS" = "x" ; then + SSSE3_CFLAGS="-mssse3 -Winline" +fi + +have_ssse3_intrinsics=no +AC_MSG_CHECKING(whether to use SSSE3 intrinsics) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="$SSSE3_CFLAGS $CFLAGS" + +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +#include <mmintrin.h> +#include <xmmintrin.h> +#include <emmintrin.h> +#include <tmmintrin.h> +int main () { + __m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c; + c = _mm_maddubs_epi16 (a, b); + return 0; +}]])], have_ssse3_intrinsics=yes) +CFLAGS=$xserver_save_CFLAGS + +AC_ARG_ENABLE(ssse3, + [AC_HELP_STRING([--disable-ssse3], + [disable SSSE3 fast paths])], + [enable_ssse3=$enableval], [enable_ssse3=auto]) + +if test $enable_ssse3 = no ; then + have_ssse3_intrinsics=disabled +fi + +if test $have_ssse3_intrinsics = yes ; then + AC_DEFINE(USE_SSSE3, 1, [use SSSE3 compiler intrinsics]) +fi + +AC_MSG_RESULT($have_ssse3_intrinsics) +if test $enable_ssse3 = yes && test $have_ssse3_intrinsics = no ; then + AC_MSG_ERROR([SSSE3 intrinsics not detected]) +fi + +AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_intrinsics = yes) + +dnl =========================================================================== +dnl Other special flags needed when building code using MMX or SSE instructions +case $host_os in + solaris*) + # When building 32-bit binaries, apply a mapfile to ensure that the + # binaries aren't flagged as only able to run on MMX+SSE capable CPUs + # since they check at runtime before using those instructions. + # Not all linkers grok the mapfile format so we check for that first. + if test "$AMD64_ABI" = "no" ; then + use_hwcap_mapfile=no + AC_MSG_CHECKING(whether to use a hardware capability map file) + hwcap_save_LDFLAGS="$LDFLAGS" + HWCAP_LDFLAGS='-Wl,-M,$(srcdir)/solaris-hwcap.mapfile' + LDFLAGS="$LDFLAGS -Wl,-M,pixman/solaris-hwcap.mapfile" + AC_LINK_IFELSE([AC_LANG_SOURCE([[int main() { return 0; }]])], + use_hwcap_mapfile=yes, + HWCAP_LDFLAGS="") + LDFLAGS="$hwcap_save_LDFLAGS" + AC_MSG_RESULT($use_hwcap_mapfile) + fi + if test "x$MMX_LDFLAGS" = "x" ; then + MMX_LDFLAGS="$HWCAP_LDFLAGS" + fi + if test "x$SSE2_LDFLAGS" = "x" ; then + SSE2_LDFLAGS="$HWCAP_LDFLAGS" + fi + ;; +esac + +AC_SUBST(LS_CFLAGS) +AC_SUBST(IWMMXT_CFLAGS) +AC_SUBST(MMX_CFLAGS) +AC_SUBST(MMX_LDFLAGS) +AC_SUBST(SSE2_CFLAGS) +AC_SUBST(SSE2_LDFLAGS) +AC_SUBST(SSSE3_CFLAGS) + +dnl =========================================================================== +dnl Check for VMX/Altivec +if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then + VMX_CFLAGS="-faltivec" +else + VMX_CFLAGS="-maltivec -mabi=altivec" +fi + +have_vmx_intrinsics=no +AC_MSG_CHECKING(whether to use VMX/Altivec intrinsics) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="$VMX_CFLAGS $CFLAGS" +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4)) +#error "Need GCC >= 3.4 for sane altivec support" +#endif +#include <altivec.h> +int main () { + vector unsigned int v = vec_splat_u32 (1); + v = vec_sub (v, v); + return 0; +}]])], have_vmx_intrinsics=yes) +CFLAGS=$xserver_save_CFLAGS + +AC_ARG_ENABLE(vmx, + [AC_HELP_STRING([--disable-vmx], + [disable VMX fast paths])], + [enable_vmx=$enableval], [enable_vmx=auto]) + +if test $enable_vmx = no ; then + have_vmx_intrinsics=disabled +fi + +if test $have_vmx_intrinsics = yes ; then + AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics]) +else + VMX_CFLAGS= +fi + +AC_MSG_RESULT($have_vmx_intrinsics) +if test $enable_vmx = yes && test $have_vmx_intrinsics = no ; then + AC_MSG_ERROR([VMX intrinsics not detected]) +fi + +AC_SUBST(VMX_CFLAGS) + +AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes) + +dnl ========================================================================== +dnl Check if assembler is gas compatible and supports ARM SIMD instructions +have_arm_simd=no +AC_MSG_CHECKING(whether to use ARM SIMD assembler) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="-x assembler-with-cpp $CFLAGS" +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +.text +.arch armv6 +.object_arch armv4 +.arm +.altmacro +#ifndef __ARM_EABI__ +#error EABI is required (to be sure that calling conventions are compatible) +#endif +pld [r0] +uqadd8 r0, r0, r0]])], have_arm_simd=yes) +CFLAGS=$xserver_save_CFLAGS + +AC_ARG_ENABLE(arm-simd, + [AC_HELP_STRING([--disable-arm-simd], + [disable ARM SIMD fast paths])], + [enable_arm_simd=$enableval], [enable_arm_simd=auto]) + +if test $enable_arm_simd = no ; then + have_arm_simd=disabled +fi + +if test $have_arm_simd = yes ; then + AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD assembly optimizations]) +fi + +AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes) + +AC_MSG_RESULT($have_arm_simd) +if test $enable_arm_simd = yes && test $have_arm_simd = no ; then + AC_MSG_ERROR([ARM SIMD intrinsics not detected]) +fi + +dnl ========================================================================== +dnl Check if assembler is gas compatible and supports NEON instructions +have_arm_neon=no +AC_MSG_CHECKING(whether to use ARM NEON assembler) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="-x assembler-with-cpp $CFLAGS" +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +.text +.fpu neon +.arch armv7a +.object_arch armv4 +.eabi_attribute 10, 0 +.arm +.altmacro +#ifndef __ARM_EABI__ +#error EABI is required (to be sure that calling conventions are compatible) +#endif +pld [r0] +vmovn.u16 d0, q0]])], have_arm_neon=yes) +CFLAGS=$xserver_save_CFLAGS + +AC_ARG_ENABLE(arm-neon, + [AC_HELP_STRING([--disable-arm-neon], + [disable ARM NEON fast paths])], + [enable_arm_neon=$enableval], [enable_arm_neon=auto]) + +if test $enable_arm_neon = no ; then + have_arm_neon=disabled +fi + +if test $have_arm_neon = yes ; then + AC_DEFINE(USE_ARM_NEON, 1, [use ARM NEON assembly optimizations]) +fi + +AM_CONDITIONAL(USE_ARM_NEON, test $have_arm_neon = yes) + +AC_MSG_RESULT($have_arm_neon) +if test $enable_arm_neon = yes && test $have_arm_neon = no ; then + AC_MSG_ERROR([ARM NEON intrinsics not detected]) +fi + +dnl =========================================================================== +dnl Check for IWMMXT + +AC_ARG_ENABLE(arm-iwmmxt, + [AC_HELP_STRING([--disable-arm-iwmmxt], + [disable ARM IWMMXT fast paths])], + [enable_iwmmxt=$enableval], [enable_iwmmxt=auto]) + +AC_ARG_ENABLE(arm-iwmmxt2, + [AC_HELP_STRING([--disable-arm-iwmmxt2], + [build ARM IWMMXT fast paths with -march=iwmmxt instead of -march=iwmmxt2])], + [enable_iwmmxt2=$enableval], [enable_iwmmxt2=auto]) + +if test "x$IWMMXT_CFLAGS" = "x" ; then + IWMMXT_CFLAGS="-flax-vector-conversions -Winline -march=iwmmxt" + if test $enable_iwmmxt2 != no ; then + IWMMXT_CFLAGS="${IWMMXT_CFLAGS}2" + fi +fi + +have_iwmmxt_intrinsics=no +AC_MSG_CHECKING(whether to use ARM IWMMXT intrinsics) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="$CFLAGS $IWMMXT_CFLAGS" +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +#ifndef __arm__ +#error "IWMMXT is only available on ARM" +#endif +#ifndef __IWMMXT__ +#error "IWMMXT not enabled (with -march=iwmmxt)" +#endif +#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)) +#error "Need GCC >= 4.8 for IWMMXT intrinsics" +#endif +#include <mmintrin.h> +int main () { + union { + __m64 v; + char c[8]; + } a = { .c = {1, 2, 3, 4, 5, 6, 7, 8} }; + int b = 4; + __m64 c = _mm_srli_si64 (a.v, b); +}]])], have_iwmmxt_intrinsics=yes) +CFLAGS=$xserver_save_CFLAGS + +if test $enable_iwmmxt = no ; then + have_iwmmxt_intrinsics=disabled +fi + +if test $have_iwmmxt_intrinsics = yes ; then + AC_DEFINE(USE_ARM_IWMMXT, 1, [use ARM IWMMXT compiler intrinsics]) +else + IWMMXT_CFLAGS= +fi + +AC_MSG_RESULT($have_iwmmxt_intrinsics) +if test $enable_iwmmxt = yes && test $have_iwmmxt_intrinsics = no ; then + AC_MSG_ERROR([IWMMXT intrinsics not detected]) +fi + +AM_CONDITIONAL(USE_ARM_IWMMXT, test $have_iwmmxt_intrinsics = yes) + +dnl ========================================================================== +dnl Check if assembler is gas compatible and supports MIPS DSPr2 instructions + +have_mips_dspr2=no +AC_MSG_CHECKING(whether to use MIPS DSPr2 assembler) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="-mdspr2 $CFLAGS" + +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +#if !(defined(__mips__) && __mips_isa_rev >= 2) +#error MIPS DSPr2 is currently only available on MIPS32r2 platforms. +#endif +int +main () +{ + int c = 0, a = 0, b = 0; + __asm__ __volatile__ ( + "precr.qb.ph %[c], %[a], %[b] \n\t" + : [c] "=r" (c) + : [a] "r" (a), [b] "r" (b) + ); + return c; +}]])], have_mips_dspr2=yes) +CFLAGS=$xserver_save_CFLAGS + +AC_ARG_ENABLE(mips-dspr2, + [AC_HELP_STRING([--disable-mips-dspr2], + [disable MIPS DSPr2 fast paths])], + [enable_mips_dspr2=$enableval], [enable_mips_dspr2=auto]) + +if test $enable_mips_dspr2 = no ; then + have_mips_dspr2=disabled +fi + +if test $have_mips_dspr2 = yes ; then + AC_DEFINE(USE_MIPS_DSPR2, 1, [use MIPS DSPr2 assembly optimizations]) +fi + +AM_CONDITIONAL(USE_MIPS_DSPR2, test $have_mips_dspr2 = yes) + +AC_MSG_RESULT($have_mips_dspr2) +if test $enable_mips_dspr2 = yes && test $have_mips_dspr2 = no ; then + AC_MSG_ERROR([MIPS DSPr2 instructions not detected]) +fi + +dnl ========================================================================================= +dnl Check for GNU-style inline assembly support + +have_gcc_inline_asm=no +AC_MSG_CHECKING(whether to use GNU-style inline assembler) +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +int main () { + /* Most modern architectures have a NOP instruction, so this is a fairly generic test. */ + asm volatile ( "\tnop\n" : : : "cc", "memory" ); + return 0; +}]])], have_gcc_inline_asm=yes) + +AC_ARG_ENABLE(gcc-inline-asm, + [AC_HELP_STRING([--disable-gcc-inline-asm], + [disable GNU-style inline assembler])], + [enable_gcc_inline_asm=$enableval], [enable_gcc_inline_asm=auto]) + +if test $enable_gcc_inline_asm = no ; then + have_gcc_inline_asm=disabled +fi + +if test $have_gcc_inline_asm = yes ; then + AC_DEFINE(USE_GCC_INLINE_ASM, 1, [use GNU-style inline assembler]) +fi + +AC_MSG_RESULT($have_gcc_inline_asm) +if test $enable_gcc_inline_asm = yes && test $have_gcc_inline_asm = no ; then + AC_MSG_ERROR([GNU-style inline assembler not detected]) +fi + +AM_CONDITIONAL(USE_GCC_INLINE_ASM, test $have_gcc_inline_asm = yes) + +dnl ============================================== +dnl Static test programs + +AC_ARG_ENABLE(static-testprogs, + [AC_HELP_STRING([--enable-static-testprogs], + [build test programs as static binaries [default=no]])], + [enable_static_testprogs=$enableval], [enable_static_testprogs=no]) + +TESTPROGS_EXTRA_LDFLAGS= +if test "x$enable_static_testprogs" = "xyes" ; then + TESTPROGS_EXTRA_LDFLAGS="-all-static" +fi +AC_SUBST(TESTPROGS_EXTRA_LDFLAGS) + +dnl ============================================== +dnl Timers + +AC_ARG_ENABLE(timers, + [AC_HELP_STRING([--enable-timers], + [enable TIMER_BEGIN and TIMER_END macros [default=no]])], + [enable_timers=$enableval], [enable_timers=no]) + +if test $enable_timers = yes ; then + AC_DEFINE(PIXMAN_TIMERS, 1, [enable TIMER_BEGIN/TIMER_END macros]) +fi +AC_SUBST(PIXMAN_TIMERS) + +dnl =================================== +dnl GTK+ + +AC_ARG_ENABLE(gtk, + [AC_HELP_STRING([--enable-gtk], + [enable tests using GTK+ [default=auto]])], + [enable_gtk=$enableval], [enable_gtk=auto]) + +PKG_PROG_PKG_CONFIG + +if test $enable_gtk = yes ; then + AC_CHECK_LIB([pixman-1], [pixman_version_string]) + PKG_CHECK_MODULES(GTK, [gtk+-2.0 >= 2.16 pixman-1]) +fi + +if test $enable_gtk = auto ; then + AC_CHECK_LIB([pixman-1], [pixman_version_string], [enable_gtk=auto], [enable_gtk=no]) +fi + +if test $enable_gtk = auto ; then + PKG_CHECK_MODULES(GTK, [gtk+-2.0 >= 2.16 pixman-1], [enable_gtk=yes], [enable_gtk=no]) +fi + +AM_CONDITIONAL(HAVE_GTK, [test "x$enable_gtk" = xyes]) + +AC_SUBST(GTK_CFLAGS) +AC_SUBST(GTK_LIBS) + +dnl ===================================== +dnl posix_memalign, sigaction, alarm, gettimeofday + +AC_CHECK_FUNC(posix_memalign, have_posix_memalign=yes, have_posix_memalign=no) +if test x$have_posix_memalign = xyes; then + AC_DEFINE(HAVE_POSIX_MEMALIGN, 1, [Whether we have posix_memalign()]) +fi + +AC_CHECK_FUNC(sigaction, have_sigaction=yes, have_sigaction=no) +if test x$have_sigaction = xyes; then + AC_DEFINE(HAVE_SIGACTION, 1, [Whether we have sigaction()]) +fi + +AC_CHECK_FUNC(alarm, have_alarm=yes, have_alarm=no) +if test x$have_alarm = xyes; then + AC_DEFINE(HAVE_ALARM, 1, [Whether we have alarm()]) +fi + +AC_CHECK_HEADER([sys/mman.h], + [AC_DEFINE(HAVE_SYS_MMAN_H, [1], [Define to 1 if we have <sys/mman.h>])]) + +AC_CHECK_FUNC(mmap, have_mmap=yes, have_mmap=no) +if test x$have_mmap = xyes; then + AC_DEFINE(HAVE_MMAP, 1, [Whether we have mmap()]) +fi + +AC_CHECK_FUNC(mprotect, have_mprotect=yes, have_mprotect=no) +if test x$have_mprotect = xyes; then + AC_DEFINE(HAVE_MPROTECT, 1, [Whether we have mprotect()]) +fi + +AC_CHECK_FUNC(getpagesize, have_getpagesize=yes, have_getpagesize=no) +if test x$have_getpagesize = xyes; then + AC_DEFINE(HAVE_GETPAGESIZE, 1, [Whether we have getpagesize()]) +fi + +AC_CHECK_HEADER([fenv.h], + [AC_DEFINE(HAVE_FENV_H, [1], [Define to 1 if we have <fenv.h>])]) + +AC_CHECK_LIB(m, feenableexcept, have_feenableexcept=yes, have_feenableexcept=no) +if test x$have_feenableexcept = xyes; then + AC_DEFINE(HAVE_FEENABLEEXCEPT, 1, [Whether we have feenableexcept()]) +fi + +AC_CHECK_FUNC(gettimeofday, have_gettimeofday=yes, have_gettimeofday=no) +AC_CHECK_HEADER(sys/time.h, have_sys_time_h=yes, have_sys_time_h=no) +if test x$have_gettimeofday = xyes && test x$have_sys_time_h = xyes; then + AC_DEFINE(HAVE_GETTIMEOFDAY, 1, [Whether we have gettimeofday()]) +fi + +dnl ===================================== +dnl Check for missing sqrtf() as, e.g., for Solaris 9 + +AC_SEARCH_LIBS([sqrtf], [m], [], + [AC_DEFINE([sqrtf], [sqrt], + [Define to sqrt if you do not have the `sqrtf' function.])]) + +dnl ===================================== +dnl Thread local storage + +AC_MSG_CHECKING(for thread local storage (TLS) support) +AC_CACHE_VAL(ac_cv_tls, [ + ac_cv_tls=none + keywords="__thread __declspec(thread)" + for kw in $keywords ; do + AC_TRY_COMPILE([ +#if defined(__MINGW32__) && !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) +#error This MinGW version has broken __thread support +#endif +#ifdef __OpenBSD__ +#error OpenBSD has broken __thread support +#endif + +int $kw test;], [], [ac_cv_tls=$kw; break]) + done +]) +AC_MSG_RESULT($ac_cv_tls) + +if test "$ac_cv_tls" != "none"; then + AC_DEFINE_UNQUOTED([TLS], $ac_cv_tls, [The compiler supported TLS storage class]) +fi + +dnl +dnl posix tls +dnl + +m4_define([pthread_test_program],AC_LANG_SOURCE([[dnl +#include <stdlib.h> +#include <pthread.h> + +static pthread_once_t once_control = PTHREAD_ONCE_INIT; +static pthread_key_t key; + +static void +make_key (void) +{ + pthread_key_create (&key, NULL); +} + +int +main () +{ + void *value = NULL; + + if (pthread_once (&once_control, make_key) != 0) + { + value = NULL; + } + else + { + value = pthread_getspecific (key); + if (!value) + { + value = malloc (100); + pthread_setspecific (key, value); + } + } + return 0; +} +]])) + +AC_DEFUN([PIXMAN_CHECK_PTHREAD],[dnl + if test "z$support_for_pthreads" != "zyes"; then + PIXMAN_LINK_WITH_ENV( + [$1], [pthread_test_program], + [PTHREAD_CFLAGS="$CFLAGS" + PTHREAD_LIBS="$LIBS" + PTHREAD_LDFLAGS="$LDFLAGS" + support_for_pthreads=yes]) + fi +]) + +support_for_pthreads=no + +AC_MSG_CHECKING(for pthreads) + +PIXMAN_CHECK_PTHREAD([CFLAGS="-pthread"; LDFLAGS="-pthread"]) +PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LIBS="-lpthread"]) +PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LDFLAGS="-lroot"]) + +if test $support_for_pthreads = yes; then + AC_DEFINE([HAVE_PTHREADS], [], [Whether pthreads is supported]) + if test $ac_cv_tls = none ; then + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + fi +fi + +AC_MSG_RESULT($support_for_pthreads) + +AC_SUBST(TOOLCHAIN_SUPPORTS__THREAD) +AC_SUBST(HAVE_PTHREADS) +AC_SUBST(PTHREAD_LDFLAGS) +AC_SUBST(PTHREAD_LIBS) +AC_SUBST(PTHREAD_CFLAGS) + +dnl ===================================== +dnl __attribute__((constructor)) + +support_for_attribute_constructor=no + +AC_MSG_CHECKING(for __attribute__((constructor))) +AC_LINK_IFELSE([AC_LANG_SOURCE([[ +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 7)) +/* attribute 'constructor' is supported since gcc 2.7, but some compilers + * may only pretend to be gcc, so let's try to actually use it + */ +static int x = 1; +static void __attribute__((constructor)) constructor_function () { x = 0; } +int main (void) { return x; } +#else +#error not gcc or gcc version is older than 2.7 +#endif +]])], support_for_attribute_constructor=yes) + +if test x$support_for_attribute_constructor = xyes; then + AC_DEFINE([TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR], + [],[Whether the tool chain supports __attribute__((constructor))]) +fi + +AC_MSG_RESULT($support_for_attribute_constructor) +AC_SUBST(TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR) + +dnl ===================================== +dnl __float128 + +support_for_float128=no + +AC_MSG_CHECKING(for __float128) +AC_LINK_IFELSE([AC_LANG_SOURCE([[ +__float128 a = 1.0Q, b = 2.0Q; int main (void) { return a + b; } +]])], support_for_float128=yes) + +if test x$support_for_float128 = xyes; then + AC_DEFINE([HAVE_FLOAT128], [], [Whether the tool chain supports __float128]) +fi + +AC_MSG_RESULT($support_for_float128) + +dnl ===================================== +dnl __builtin_clz + +support_for_builtin_clz=no + +AC_MSG_CHECKING(for __builtin_clz) +AC_LINK_IFELSE([AC_LANG_SOURCE([[ +unsigned int x = 11; int main (void) { return __builtin_clz(x); } +]])], support_for_builtin_clz=yes) + +if test x$support_for_builtin_clz = xyes; then + AC_DEFINE([HAVE_BUILTIN_CLZ], [], [Whether the compiler supports __builtin_clz]) +fi + +AC_MSG_RESULT($support_for_builtin_clz) + +dnl ===================================== +dnl GCC vector extensions + +support_for_gcc_vector_extensions=no + +AC_MSG_CHECKING(for GCC vector extensions) +AC_LINK_IFELSE([AC_LANG_SOURCE([[ +unsigned int __attribute__ ((vector_size(16))) e, a, b; +int main (void) { e = a - ((b << 27) + (b >> (32 - 27))) + 1; return e[0]; } +]])], support_for_gcc_vector_extensions=yes) + +if test x$support_for_gcc_vector_extensions = xyes; then + AC_DEFINE([HAVE_GCC_VECTOR_EXTENSIONS], [], + [Whether the compiler supports GCC vector extensions]) +fi + +AC_MSG_RESULT($support_for_gcc_vector_extensions) + +dnl ================== +dnl libpng + +AC_ARG_ENABLE(libpng, AS_HELP_STRING([--enable-libpng], [Build support for libpng (default: auto)]), + [have_libpng=$enableval], [have_libpng=auto]) + +case x$have_libpng in + xyes) PKG_CHECK_MODULES(PNG, [libpng]) ;; + xno) ;; + *) PKG_CHECK_MODULES(PNG, [libpng], have_libpng=yes, have_libpng=no) ;; +esac + +if test x$have_libpng = xyes; then + AC_DEFINE([HAVE_LIBPNG], [1], [Whether we have libpng]) +fi + +AC_SUBST(HAVE_LIBPNG) + +AC_OUTPUT([pixman-1.pc + pixman-1-uninstalled.pc + Makefile + pixman/Makefile + pixman/pixman-version.h + demos/Makefile + test/Makefile]) + +m4_if(m4_eval(pixman_minor % 2), [1], [ + echo + echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + echo + echo " Thanks for testing this development snapshot of pixman. Please" + echo " report any problems you find, either by sending email to " + echo + echo " pixman@lists.freedesktop.org" + echo + echo " or by filing a bug at " + echo + echo " https://bugs.freedesktop.org/enter_bug.cgi?product=pixman " + echo + echo " If you are looking for a stable release of pixman, please note " + echo " that stable releases have _even_ minor version numbers. Ie., " + echo " pixman-0.]m4_eval(pixman_minor & ~1)[.x are stable releases, whereas pixman-$PIXMAN_VERSION_MAJOR.$PIXMAN_VERSION_MINOR.$PIXMAN_VERSION_MICRO is a " + echo " development snapshot that may contain bugs and experimental " + echo " features. " + echo + echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + echo +]) diff --git a/src/pixman/demos/Makefile.am b/src/pixman/demos/Makefile.am new file mode 100644 index 0000000..e04743d --- /dev/null +++ b/src/pixman/demos/Makefile.am @@ -0,0 +1,52 @@ +EXTRA_DIST = parrot.c parrot.jpg scale.ui + +if HAVE_GTK + +AM_CFLAGS = $(OPENMP_CFLAGS) +AM_LDFLAGS = $(OPENMP_CFLAGS) + +LDADD = $(top_builddir)/pixman/libpixman-1.la -lm $(GTK_LIBS) $(PNG_LIBS) +AM_CPPFLAGS = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS) $(PNG_CFLAGS) + +GTK_UTILS = gtk-utils.c gtk-utils.h ../test/utils.c ../test/utils.h \ + ../test/utils-prng.c ../test/utils-prng.h + +DEMOS = \ + clip-test \ + clip-in \ + composite-test \ + gradient-test \ + radial-test \ + linear-gradient \ + conical-test \ + alpha-test \ + screen-test \ + convolution-test \ + trap-test \ + tri-test \ + quad2quad \ + checkerboard \ + srgb-trap-test \ + srgb-test \ + scale + +gradient_test_SOURCES = gradient-test.c $(GTK_UTILS) +alpha_test_SOURCES = alpha-test.c $(GTK_UTILS) +composite_test_SOURCES = composite-test.c $(GTK_UTILS) +clip_test_SOURCES = clip-test.c $(GTK_UTILS) +clip_in_SOURCES = clip-in.c $(GTK_UTILS) +trap_test_SOURCES = trap-test.c $(GTK_UTILS) +screen_test_SOURCES = screen-test.c $(GTK_UTILS) +convolution_test_SOURCES = convolution-test.c $(GTK_UTILS) +radial_test_SOURCES = radial-test.c $(GTK_UTILS) +linear_gradient_SOURCES = linear-gradient.c $(GTK_UTILS) +conical_test_SOURCES = conical-test.c $(GTK_UTILS) +tri_test_SOURCES = tri-test.c $(GTK_UTILS) +checkerboard_SOURCES = checkerboard.c $(GTK_UTILS) +srgb_test_SOURCES = srgb-test.c $(GTK_UTILS) +srgb_trap_test_SOURCES = srgb-trap-test.c $(GTK_UTILS) +scale_SOURCES = scale.c $(GTK_UTILS) + +noinst_PROGRAMS = $(DEMOS) + +endif diff --git a/src/pixman/demos/alpha-test.c b/src/pixman/demos/alpha-test.c new file mode 100644 index 0000000..54e30fa --- /dev/null +++ b/src/pixman/demos/alpha-test.c @@ -0,0 +1,119 @@ +#include <stdio.h> +#include <stdlib.h> +#include "pixman.h" +#include "gtk-utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 400 +#define HEIGHT 200 + + uint32_t *alpha = malloc (WIDTH * HEIGHT * 4); + uint32_t *dest = malloc (WIDTH * HEIGHT * 4); + uint32_t *src = malloc (WIDTH * HEIGHT * 4); + pixman_image_t *grad_img; + pixman_image_t *alpha_img; + pixman_image_t *dest_img; + pixman_image_t *src_img; + int i; + pixman_gradient_stop_t stops[2] = + { + { pixman_int_to_fixed (0), { 0x0000, 0x0000, 0x0000, 0x0000 } }, + { pixman_int_to_fixed (1), { 0xffff, 0x0000, 0x1111, 0xffff } } + }; + pixman_point_fixed_t p1 = { pixman_double_to_fixed (0), 0 }; + pixman_point_fixed_t p2 = { pixman_double_to_fixed (WIDTH), + pixman_int_to_fixed (0) }; +#if 0 + pixman_transform_t trans = { + { { pixman_double_to_fixed (2), pixman_double_to_fixed (0.5), pixman_double_to_fixed (-100), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (3), pixman_double_to_fixed (0), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (0.000), pixman_double_to_fixed (1.0) } + } + }; +#else + pixman_transform_t trans = { + { { pixman_fixed_1, 0, 0 }, + { 0, pixman_fixed_1, 0 }, + { 0, 0, pixman_fixed_1 } } + }; +#endif + +#if 0 + pixman_point_fixed_t c_inner; + pixman_point_fixed_t c_outer; + pixman_fixed_t r_inner; + pixman_fixed_t r_outer; +#endif + + for (i = 0; i < WIDTH * HEIGHT; ++i) + alpha[i] = 0x4f00004f; /* pale blue */ + + alpha_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + alpha, + WIDTH * 4); + + for (i = 0; i < WIDTH * HEIGHT; ++i) + dest[i] = 0xffffff00; /* yellow */ + + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + dest, + WIDTH * 4); + + for (i = 0; i < WIDTH * HEIGHT; ++i) + src[i] = 0xffff0000; + + src_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + src, + WIDTH * 4); + +#if 0 + c_inner.x = pixman_double_to_fixed (50.0); + c_inner.y = pixman_double_to_fixed (50.0); + c_outer.x = pixman_double_to_fixed (50.0); + c_outer.y = pixman_double_to_fixed (50.0); + r_inner = 0; + r_outer = pixman_double_to_fixed (50.0); + + grad_img = pixman_image_create_conical_gradient (&c_inner, r_inner, + stops, 2); +#endif +#if 0 + grad_img = pixman_image_create_conical_gradient (&c_inner, r_inner, + stops, 2); + grad_img = pixman_image_create_linear_gradient (&c_inner, &c_outer, + r_inner, r_outer, + stops, 2); +#endif + + grad_img = pixman_image_create_linear_gradient (&p1, &p2, + stops, 2); + + pixman_image_set_transform (grad_img, &trans); + pixman_image_set_repeat (grad_img, PIXMAN_REPEAT_PAD); + + pixman_image_composite (PIXMAN_OP_OVER, grad_img, NULL, alpha_img, + 0, 0, 0, 0, 0, 0, 10 * WIDTH, HEIGHT); + + pixman_image_set_alpha_map (src_img, alpha_img, 10, 10); + + pixman_image_composite (PIXMAN_OP_OVER, src_img, NULL, dest_img, + 0, 0, 0, 0, 0, 0, 10 * WIDTH, HEIGHT); + + printf ("0, 0: %x\n", dest[0]); + printf ("10, 10: %x\n", dest[10 * 10 + 10]); + printf ("w, h: %x\n", dest[(HEIGHT - 1) * 100 + (WIDTH - 1)]); + + show_image (dest_img); + + pixman_image_unref (src_img); + pixman_image_unref (grad_img); + pixman_image_unref (alpha_img); + free (dest); + + return 0; +} diff --git a/src/pixman/demos/checkerboard.c b/src/pixman/demos/checkerboard.c new file mode 100644 index 0000000..449fedb --- /dev/null +++ b/src/pixman/demos/checkerboard.c @@ -0,0 +1,71 @@ +#include <stdio.h> +#include <stdlib.h> +#include "pixman.h" +#include "gtk-utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 400 +#define HEIGHT 400 +#define TILE_SIZE 25 + + pixman_image_t *checkerboard; + pixman_image_t *destination; +#define D2F(d) (pixman_double_to_fixed(d)) + pixman_transform_t trans = { { + { D2F (-1.96830), D2F (-1.82250), D2F (512.12250)}, + { D2F (0.00000), D2F (-7.29000), D2F (1458.00000)}, + { D2F (0.00000), D2F (-0.00911), D2F (0.59231)}, + }}; + int i, j; + + checkerboard = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + NULL, 0); + + destination = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + NULL, 0); + + for (i = 0; i < HEIGHT / TILE_SIZE; ++i) + { + for (j = 0; j < WIDTH / TILE_SIZE; ++j) + { + double u = (double)(j + 1) / (WIDTH / TILE_SIZE); + double v = (double)(i + 1) / (HEIGHT / TILE_SIZE); + pixman_color_t black = { 0, 0, 0, 0xffff }; + pixman_color_t white = { + v * 0xffff, + u * 0xffff, + (1 - (double)u) * 0xffff, + 0xffff }; + pixman_color_t *c; + pixman_image_t *fill; + + if ((j & 1) != (i & 1)) + c = &black; + else + c = &white; + + fill = pixman_image_create_solid_fill (c); + + pixman_image_composite (PIXMAN_OP_SRC, fill, NULL, checkerboard, + 0, 0, 0, 0, j * TILE_SIZE, i * TILE_SIZE, + TILE_SIZE, TILE_SIZE); + } + } + + pixman_image_set_transform (checkerboard, &trans); + pixman_image_set_filter (checkerboard, PIXMAN_FILTER_BEST, NULL, 0); + pixman_image_set_repeat (checkerboard, PIXMAN_REPEAT_NONE); + + pixman_image_composite (PIXMAN_OP_SRC, + checkerboard, NULL, destination, + 0, 0, 0, 0, 0, 0, + WIDTH, HEIGHT); + + show_image (destination); + + return 0; +} diff --git a/src/pixman/demos/clip-in.c b/src/pixman/demos/clip-in.c new file mode 100644 index 0000000..5157981 --- /dev/null +++ b/src/pixman/demos/clip-in.c @@ -0,0 +1,50 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "pixman.h" +#include "gtk-utils.h" + +/* This test demonstrates that clipping is done totally different depending + * on whether the source is transformed or not. + */ +int +main (int argc, char **argv) +{ +#define WIDTH 200 +#define HEIGHT 200 + +#define SMALL 25 + + uint32_t *sbits = malloc (SMALL * SMALL * 4); + uint32_t *bits = malloc (WIDTH * HEIGHT * 4); + pixman_transform_t trans = { + { + { pixman_double_to_fixed (1.0), pixman_double_to_fixed (0), pixman_double_to_fixed (-0.1), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (1), pixman_double_to_fixed (-0.1), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (0), pixman_double_to_fixed (1.0) } + } }; + + pixman_image_t *src_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, SMALL, SMALL, sbits, 4 * SMALL); + pixman_image_t *dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, bits, 4 * WIDTH); + + memset (bits, 0xff, WIDTH * HEIGHT * 4); + memset (sbits, 0x00, SMALL * SMALL * 4); + + pixman_image_composite (PIXMAN_OP_IN, + src_img, NULL, dest_img, + 0, 0, 0, 0, SMALL, SMALL, 200, 200); + + pixman_image_set_transform (src_img, &trans); + + pixman_image_composite (PIXMAN_OP_IN, + src_img, NULL, dest_img, + 0, 0, 0, 0, SMALL * 2, SMALL * 2, 200, 200); + + show_image (dest_img); + + pixman_image_unref (src_img); + pixman_image_unref (dest_img); + free (bits); + + return 0; +} diff --git a/src/pixman/demos/clip-test.c b/src/pixman/demos/clip-test.c new file mode 100644 index 0000000..aa0df44 --- /dev/null +++ b/src/pixman/demos/clip-test.c @@ -0,0 +1,97 @@ +#include <stdio.h> +#include <stdlib.h> +#include "pixman.h" +#include "gtk-utils.h" + +#define WIDTH 200 +#define HEIGHT 200 + +static pixman_image_t * +create_solid_bits (uint32_t pixel) +{ + uint32_t *pixels = malloc (WIDTH * HEIGHT * 4); + int i; + + for (i = 0; i < WIDTH * HEIGHT; ++i) + pixels[i] = pixel; + + return pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + pixels, + WIDTH * 4); +} + +int +main (int argc, char **argv) +{ + pixman_image_t *gradient_img; + pixman_image_t *src_img, *dst_img; + pixman_gradient_stop_t stops[2] = + { + { pixman_int_to_fixed (0), { 0xffff, 0x0000, 0x0000, 0xffff } }, + { pixman_int_to_fixed (1), { 0xffff, 0xffff, 0x0000, 0xffff } } + }; +#if 0 + pixman_point_fixed_t p1 = { 0, 0 }; + pixman_point_fixed_t p2 = { pixman_int_to_fixed (WIDTH), + pixman_int_to_fixed (HEIGHT) }; +#endif + pixman_point_fixed_t c_inner; + pixman_point_fixed_t c_outer; + pixman_fixed_t r_inner; + pixman_fixed_t r_outer; + pixman_region32_t clip_region; + pixman_transform_t trans = { + { { pixman_double_to_fixed (1.3), pixman_double_to_fixed (0), pixman_double_to_fixed (-0.5), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (1), pixman_double_to_fixed (-0.5), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (0), pixman_double_to_fixed (1.0) } + } + }; + + src_img = create_solid_bits (0xff0000ff); + + c_inner.x = pixman_double_to_fixed (100.0); + c_inner.y = pixman_double_to_fixed (100.0); + c_outer.x = pixman_double_to_fixed (100.0); + c_outer.y = pixman_double_to_fixed (100.0); + r_inner = 0; + r_outer = pixman_double_to_fixed (100.0); + + gradient_img = pixman_image_create_radial_gradient (&c_inner, &c_outer, + r_inner, r_outer, + stops, 2); + +#if 0 + gradient_img = pixman_image_create_linear_gradient (&p1, &p2, + stops, 2); + +#endif + + pixman_image_composite (PIXMAN_OP_OVER, gradient_img, NULL, src_img, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + + pixman_region32_init_rect (&clip_region, 50, 0, 100, 200); + pixman_image_set_clip_region32 (src_img, &clip_region); + pixman_image_set_source_clipping (src_img, TRUE); + pixman_image_set_has_client_clip (src_img, TRUE); + pixman_image_set_transform (src_img, &trans); + pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL); + + dst_img = create_solid_bits (0xffff0000); + pixman_image_composite (PIXMAN_OP_OVER, src_img, NULL, dst_img, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + + +#if 0 + printf ("0, 0: %x\n", src[0]); + printf ("10, 10: %x\n", src[10 * 10 + 10]); + printf ("w, h: %x\n", src[(HEIGHT - 1) * 100 + (WIDTH - 1)]); +#endif + + show_image (dst_img); + + pixman_image_unref (gradient_img); + pixman_image_unref (src_img); + + return 0; +} diff --git a/src/pixman/demos/composite-test.c b/src/pixman/demos/composite-test.c new file mode 100644 index 0000000..8213e2f --- /dev/null +++ b/src/pixman/demos/composite-test.c @@ -0,0 +1,186 @@ +#include <gtk/gtk.h> +#include <stdlib.h> +#include <stdio.h> +#include "pixman.h" +#include "gtk-utils.h" +#include "parrot.c" + +#define WIDTH 80 +#define HEIGHT 80 + +typedef struct { + const char *name; + pixman_op_t op; +} operator_t; + +static const operator_t operators[] = { + { "CLEAR", PIXMAN_OP_CLEAR }, + { "SRC", PIXMAN_OP_SRC }, + { "DST", PIXMAN_OP_DST }, + { "OVER", PIXMAN_OP_OVER }, + { "OVER_REVERSE", PIXMAN_OP_OVER_REVERSE }, + { "IN", PIXMAN_OP_IN }, + { "IN_REVERSE", PIXMAN_OP_IN_REVERSE }, + { "OUT", PIXMAN_OP_OUT }, + { "OUT_REVERSE", PIXMAN_OP_OUT_REVERSE }, + { "ATOP", PIXMAN_OP_ATOP }, + { "ATOP_REVERSE", PIXMAN_OP_ATOP_REVERSE }, + { "XOR", PIXMAN_OP_XOR }, + { "ADD", PIXMAN_OP_ADD }, + { "SATURATE", PIXMAN_OP_SATURATE }, + + { "MULTIPLY", PIXMAN_OP_MULTIPLY }, + { "SCREEN", PIXMAN_OP_SCREEN }, + { "OVERLAY", PIXMAN_OP_OVERLAY }, + { "DARKEN", PIXMAN_OP_DARKEN }, + { "LIGHTEN", PIXMAN_OP_LIGHTEN }, + { "COLOR_DODGE", PIXMAN_OP_COLOR_DODGE }, + { "COLOR_BURN", PIXMAN_OP_COLOR_BURN }, + { "HARD_LIGHT", PIXMAN_OP_HARD_LIGHT }, + { "SOFT_LIGHT", PIXMAN_OP_SOFT_LIGHT }, + { "DIFFERENCE", PIXMAN_OP_DIFFERENCE }, + { "EXCLUSION", PIXMAN_OP_EXCLUSION }, + { "HSL_HUE", PIXMAN_OP_HSL_HUE }, + { "HSL_SATURATION", PIXMAN_OP_HSL_SATURATION }, + { "HSL_COLOR", PIXMAN_OP_HSL_COLOR }, + { "HSL_LUMINOSITY", PIXMAN_OP_HSL_LUMINOSITY }, +}; + +static uint32_t +reader (const void *src, int size) +{ + switch (size) + { + case 1: + return *(uint8_t *)src; + case 2: + return *(uint16_t *)src; + case 4: + return *(uint32_t *)src; + default: + g_assert_not_reached(); + } +} + +static void +writer (void *src, uint32_t value, int size) +{ + switch (size) + { + case 1: + *(uint8_t *)src = value; + break; + + case 2: + *(uint16_t *)src = value; + break; + + case 4: + *(uint32_t *)src = value; + break; + + default: + break; + } +} + +int +main (int argc, char **argv) +{ +#define d2f pixman_double_to_fixed + + GtkWidget *window, *swindow; + GtkWidget *table; + uint32_t *dest = malloc (WIDTH * HEIGHT * 4); + uint32_t *src = malloc (WIDTH * HEIGHT * 4); + pixman_image_t *gradient, *parrot; + pixman_image_t *dest_img; + pixman_point_fixed_t p1 = { -10 << 16, 10 << 16 }; + pixman_point_fixed_t p2 = { (WIDTH + 10) << 16, (HEIGHT - 10) << 16 }; + uint16_t alpha = 0xdddd; + pixman_gradient_stop_t stops[6] = + { + { d2f (0.0), { 0xf2f2, 0x8787, 0x7d7d, alpha } }, + { d2f (0.22), { 0xf3f3, 0xeaea, 0x8383, alpha } }, + { d2f (0.42), { 0x6b6b, 0xc0c0, 0x7777, alpha } }, + { d2f (0.57), { 0x4b4b, 0xc9c9, 0xf5f5, alpha } }, + { d2f (0.75), { 0x6a6a, 0x7f7f, 0xbebe, alpha } }, + { d2f (1.0), { 0xeded, 0x8282, 0xb0b0, alpha } }, + }; + + int i; + + gtk_init (&argc, &argv); + + window = gtk_window_new (GTK_WINDOW_TOPLEVEL); + + gtk_window_set_default_size (GTK_WINDOW (window), 800, 600); + + g_signal_connect (window, "delete-event", + G_CALLBACK (gtk_main_quit), + NULL); + table = gtk_table_new (G_N_ELEMENTS (operators) / 6, 6, TRUE); + + gradient = pixman_image_create_linear_gradient (&p1, &p2, stops, G_N_ELEMENTS (stops)); + parrot = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, (uint32_t *)parrot_bits, WIDTH * 4); + + pixman_image_set_repeat (gradient, PIXMAN_REPEAT_PAD); + + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + NULL, + WIDTH * 4); + pixman_image_set_accessors (dest_img, reader, writer); + + for (i = 0; i < G_N_ELEMENTS (operators); ++i) + { + GtkWidget *image; + GdkPixbuf *pixbuf; + GtkWidget *vbox; + GtkWidget *label; + + vbox = gtk_vbox_new (FALSE, 0); + + label = gtk_label_new (operators[i].name); + gtk_box_pack_start (GTK_BOX (vbox), label, FALSE, FALSE, 6); + gtk_widget_show (label); + + pixman_image_composite (PIXMAN_OP_SRC, gradient, NULL, dest_img, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + pixman_image_composite (operators[i].op, parrot, NULL, dest_img, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + pixbuf = pixbuf_from_argb32 (pixman_image_get_data (dest_img), + WIDTH, HEIGHT, WIDTH * 4); + image = gtk_image_new_from_pixbuf (pixbuf); + gtk_box_pack_start (GTK_BOX (vbox), image, FALSE, FALSE, 0); + gtk_widget_show (image); + + gtk_table_attach_defaults (GTK_TABLE (table), vbox, + i % 6, (i % 6) + 1, i / 6, (i / 6) + 1); + gtk_widget_show (vbox); + + g_object_unref (pixbuf); + } + + pixman_image_unref (gradient); + free (src); + pixman_image_unref (dest_img); + free (dest); + + swindow = gtk_scrolled_window_new (NULL, NULL); + gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow), + GTK_POLICY_AUTOMATIC, + GTK_POLICY_AUTOMATIC); + + gtk_scrolled_window_add_with_viewport (GTK_SCROLLED_WINDOW (swindow), table); + gtk_widget_show (table); + + gtk_container_add (GTK_CONTAINER (window), swindow); + gtk_widget_show (swindow); + + gtk_widget_show (window); + + gtk_main (); + + return 0; +} diff --git a/src/pixman/demos/conical-test.c b/src/pixman/demos/conical-test.c new file mode 100644 index 0000000..6b32430 --- /dev/null +++ b/src/pixman/demos/conical-test.c @@ -0,0 +1,100 @@ +#include "../test/utils.h" +#include "gtk-utils.h" + +#define SIZE 128 +#define GRADIENTS_PER_ROW 7 +#define NUM_ROWS ((NUM_GRADIENTS + GRADIENTS_PER_ROW - 1) / GRADIENTS_PER_ROW) +#define WIDTH (SIZE * GRADIENTS_PER_ROW) +#define HEIGHT (SIZE * NUM_ROWS) +#define NUM_GRADIENTS 35 + +#define double_to_color(x) \ + (((uint32_t) ((x)*65536)) - (((uint32_t) ((x)*65536)) >> 16)) + +#define PIXMAN_STOP(offset,r,g,b,a) \ + { pixman_double_to_fixed (offset), \ + { \ + double_to_color (r), \ + double_to_color (g), \ + double_to_color (b), \ + double_to_color (a) \ + } \ + } + + +static const pixman_gradient_stop_t stops[] = { + PIXMAN_STOP (0.25, 1, 0, 0, 0.7), + PIXMAN_STOP (0.5, 1, 1, 0, 0.7), + PIXMAN_STOP (0.75, 0, 1, 0, 0.7), + PIXMAN_STOP (1.0, 0, 0, 1, 0.7) +}; + +#define NUM_STOPS (sizeof (stops) / sizeof (stops[0])) + +static pixman_image_t * +create_conical (int index) +{ + pixman_point_fixed_t c; + double angle; + + c.x = pixman_double_to_fixed (0); + c.y = pixman_double_to_fixed (0); + + angle = (0.5 / NUM_GRADIENTS + index / (double)NUM_GRADIENTS) * 720 - 180; + + return pixman_image_create_conical_gradient ( + &c, pixman_double_to_fixed (angle), stops, NUM_STOPS); +} + +int +main (int argc, char **argv) +{ + pixman_transform_t transform; + pixman_image_t *src_img, *dest_img; + int i; + + enable_divbyzero_exceptions (); + + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + NULL, 0); + + draw_checkerboard (dest_img, 25, 0xffaaaaaa, 0xff888888); + + pixman_transform_init_identity (&transform); + + pixman_transform_translate (NULL, &transform, + pixman_double_to_fixed (0.5), + pixman_double_to_fixed (0.5)); + + pixman_transform_scale (NULL, &transform, + pixman_double_to_fixed (SIZE), + pixman_double_to_fixed (SIZE)); + pixman_transform_translate (NULL, &transform, + pixman_double_to_fixed (0.5), + pixman_double_to_fixed (0.5)); + + for (i = 0; i < NUM_GRADIENTS; i++) + { + int column = i % GRADIENTS_PER_ROW; + int row = i / GRADIENTS_PER_ROW; + + src_img = create_conical (i); + pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL); + + pixman_image_set_transform (src_img, &transform); + + pixman_image_composite32 ( + PIXMAN_OP_OVER, src_img, NULL,dest_img, + 0, 0, 0, 0, column * SIZE, row * SIZE, + SIZE, SIZE); + + pixman_image_unref (src_img); + } + + show_image (dest_img); + + pixman_image_unref (dest_img); + + return 0; +} diff --git a/src/pixman/demos/convolution-test.c b/src/pixman/demos/convolution-test.c new file mode 100644 index 0000000..da284af --- /dev/null +++ b/src/pixman/demos/convolution-test.c @@ -0,0 +1,47 @@ +#include <stdio.h> +#include <stdlib.h> +#include "pixman.h" +#include "gtk-utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 200 +#define HEIGHT 200 + +#define d2f pixman_double_to_fixed + + uint32_t *src = malloc (WIDTH * HEIGHT * 4); + uint32_t *mask = malloc (WIDTH * HEIGHT * 4); + uint32_t *dest = malloc (WIDTH * HEIGHT * 4); + pixman_fixed_t convolution[] = + { + d2f (3), d2f (3), + d2f (0.5), d2f (0.5), d2f (0.5), + d2f (0.5), d2f (0.5), d2f (0.5), + d2f (0.5), d2f (0.5), d2f (0.5), + }; + pixman_image_t *simg, *mimg, *dimg; + + int i; + + for (i = 0; i < WIDTH * HEIGHT; ++i) + { + src[i] = 0x7f007f00; + mask[i] = (i % 256) * 0x01000000; + dest[i] = 0; + } + + simg = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src, WIDTH * 4); + mimg = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, mask, WIDTH * 4); + dimg = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, dest, WIDTH * 4); + + pixman_image_set_filter (mimg, PIXMAN_FILTER_CONVOLUTION, + convolution, 11); + + pixman_image_composite (PIXMAN_OP_OVER, simg, mimg, dimg, 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + + show_image (dimg); + + return 0; +} diff --git a/src/pixman/demos/gradient-test.c b/src/pixman/demos/gradient-test.c new file mode 100644 index 0000000..e68f69a --- /dev/null +++ b/src/pixman/demos/gradient-test.c @@ -0,0 +1,92 @@ +#include <stdio.h> +#include <stdlib.h> +#include "pixman.h" +#include "gtk-utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 400 +#define HEIGHT 200 + + uint32_t *dest = malloc (WIDTH * HEIGHT * 4); + pixman_image_t *src_img; + pixman_image_t *dest_img; + int i; + pixman_gradient_stop_t stops[2] = + { + { pixman_int_to_fixed (0), { 0x0000, 0x0000, 0xffff, 0xffff } }, + { pixman_int_to_fixed (1), { 0xffff, 0x1111, 0x1111, 0xffff } } + }; + pixman_point_fixed_t p1 = { pixman_double_to_fixed (50), 0 }; + pixman_point_fixed_t p2 = { pixman_double_to_fixed (200), 0 }; +#if 0 + pixman_transform_t trans = { + { { pixman_double_to_fixed (2), pixman_double_to_fixed (0.5), pixman_double_to_fixed (-100), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (3), pixman_double_to_fixed (0), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (0.000), pixman_double_to_fixed (1.0) } + } + }; +#else + pixman_transform_t trans = { + { { pixman_fixed_1, 0, 0 }, + { 0, pixman_fixed_1, 0 }, + { 0, 0, pixman_fixed_1 } } + }; +#endif + +#if 0 + pixman_point_fixed_t c_inner; + pixman_point_fixed_t c_outer; + pixman_fixed_t r_inner; + pixman_fixed_t r_outer; +#endif + + for (i = 0; i < WIDTH * HEIGHT; ++i) + dest[i] = 0xff00ff00; + + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + dest, + WIDTH * 4); + +#if 0 + c_inner.x = pixman_double_to_fixed (50.0); + c_inner.y = pixman_double_to_fixed (50.0); + c_outer.x = pixman_double_to_fixed (50.0); + c_outer.y = pixman_double_to_fixed (50.0); + r_inner = 0; + r_outer = pixman_double_to_fixed (50.0); + + src_img = pixman_image_create_conical_gradient (&c_inner, r_inner, + stops, 2); +#endif +#if 0 + src_img = pixman_image_create_conical_gradient (&c_inner, r_inner, + stops, 2); + src_img = pixman_image_create_linear_gradient (&c_inner, &c_outer, + r_inner, r_outer, + stops, 2); +#endif + + src_img = pixman_image_create_linear_gradient (&p1, &p2, + stops, 2); + + pixman_image_set_transform (src_img, &trans); + pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NONE); + + pixman_image_composite (PIXMAN_OP_OVER, src_img, NULL, dest_img, + 0, 0, 0, 0, 0, 0, 10 * WIDTH, HEIGHT); + + printf ("0, 0: %x\n", dest[0]); + printf ("10, 10: %x\n", dest[10 * 10 + 10]); + printf ("w, h: %x\n", dest[(HEIGHT - 1) * 100 + (WIDTH - 1)]); + + show_image (dest_img); + + pixman_image_unref (src_img); + pixman_image_unref (dest_img); + free (dest); + + return 0; +} diff --git a/src/pixman/demos/gtk-utils.c b/src/pixman/demos/gtk-utils.c new file mode 100644 index 0000000..32d4aec --- /dev/null +++ b/src/pixman/demos/gtk-utils.c @@ -0,0 +1,179 @@ +#include <gtk/gtk.h> +#include <config.h> +#include "../test/utils.h" +#include "gtk-utils.h" + +pixman_image_t * +pixman_image_from_file (const char *filename, pixman_format_code_t format) +{ + GdkPixbuf *pixbuf; + pixman_image_t *image; + int width, height; + uint32_t *data, *d; + uint8_t *gdk_data; + int n_channels; + int j, i; + int stride; + + if (!(pixbuf = gdk_pixbuf_new_from_file (filename, NULL))) + return NULL; + + image = NULL; + + width = gdk_pixbuf_get_width (pixbuf); + height = gdk_pixbuf_get_height (pixbuf); + n_channels = gdk_pixbuf_get_n_channels (pixbuf); + gdk_data = gdk_pixbuf_get_pixels (pixbuf); + stride = gdk_pixbuf_get_rowstride (pixbuf); + + if (!(data = malloc (width * height * sizeof (uint32_t)))) + goto out; + + d = data; + for (j = 0; j < height; ++j) + { + uint8_t *gdk_line = gdk_data; + + for (i = 0; i < width; ++i) + { + int r, g, b, a; + uint32_t pixel; + + r = gdk_line[0]; + g = gdk_line[1]; + b = gdk_line[2]; + + if (n_channels == 4) + a = gdk_line[3]; + else + a = 0xff; + + r = (r * a + 127) / 255; + g = (g * a + 127) / 255; + b = (b * a + 127) / 255; + + pixel = (a << 24) | (r << 16) | (g << 8) | b; + + *d++ = pixel; + gdk_line += n_channels; + } + + gdk_data += stride; + } + + image = pixman_image_create_bits ( + format, width, height, data, width * 4); + +out: + g_object_unref (pixbuf); + return image; +} + +GdkPixbuf * +pixbuf_from_argb32 (uint32_t *bits, + int width, + int height, + int stride) +{ + GdkPixbuf *pixbuf = gdk_pixbuf_new (GDK_COLORSPACE_RGB, TRUE, + 8, width, height); + int p_stride = gdk_pixbuf_get_rowstride (pixbuf); + guint32 *p_bits = (guint32 *)gdk_pixbuf_get_pixels (pixbuf); + int i; + + for (i = 0; i < height; ++i) + { + uint32_t *src_row = &bits[i * (stride / 4)]; + uint32_t *dst_row = p_bits + i * (p_stride / 4); + + a8r8g8b8_to_rgba_np (dst_row, src_row, width); + } + + return pixbuf; +} + +static gboolean +on_expose (GtkWidget *widget, GdkEventExpose *expose, gpointer data) +{ + pixman_image_t *pimage = data; + int width = pixman_image_get_width (pimage); + int height = pixman_image_get_height (pimage); + int stride = pixman_image_get_stride (pimage); + cairo_surface_t *cimage; + cairo_format_t format; + cairo_t *cr; + + if (pixman_image_get_format (pimage) == PIXMAN_x8r8g8b8) + format = CAIRO_FORMAT_RGB24; + else + format = CAIRO_FORMAT_ARGB32; + + cimage = cairo_image_surface_create_for_data ( + (uint8_t *)pixman_image_get_data (pimage), + format, width, height, stride); + + cr = gdk_cairo_create (widget->window); + + cairo_rectangle (cr, 0, 0, width, height); + cairo_set_source_surface (cr, cimage, 0, 0); + cairo_fill (cr); + + cairo_destroy (cr); + cairo_surface_destroy (cimage); + + return TRUE; +} + +void +show_image (pixman_image_t *image) +{ + GtkWidget *window; + int width, height; + int argc; + char **argv; + char *arg0 = g_strdup ("pixman-test-program"); + pixman_format_code_t format; + pixman_image_t *copy; + + argc = 1; + argv = (char **)&arg0; + + gtk_init (&argc, &argv); + + window = gtk_window_new (GTK_WINDOW_TOPLEVEL); + width = pixman_image_get_width (image); + height = pixman_image_get_height (image); + + gtk_window_set_default_size (GTK_WINDOW (window), width, height); + + format = pixman_image_get_format (image); + + /* We always display the image as if it contains sRGB data. That + * means that no conversion should take place when the image + * has the a8r8g8b8_sRGB format. + */ + switch (format) + { + case PIXMAN_a8r8g8b8_sRGB: + case PIXMAN_a8r8g8b8: + case PIXMAN_x8r8g8b8: + copy = pixman_image_ref (image); + break; + + default: + copy = pixman_image_create_bits (PIXMAN_a8r8g8b8, + width, height, NULL, -1); + pixman_image_composite32 (PIXMAN_OP_SRC, + image, NULL, copy, + 0, 0, 0, 0, 0, 0, + width, height); + break; + } + + g_signal_connect (window, "expose_event", G_CALLBACK (on_expose), copy); + g_signal_connect (window, "delete_event", G_CALLBACK (gtk_main_quit), NULL); + + gtk_widget_show (window); + + gtk_main (); +} diff --git a/src/pixman/demos/gtk-utils.h b/src/pixman/demos/gtk-utils.h new file mode 100644 index 0000000..36be4de --- /dev/null +++ b/src/pixman/demos/gtk-utils.h @@ -0,0 +1,15 @@ +#include <stdio.h> +#include <stdlib.h> +#include <glib.h> +#include <gtk/gtk.h> +#include "pixman.h" + +void show_image (pixman_image_t *image); + +pixman_image_t * +pixman_image_from_file (const char *filename, pixman_format_code_t format); + +GdkPixbuf *pixbuf_from_argb32 (uint32_t *bits, + int width, + int height, + int stride); diff --git a/src/pixman/demos/linear-gradient.c b/src/pixman/demos/linear-gradient.c new file mode 100644 index 0000000..46433a6 --- /dev/null +++ b/src/pixman/demos/linear-gradient.c @@ -0,0 +1,50 @@ +#include "../test/utils.h" +#include "gtk-utils.h" + +#define WIDTH 1024 +#define HEIGHT 640 + +int +main (int argc, char **argv) +{ + pixman_image_t *src_img, *dest_img; + pixman_gradient_stop_t stops[] = { + { 0x00000, { 0x0000, 0x0000, 0x4444, 0xdddd } }, + { 0x10000, { 0xeeee, 0xeeee, 0x8888, 0xdddd } }, +#if 0 + /* These colors make it very obvious that dithering + * is useful even for 8-bit gradients + */ + { 0x00000, { 0x6666, 0x3333, 0x3333, 0xffff } }, + { 0x10000, { 0x3333, 0x6666, 0x6666, 0xffff } }, +#endif + }; + pixman_point_fixed_t p1, p2; + + enable_divbyzero_exceptions (); + + dest_img = pixman_image_create_bits (PIXMAN_x8r8g8b8, + WIDTH, HEIGHT, + NULL, 0); + + p1.x = p1.y = 0x0000; + p2.x = WIDTH << 16; + p2.y = HEIGHT << 16; + + src_img = pixman_image_create_linear_gradient (&p1, &p2, stops, ARRAY_LENGTH (stops)); + + pixman_image_composite32 (PIXMAN_OP_OVER, + src_img, + NULL, + dest_img, + 0, 0, + 0, 0, + 0, 0, + WIDTH, HEIGHT); + + show_image (dest_img); + + pixman_image_unref (dest_img); + + return 0; +} diff --git a/src/pixman/demos/parrot.c b/src/pixman/demos/parrot.c new file mode 100644 index 0000000..60fd270 --- /dev/null +++ b/src/pixman/demos/parrot.c @@ -0,0 +1,1079 @@ +/* This parrot is a finger painting by Rubens LP: + * + * http://www.flickr.com/photos/dorubens/4030604504/in/set-72157622586088192/ + * + * Used here under Creative Commons Attribution. The artist's web site: + * + * http://www.rubenslp.com.br/ + * + */ +static const uint32_t parrot_bits[] = +{ + 0x716f7070, 0x1c1b1b1b, 0x110f1010, 0x16151415, 0x14121313, 0x2c292b2b, + 0x403e3f3f, 0x19181818, 0x06050605, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02010101, 0x08070707, + 0x05040404, 0x0a060908, 0x27262426, 0xb3b0b1b2, 0x99979897, 0x2b2a2929, + 0x100f0f0f, 0x0f0d0e0e, 0x0e0e0c0d, 0x0d0d0b0c, 0x12111111, 0x10100e0f, + 0x0e0e0c0d, 0x0e0e0c0d, 0x12101211, 0x13121212, 0x17151516, 0x100f0e0f, + 0x15141414, 0x423f4042, 0x3b393a3a, 0x13121212, 0x16151515, 0x2b282b29, + 0x13121112, 0x100f0f0f, 0x0f0d0f0e, 0x08070807, 0x0d0c0c0c, 0x0a090a09, + 0x0e0e0c0d, 0x0c0c0a0b, 0x10100f0f, 0x0f0e0e0e, 0x07060706, 0x0d0c0d0c, + 0x0e0d0e0d, 0x05040504, 0x08070807, 0x0c0b0c0b, 0x0d0c0d0c, 0x05040504, + 0x110f1110, 0x08070707, 0x04030303, 0x09080808, 0x06050605, 0x01000000, + 0x08070707, 0x06050505, 0x05040504, 0x100e100f, 0x0b0a0b0a, 0x01000100, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04030403, + 0x03020302, 0x0b0a0b0a, 0x14131313, 0x0e0d0d0d, 0x0e0d0e0d, 0x231f2222, + 0x4d4b4b4d, 0xa7a5a6a6, 0x5b595a5a, 0x07060606, 0x00000000, 0x00000000, + 0x01000100, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02010201, + 0x05040404, 0x07050706, 0x04020303, 0x403e3e3f, 0xb6b3b5b5, 0x84828283, + 0x1a191819, 0x0e0d0d0d, 0x0d0c0b0c, 0x0f0f0d0e, 0x0e0d0d0d, 0x0f0f0e0e, + 0x0e0e0c0d, 0x0c0b0a0b, 0x0b0a090a, 0x11111010, 0x100f0f0f, 0x100f0f0f, + 0x1b19191a, 0x1f1e1e1e, 0x46434544, 0x3a37383a, 0x1c1b1a1b, 0x1e1d1d1d, + 0x29272828, 0x19171818, 0x0e0d0d0d, 0x0f0e0e0e, 0x06050505, 0x0c0b0b0b, + 0x100e100f, 0x09080908, 0x0c0c0a0b, 0x0f0f0e0e, 0x0c0c0a0b, 0x05040404, + 0x08070807, 0x0c0b0c0b, 0x05040504, 0x06050605, 0x100e100f, 0x09080908, + 0x09080908, 0x12101211, 0x09080908, 0x03020202, 0x08070707, 0x01000100, + 0x04030403, 0x07060606, 0x08070707, 0x08070707, 0x0f0e0f0e, 0x0b0a0b0a, + 0x03020302, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x03020202, + 0x09080908, 0x05040504, 0x00000000, 0x00000000, 0x0b0a0b0a, 0x0f0e0f0e, + 0x1a191a19, 0x77757576, 0xc3c1c2c2, 0x75737374, 0x1f1e1e1f, 0x06050505, + 0x07030605, 0x00000000, 0x03020302, 0x00000000, 0x00000000, 0x00000000, + 0x03020202, 0x04030403, 0x04030303, 0x02010101, 0x5b5a5959, 0xafacaeae, + 0x5a575859, 0x1b19191a, 0x0e0d0d0d, 0x100e100f, 0x100f0f0f, 0x11101010, + 0x12121111, 0x0c0c0a0b, 0x09090708, 0x0d0d0b0c, 0x0f0e0d0e, 0x0d0d0b0c, + 0x14131313, 0x1c1b1b1b, 0x322f3132, 0x514f504f, 0x2d2b2b2b, 0x2e2b2c2e, + 0x21202020, 0x201f1f1f, 0x15141414, 0x12101211, 0x0e0d0d0d, 0x08070807, + 0x0b0a0a0a, 0x100e0f0f, 0x07060706, 0x0a090a09, 0x0f0f0d0e, 0x0c0c0a0b, + 0x09090708, 0x0d0c0c0c, 0x0b0a0b0a, 0x06050605, 0x0b0a0b0a, 0x0c0b0c0b, + 0x08070807, 0x07060706, 0x0f0e0f0e, 0x0a090a09, 0x01000000, 0x05040504, + 0x03020202, 0x01000000, 0x08070707, 0x05040504, 0x09080908, 0x0d0c0d0c, + 0x07060606, 0x04030403, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x02010201, 0x06050505, 0x08070807, 0x01000100, 0x00000000, 0x00000000, + 0x0f0d0e0e, 0x2c2a2a2b, 0x9c9a9b9b, 0xcac7cac9, 0x4d4c4b4d, 0x1b19181a, + 0x0a090909, 0x00000000, 0x02010201, 0x05040504, 0x00000000, 0x00000000, + 0x00000000, 0x02010101, 0x06040605, 0x03020302, 0x100d0e10, 0x615f5f60, + 0x9d9a9c9c, 0x32313131, 0x14131313, 0x0d0c0c0c, 0x0f0e0e0e, 0x0e0d0d0d, + 0x100f0f0f, 0x11101010, 0x08070807, 0x06050405, 0x0f0e0e0e, 0x100e0f0f, + 0x0d0d0c0c, 0x2a282929, 0x3d3c3b3c, 0x38373637, 0x4f4d4d4f, 0x19181718, + 0x27262626, 0x14131313, 0x29272828, 0x2e2b2c2d, 0x201d1e20, 0x15121414, + 0x04030403, 0x07060606, 0x0c0b0c0b, 0x0a090a09, 0x08070707, 0x0e0d0e0d, + 0x0b0a0a0a, 0x07060606, 0x0c0b0b0b, 0x0a090909, 0x04030403, 0x0a090909, + 0x0e0d0e0d, 0x0a090a09, 0x09080908, 0x0e0d0e0d, 0x07060706, 0x08070807, + 0x08070807, 0x00000000, 0x01000000, 0x07060606, 0x04030403, 0x08070807, + 0x0e0d0e0d, 0x07060706, 0x06050605, 0x01000100, 0x00000000, 0x00000000, + 0x00000000, 0x02010201, 0x07060706, 0x01000100, 0x00000000, 0x00000000, + 0x01000100, 0x01000100, 0x322e3131, 0xa9a8a8a8, 0xb9b8b8b8, 0x39383639, + 0x1d1b1b1c, 0x0c0b0b0b, 0x04030303, 0x05040404, 0x07060706, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x06030605, 0x09060807, 0x0e0d0c0d, + 0x605e5e5f, 0x99959898, 0x2e2c2c2e, 0x13121212, 0x0d0d0b0c, 0x11111010, + 0x12121111, 0x13131212, 0x0f0e0e0e, 0x09080908, 0x03020302, 0x0c0b0b0b, + 0x0e0d0d0d, 0x11101010, 0x38363737, 0x514f4f50, 0x34333134, 0x46434546, + 0x24222124, 0x29262827, 0x04030303, 0x05040404, 0x14131313, 0x15151414, + 0x100f100f, 0x07060706, 0x07060606, 0x0d0c0d0c, 0x0a090909, 0x07060706, + 0x0f0e0f0e, 0x0c0b0c0b, 0x01000100, 0x0c0b0c0b, 0x0a090a09, 0x01000100, + 0x08070707, 0x12101211, 0x0b0a0b0a, 0x06050605, 0x0f0e0f0e, 0x07060706, + 0x04030403, 0x06050605, 0x02010201, 0x00000000, 0x05040504, 0x03020302, + 0x06050605, 0x0d0c0d0c, 0x08070707, 0x07060706, 0x100e100f, 0x05040504, + 0x01000100, 0x00000000, 0x02010201, 0x07060606, 0x03020202, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x06050605, 0xb8b6b6b7, 0xa4a2a3a3, + 0x2c2b2b2b, 0x1c191a1b, 0x0e0c0d0d, 0x08070707, 0x35323433, 0x1a191919, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04020403, + 0x1b19191a, 0x68666667, 0x7d7a7c7c, 0x23212023, 0x1c191a1c, 0x0f0e0d0e, + 0x11111010, 0x11111010, 0x10100f0f, 0x0c0b0b0b, 0x09080908, 0x04030403, + 0x0e0d0e0d, 0x0e0d0d0d, 0x18171717, 0x52505150, 0x5d5a5b5c, 0x3b3a383a, + 0x3d3a3b3d, 0x24212224, 0x29252729, 0x06050505, 0x04030303, 0x04030403, + 0x06050505, 0x0c0b0b0b, 0x09080908, 0x04030303, 0x0d0c0c0c, 0x06050605, + 0x05040504, 0x0c0b0b0b, 0x08070807, 0x06050605, 0x09080908, 0x0c0b0c0b, + 0x05040504, 0x0a090909, 0x0e0d0e0d, 0x0a090a09, 0x09080908, 0x0f0e0e0e, + 0x09080908, 0x04030403, 0x09080908, 0x02010201, 0x00000000, 0x07060706, + 0x05040504, 0x07060606, 0x0f0d0f0e, 0x06050605, 0x08070807, 0x11101010, + 0x0c0b0b0b, 0x11101010, 0x09080808, 0x03020302, 0x05040404, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x7c7a7b7b, 0x1d1c1c1d, 0x100f0f0f, 0x110e1010, 0x07070607, 0x5b585a59, + 0x3e3b3d3c, 0x05040404, 0x00000000, 0x01010001, 0x00000000, 0x00000000, + 0x02010101, 0x1d1b1b1c, 0x615f6060, 0x54525253, 0x1e1c1b1e, 0x18151617, + 0x1b18181b, 0x1a171819, 0x1b181a1a, 0x1b18191a, 0x100e0f0f, 0x03020302, + 0x07060606, 0x0d0c0c0c, 0x120f1111, 0x27252626, 0x73727272, 0x706e6f6f, + 0x524f5152, 0x2f2c2d2f, 0x1e1c1d1d, 0x1f1c1e1e, 0x09070808, 0x03020202, + 0x04030303, 0x03020302, 0x0b0a0a0a, 0x08070807, 0x02010101, 0x0b0a0b0a, + 0x0b0a0b0a, 0x04030303, 0x0d0c0d0c, 0x09080808, 0x05040504, 0x0b0a0b0a, + 0x08070807, 0x02010201, 0x0c0b0c0b, 0x0c0b0b0b, 0x0a090a09, 0x08070807, + 0x100e100f, 0x06050605, 0x04030403, 0x07060706, 0x02010201, 0x00000000, + 0x06050605, 0x03020302, 0x09080908, 0x0d0c0d0c, 0x0d0c0c0c, 0x0a090909, + 0x0d0c0c0c, 0x15131314, 0x1b19191a, 0x1d1b1b1c, 0x11101010, 0x02010201, + 0x01000100, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x02010201, 0x1c1b1b1b, 0x100f0f0f, 0x07060606, 0x0b0b090b, + 0x8f898e8c, 0x37353636, 0x01010001, 0x00000000, 0x00000000, 0x01000000, + 0x00000000, 0x00000000, 0x14121313, 0x57545557, 0x43414142, 0x211e1f21, + 0x18161517, 0x18161518, 0x1f1d1c1f, 0x1b191a1a, 0x13111212, 0x0c0b0b0b, + 0x02010101, 0x08070807, 0x0f0e0e0e, 0x14121313, 0x403e3e40, 0x8b888a8a, + 0x68666767, 0x4c4a4a4c, 0x28252628, 0x23202123, 0x16141615, 0x03020202, + 0x06050605, 0x04030403, 0x04030403, 0x0d0c0c0c, 0x0c0b0c0b, 0x00000000, + 0x0a090a09, 0x0b0a0b0a, 0x03020302, 0x09080908, 0x0c0b0b0b, 0x04030403, + 0x0c0b0c0b, 0x0b0a0b0a, 0x01000100, 0x09080908, 0x0f0e0e0e, 0x09080908, + 0x0c0b0b0b, 0x0b0a0909, 0x03020202, 0x06050605, 0x08070707, 0x04030303, + 0x00000000, 0x06050605, 0x02010201, 0x0c0b0c0b, 0x0f0d0e0e, 0x05040504, + 0x0c0c0a0b, 0x100f0f0f, 0x0f0f0d0e, 0x14121313, 0x18161717, 0x100e0f0f, + 0x02010101, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x0b0a0a0a, 0x0d0c0c0c, 0x08070707, + 0x201f1f1f, 0xa29ca19f, 0x27262626, 0x00000000, 0x09080808, 0x05040404, + 0x02010101, 0x03010302, 0x03020302, 0x1b19191a, 0x35323335, 0x39373738, + 0x1c1a1a1c, 0x19161718, 0x1c1a191c, 0x211f1f21, 0x201d1d20, 0x14121313, + 0x09080808, 0x04030403, 0x09080908, 0x0b0a0a0a, 0x1d1b1b1c, 0x4f4c4d4e, + 0x87838685, 0x4a494749, 0x32303031, 0x1b1a1a1a, 0x1a191919, 0x13121312, + 0x03020302, 0x09080908, 0x0d0c0d0c, 0x0d0c0d0c, 0x100e0f0f, 0x09080908, + 0x01000000, 0x09080808, 0x09080808, 0x02010201, 0x0a090a09, 0x09080908, + 0x04030403, 0x0c0b0c0b, 0x07060706, 0x00000000, 0x08070807, 0x0a090909, + 0x09080808, 0x08070707, 0x0c0b090a, 0x03020000, 0x04030101, 0x06050405, + 0x03020202, 0x00000000, 0x05040504, 0x04030403, 0x07060606, 0x100e100f, + 0x07060706, 0x09080808, 0x11111010, 0x0d0d0b0c, 0x0e0e0c0d, 0x100f0f0f, + 0x0f0e0f0e, 0x03020302, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x08070707, 0x10100f0f, + 0x0c0a0b0b, 0x41403f40, 0x89858887, 0x1e1c1d1d, 0x00000000, 0x110f1010, + 0x37353636, 0x1f1e1e1e, 0x05040404, 0x06050505, 0x201d1f1f, 0x322f3031, + 0x33303132, 0x18151618, 0x19161719, 0x1f1d1c1f, 0x1c1a1a1b, 0x1c1a1a1b, + 0x17151516, 0x09080808, 0x04030403, 0x09080908, 0x0f0d0e0e, 0x1a191819, + 0x66646465, 0x77747676, 0x2b292a29, 0x07050706, 0x07060606, 0x27242726, + 0x25232424, 0x08070707, 0x09080808, 0x11101010, 0x12111111, 0x15131414, + 0x0b0a0b0a, 0x04030403, 0x04030203, 0x06050304, 0x01000000, 0x06050505, + 0x07060606, 0x07060606, 0x09080808, 0x09080808, 0x02010101, 0x07060606, + 0x100e0f0f, 0x09080808, 0x0c0b090a, 0x0a090708, 0x05040203, 0x03020000, + 0x05040203, 0x04030102, 0x03020000, 0x05040304, 0x03020202, 0x09080808, + 0x0a090a09, 0x08070807, 0x0a090a09, 0x110f1110, 0x0d0d0b0c, 0x0e0e0c0d, + 0x11110f10, 0x0c0b0b0b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04030303, + 0x0d0c0c0c, 0x0a090909, 0x504d4e4f, 0x78757776, 0x1a191919, 0x00000000, + 0x16151515, 0x31303030, 0x312f3030, 0x100f0f0f, 0x0b0a0b0a, 0x14121313, + 0x2c292a2c, 0x26232426, 0x19171619, 0x1b171a1b, 0x1f1c1d1f, 0x201d1d20, + 0x0d0c0c0c, 0x0a090909, 0x06050505, 0x05040504, 0x0c0b0b0b, 0x0b0a0a0a, + 0x22212121, 0x817d7f7e, 0x59575857, 0x17161616, 0x04030303, 0x01000000, + 0x07060706, 0x0c0b0b0b, 0x02010201, 0x04030403, 0x0b0a0a0a, 0x201e1e1f, + 0x17151616, 0x0d0c0c0c, 0x02010201, 0x04030303, 0x04030102, 0x04030101, + 0x05040202, 0x06050304, 0x05040203, 0x09080707, 0x05040303, 0x03020102, + 0x07060405, 0x09080607, 0x09080506, 0x0a090708, 0x0c0b090a, 0x06050303, + 0x04030101, 0x06050302, 0x03020000, 0x03020001, 0x04030102, 0x05040202, + 0x06050304, 0x0a090808, 0x07060505, 0x08070707, 0x0a090909, 0x0a090909, + 0x09080808, 0x12111111, 0x09080808, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x03020202, + 0x03020302, 0x09080908, 0x0a090a09, 0x110f1110, 0x66646465, 0x100e0f0f, + 0x05030504, 0x1f1d1e1f, 0x25242424, 0x22212121, 0x0f0e0e0e, 0x17151616, + 0x0f0e0e0e, 0x1e1d1d1d, 0x211e1e21, 0x1a18171a, 0x17151417, 0x1d1a1a1d, + 0x201d1e20, 0x19161719, 0x00000000, 0x00000000, 0x01000100, 0x15121414, + 0x16141415, 0x32303031, 0x78747776, 0x2f2e2d2d, 0x09080808, 0x06030605, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x09080808, 0x18171617, 0x11101010, 0x02010201, 0x01000000, 0x05040303, + 0x03020000, 0x05040202, 0x06050203, 0x04030102, 0x06050304, 0x04030001, + 0x03020001, 0x09090605, 0x09090607, 0x09080608, 0x16131216, 0x0a0a0709, + 0x06050305, 0x05040204, 0x07060406, 0x03020002, 0x03020001, 0x04030102, + 0x02010000, 0x06050304, 0x06050304, 0x03020001, 0x09080607, 0x100f0c0d, + 0x0e0d0a0b, 0x0d0c090a, 0x0d0c0b0b, 0x05040303, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x03020302, 0x0b0a0a0a, 0x09080908, 0x0e0d0e0d, 0x0f0e0f0e, + 0x0f0c0d0e, 0x07060606, 0x18171717, 0x27242526, 0x1d1b1c1c, 0x0b0a0a0a, + 0x16141515, 0x0d0c0c0c, 0x17161617, 0x1a18191a, 0x1a17181a, 0x18151618, + 0x1e1b1d1d, 0x201e1e1f, 0x17141516, 0x0e0b0c0e, 0x00000000, 0x00000000, + 0x03020202, 0x15121314, 0x2f2c2d2e, 0x58565657, 0x18161717, 0x06030505, + 0x01000100, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x08070707, 0x0c0b0b0b, 0x15131314, 0x07060606, 0x01000000, + 0x03020001, 0x03020000, 0x05040202, 0x09080607, 0x05040102, 0x0b0a0809, + 0x0e0c0b0c, 0x18121110, 0x271b1d19, 0x39262a25, 0x4f363b33, 0x6042483f, + 0x3f2a2a1f, 0x36272314, 0x3a2c2615, 0x3a2e2717, 0x382e2617, 0x3c322b1c, + 0x362e271b, 0x29221c12, 0x28231e17, 0x1815120f, 0x0d0b0909, 0x0b0a0808, + 0x0c0c090b, 0x100f0c0d, 0x11100d0e, 0x100f0c0d, 0x03020001, 0x02010000, + 0x03020000, 0x01000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x05040504, 0x03020302, 0x00000000, 0x0a090909, 0x0a090909, 0x08070707, + 0x0c0b0c0b, 0x09080908, 0x09080908, 0x0a090909, 0x1b191a1a, 0x211e1f20, + 0x14111214, 0x2c292a2b, 0x19171619, 0x1e1c1c1e, 0x1d1b1b1d, 0x14131213, + 0x0d0c0c0c, 0x13121212, 0x13121212, 0x0b0a0b0a, 0x00000000, 0x00000000, + 0x00000000, 0x04030403, 0x17151516, 0x1a191919, 0x2d2b2c2c, 0x04030303, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x03020302, 0x100e0f0f, 0x1c191b1b, 0x0f0e0f0e, 0x03020202, + 0x00000000, 0x07050404, 0x09060507, 0x0a080606, 0x05050202, 0x09060607, + 0x32272a2f, 0x5e43535c, 0x8f617882, 0xd18b9f91, 0xffacbb99, 0xffafb687, + 0xffb3b37a, 0xffb6b071, 0xffb8ae69, 0xffb8ab61, 0xffbaaa5c, 0xffbca955, + 0xffbea854, 0xffbfa958, 0xf9bba553, 0xefb29e4c, 0xe7ae9d56, 0xb88e7f49, + 0xa27d7046, 0x7c5d5436, 0x4c3c3629, 0x1f1c1a1a, 0x12120f12, 0x08060404, + 0x03010000, 0x03020000, 0x03020000, 0x03020000, 0x01000000, 0x00000000, + 0x00000000, 0x02010201, 0x06050605, 0x02010201, 0x07060706, 0x0a090a09, + 0x0b0a0a0a, 0x17141616, 0x0a090a09, 0x08070807, 0x05040504, 0x0e0d0d0d, + 0x11101010, 0x0d0b0b0d, 0x4b49494a, 0x4f4c4e4e, 0x100e0f0f, 0x0f0e0e0e, + 0x08070707, 0x0b0a0a0a, 0x0a090a09, 0x0d0c0d0c, 0x07060706, 0x00000000, + 0x00000000, 0x00000000, 0x03020302, 0x0e0d0e0d, 0x1a181819, 0x0e0d0d0d, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x02010201, 0x01000000, 0x12111111, 0x14131313, 0x13121212, + 0x03020102, 0x02010000, 0x02010000, 0x0e0c090b, 0x03030000, 0x0d0a090a, + 0x4330393e, 0x9269808b, 0xd193b0b5, 0xf7a9c1ab, 0xffa8bea4, 0xffa5bda7, + 0xffa9b998, 0xffafb27d, 0xffb6ae6c, 0xffb6ac65, 0xffb8aa5c, 0xffbda650, + 0xffbda54d, 0xffbea54c, 0xffbfa54c, 0xffbea54c, 0xffbfa44b, 0xffbfa64c, + 0xffbfa54c, 0xffbca34b, 0xffb09a46, 0xffa48f40, 0xeb988541, 0xb67d7040, + 0x59463f2d, 0x07070506, 0x03030002, 0x04030002, 0x03020000, 0x03020000, + 0x02010000, 0x01000000, 0x06050605, 0x08070707, 0x01000100, 0x06050605, + 0x0a090a09, 0x07060706, 0x0e0c0d0d, 0x120f1111, 0x06050605, 0x0b0a0b0a, + 0x05040404, 0x02020102, 0x12111111, 0x5b595a5a, 0x48464747, 0x06050505, + 0x03020202, 0x02010201, 0x01000101, 0x00000000, 0x00000000, 0x01000100, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x05040504, 0x04030403, + 0x0c0a0a0b, 0x06050505, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x100f0f0f, 0x22201f21, + 0x14131112, 0x08080305, 0x02010000, 0x02010000, 0x00000000, 0x06040404, + 0x62455259, 0xbf86a4b0, 0xf4abcccb, 0xfdadbd9b, 0xffadb891, 0xffabbb9b, + 0xffacb686, 0xffb6ab66, 0xffbaa958, 0xffb9a85c, 0xffb6ac67, 0xffbaa95c, + 0xffbba752, 0xffbda64e, 0xffc0a54d, 0xffc0a44c, 0xffbea64d, 0xffbfa64c, + 0xffbea64c, 0xffbfa64c, 0xfebfa54a, 0xffbea54b, 0xffbba249, 0xffb39b45, + 0xff9b8739, 0xff8f7b32, 0xf8907e37, 0xac756b43, 0x1c1a1711, 0x02020001, + 0x02000000, 0x02010000, 0x03020000, 0x05040303, 0x07060606, 0x01000100, + 0x07060706, 0x09080908, 0x07060706, 0x0d0c0d0c, 0x0b0a0a0a, 0x0f0c0d0e, + 0x07060706, 0x04030403, 0x00000000, 0x0a090909, 0x33323232, 0x211f2021, + 0x02020101, 0x00000000, 0x00000000, 0x00000000, 0x01000100, 0x03010202, + 0x05040404, 0x03010202, 0x05030403, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02010101, 0x03020001, + 0x08070506, 0x12100e10, 0x0a080606, 0x06040203, 0x0b070707, 0x241b1c1f, + 0x7752656c, 0xe9a1c9d6, 0xffb7ddea, 0xfcb7cbb7, 0xf9abb890, 0xfeb1b98a, + 0xfeb4b175, 0xffb8ad69, 0xffb7ab65, 0xffb2af70, 0xffafb484, 0xffb1b380, + 0xffb8aa5c, 0xffbea54e, 0xffbfa64b, 0xffc0a64d, 0xffc0a64b, 0xffbfa64b, + 0xffc0a54d, 0xffbfa54a, 0xffbca34a, 0xffbba149, 0xffbba149, 0xffbaa04a, + 0xffb59c44, 0xffb09845, 0xffab9543, 0xfe9d883a, 0xfe8d7931, 0xff88772f, + 0xde837744, 0x4c3b372d, 0x06050303, 0x03020001, 0x03020000, 0x05040203, + 0x03020202, 0x01000100, 0x07060706, 0x06050605, 0x0e0d0e0d, 0x0d0c0d0c, + 0x04030403, 0x08070807, 0x03020202, 0x00000000, 0x00000000, 0x08070707, + 0x06060505, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x06050505, 0x05040404, 0x04030302, 0x0a09080a, 0x03020302, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x01000000, 0x02010201, 0x04030303, + 0x03020101, 0x02010000, 0x03020000, 0x04030002, 0x1c141617, 0x452d383c, + 0x8858707b, 0xe89fc8d4, 0xffb6dadf, 0xffbbd3c8, 0xfebac5a4, 0xffbab885, + 0xffb9ba89, 0xf7b0b68a, 0xfeb1b88a, 0xffacb484, 0xffaeb484, 0xffafb382, + 0xffb7ab62, 0xffbfa547, 0xffc0a44b, 0xffbea64b, 0xffbca248, 0xffb9a049, + 0xffb8a04a, 0xffb69f4d, 0xffb39c49, 0xffad9644, 0xffa89242, 0xffa58f3f, + 0xffa18b3b, 0xffa08b39, 0xffa5903e, 0xffa38c3b, 0xff958035, 0xff907c34, + 0xfe887531, 0xfe7e6c29, 0xff7c6a29, 0xe6837540, 0x5a4b473d, 0x08070506, + 0x03020001, 0x04030102, 0x0a090909, 0x09080908, 0x04030403, 0x0e0d0e0d, + 0x0e0d0e0d, 0x04030403, 0x04030403, 0x04030303, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x03010201, 0x01000100, 0x01000100, + 0x05030403, 0x02010101, 0x01000100, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x03020302, + 0x08070707, 0x06050304, 0x05040202, 0x02010000, 0x02010000, 0x5d3f4d51, + 0xc37da2af, 0xeb8bbed2, 0xffacd1d2, 0xffb6d2cc, 0xffb9d5d0, 0xfeb7cab6, + 0xffbac39f, 0xffb9c4a1, 0xfeb6c19a, 0xffb5b785, 0xffb5b279, 0xfeb1af76, + 0xffb1b179, 0xffbaa859, 0xffbea64d, 0xffc0a74e, 0xffb9a248, 0xffa48d3a, + 0xffab9752, 0xffb1a268, 0xffbeb282, 0xffcdc4a0, 0xffd2caa8, 0xffbfb79a, + 0xffb9b197, 0xff938b6e, 0xffcbc3a7, 0xff9b9171, 0xff998b60, 0xff897737, + 0xff83702a, 0xff82702b, 0xff7e6c2b, 0xff7a6827, 0xff766525, 0xff7a6828, + 0xc9887c57, 0x17161211, 0x06050304, 0x05040203, 0x05040404, 0x00000000, + 0x0b0a0b0a, 0x0a090a09, 0x07060706, 0x07060706, 0x02010101, 0x00000000, + 0x00000000, 0x00000000, 0x01000100, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x03020202, 0x09080808, 0x01000100, 0x00000000, 0x00000000, 0x00000000, + 0x02010201, 0x03020302, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x05050404, 0x0d0c0c0c, 0x08070507, 0x06050204, 0x07060305, 0x03030000, + 0x38263032, 0xd482acb7, 0xff96c5c8, 0xff9ac1bf, 0xff9acbd5, 0xfe9cd2e5, + 0xffb1d5d8, 0xffaec7b4, 0xffb1ba90, 0xffb6af72, 0xffb9aa5c, 0xffb7ac64, + 0xffb1b07c, 0xffaeb27d, 0xffb0b178, 0xffa8b68e, 0xffa5c0aa, 0xffb29e4b, + 0xffa48f3f, 0xffdfd7b7, 0xfff9f7f1, 0xfffaf8f4, 0xffe5e5e2, 0xffcacac8, + 0xffd2d2d1, 0xffebebea, 0xffd8d8d7, 0xffb0b0ae, 0xfff2f2f1, 0xffc6c6c6, + 0xffc3c2c1, 0xffe7e3da, 0xffada78c, 0xff8f7f45, 0xff786424, 0xff7a682c, + 0xfe8b7c48, 0xff978a57, 0xff887a42, 0x805d5b4a, 0x13120f13, 0x0b0a0808, + 0x09090607, 0x0d0c0c0c, 0x0a090a09, 0x02010201, 0x05040504, 0x01000100, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x01000100, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x03020202, 0x08070707, 0x05050405, 0x01000100, 0x00000000, + 0x00000000, 0x00000000, 0x05040504, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x03020202, 0x0a090909, 0x05040202, 0x07070202, 0x07060302, + 0x0e090a09, 0x89567077, 0xff94c4cc, 0xff8bbfc3, 0xfe92c7d5, 0xff91cfeb, + 0xff9dd4ec, 0xffa0cfd8, 0xff9bbeb4, 0xffa9b487, 0xffb3ad68, 0xffafb074, + 0xffadb486, 0xffa7b894, 0xffabb68f, 0xffa8b99b, 0xffa3bca9, 0xffa4b189, + 0xffaba15b, 0xffb5a460, 0xffe5dfc8, 0xfffefefe, 0xfffdfdfe, 0xfffdfdfd, + 0xffd2d2d3, 0xffeeeeee, 0xfff2f2f2, 0xffcecece, 0xffc9c9c9, 0xffaaaaaa, + 0xfff7f7f7, 0xffe4e4e4, 0xffc5c5c4, 0xffe2e1e3, 0xffc3c2c5, 0xffdad5cc, + 0xffa89d71, 0xffeceada, 0xfffffffe, 0xfffafafd, 0xfff3f2e7, 0x8978766e, + 0x100f0c0f, 0x0c0b0909, 0x13121011, 0x0e0d0d0d, 0x00000000, 0x04030403, + 0x03020302, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x01000100, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x04040303, 0x09090809, 0x09080808, 0x0f0e0d0f, 0x03030202, 0x02010101, + 0x07050606, 0x00000000, 0x02010201, 0x08070807, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x02010101, 0x07060505, 0x04030102, 0x04040000, + 0x02020000, 0x35242a2f, 0xc06b99ac, 0xff7bc1d9, 0xff79c0d5, 0xfe7bc3e1, + 0xff8bc6d3, 0xffa8c5aa, 0xffb1bc85, 0xffafbb85, 0xffacbe95, 0xffb1b884, + 0xffadbb94, 0xffa9bc9e, 0xffa6b99a, 0xffa2b99f, 0xff9db497, 0xff9ca57b, + 0xffa09552, 0xffaf9b55, 0xffd4c8a2, 0xfff8f5ee, 0xffffffff, 0xfffefefc, + 0xfffefefe, 0xfff1f1f1, 0xffe2e2e2, 0xff737373, 0xffdadada, 0xffe7e7e7, + 0xffc2c2c2, 0xffc8c8c8, 0xffcfcfcf, 0xffececec, 0xffcececd, 0xffb2b2b2, + 0xfff6f6f6, 0xffc1c2c4, 0xffe5e6e6, 0xfffcfbfc, 0xff8c8b8d, 0xfec5c5c3, + 0xffffffff, 0xc8c7c7c7, 0x11110e0f, 0x0c0b080a, 0x0d0d090a, 0x0a090909, + 0x04030403, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x01000100, 0x05040504, 0x01000100, 0x00000000, 0x00000000, 0x00000000, + 0x06050505, 0x0c0b0b0c, 0x0a0a0909, 0x07070607, 0x02010101, 0x01000000, + 0x03020202, 0x06050605, 0x08060707, 0x06050605, 0x04030403, 0x00000000, + 0x00000000, 0x00000000, 0x03020101, 0x09080508, 0x0c0c0808, 0x05040203, + 0x02020000, 0x06050303, 0x6745555a, 0xde6fabc2, 0xff6bc0e0, 0xff6cbee2, + 0xfe5fbae1, 0xff7bbcb7, 0xffccca5e, 0xffedce35, 0xffeace3e, 0xffe7d146, + 0xffebd13f, 0xffeece35, 0xffeccc3c, 0xffdccf56, 0xffcdd077, 0xffd6d373, + 0xffdbcb60, 0xffdac24e, 0xffdfc451, 0xfff1ebcb, 0xffffffff, 0xffffffff, + 0xffffffff, 0xffffffff, 0xffffffff, 0xfffcfcfc, 0xffdadada, 0xffb8b8b8, + 0xffd9d9d9, 0xffd9d9d9, 0xffe5e5e5, 0xffe3e3e3, 0xffd3d3d3, 0xffdedfde, + 0xffe3e3e3, 0xffd3d3d4, 0xffefefef, 0xffd8d8d7, 0xfff4f4f4, 0xffacabab, + 0xff868686, 0xffffffff, 0xfefcfcfa, 0xe3a09f9f, 0x2d262325, 0x04040102, + 0x05040102, 0x05040303, 0x01000101, 0x00000000, 0x02010201, 0x00000000, + 0x01000000, 0x00000000, 0x06050505, 0x0d0c0c0c, 0x00000000, 0x00000000, + 0x02010201, 0x19181718, 0x11100f11, 0x08080707, 0x04040303, 0x00000000, + 0x00000000, 0x00000000, 0x05040504, 0x02010101, 0x07060706, 0x04030303, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x08070506, 0x11100c0f, + 0x06040305, 0x00000000, 0x2b1e2223, 0xce89acb6, 0xf872bddb, 0xff5eb9e2, + 0xff64bce3, 0xfe55b5e6, 0xff5eb6d3, 0xffc3ce6a, 0xffffda27, 0xfffeda25, + 0xfffdda2e, 0xffffd928, 0xffffd926, 0xffffd925, 0xffffda25, 0xfffdd92e, + 0xfffade3f, 0xfffeda33, 0xfffed933, 0xffffdc36, 0xffe2cd57, 0xffb6a878, + 0xffb1afa7, 0xffb1b0b2, 0xffacacac, 0xffababab, 0xffefefef, 0xffffffff, + 0xfffafafa, 0xfff7f7f7, 0xfff8f8f8, 0xfffbfbfb, 0xffe8e8e8, 0xffd6d6d6, + 0xffececec, 0xfff5f5f5, 0xffededed, 0xfffcfcfc, 0xfffbfbfb, 0xfffdfdfd, + 0xfffefefe, 0xffe7e7e7, 0xfff7f7f7, 0xffffffff, 0xfeb4b3b4, 0xff504c4d, + 0x9a404242, 0x00000000, 0x02010000, 0x02010000, 0x00000000, 0x00000000, + 0x00000000, 0x01000100, 0x01000100, 0x0a090909, 0x0c0b0b0b, 0x312f3030, + 0x0d0c0c0c, 0x0e0d0d0d, 0x0c0b0b0b, 0x04010302, 0x03010201, 0x04040303, + 0x04030303, 0x00000000, 0x00000000, 0x01000100, 0x06050605, 0x0d0c0d0c, + 0x03020302, 0x00000000, 0x00000000, 0x00000000, 0x01000000, 0x05040404, + 0x09080607, 0x09080605, 0x07060405, 0x29212022, 0xca85a8b2, 0xff79c1de, + 0xfe4baad5, 0xff55b3db, 0xff57b6e0, 0xff62b8cd, 0xffbacb74, 0xffffda26, + 0xfffed924, 0xfffed82a, 0xfffed92d, 0xfffed828, 0xfffed926, 0xffffd829, + 0xfffcdb30, 0xfffdda34, 0xfffed92f, 0xfffdd92e, 0xfffddc2f, 0xfffdd829, + 0xfffad734, 0xfff1d565, 0xffedece7, 0xfff5f5f6, 0xfffefefe, 0xfffdfdfd, + 0xffb9b9b9, 0xffbebebe, 0xfff3f3f3, 0xfff8f8f8, 0xfffbfbfb, 0xfffdfdfd, + 0xfff7f7f7, 0xffeaeaea, 0xffe6e6e6, 0xfff6f6f6, 0xfff7f7f7, 0xffc7c7c7, + 0xfff8f8f8, 0xfffbfbfb, 0xfffefefe, 0xffffffff, 0xfef9f9f9, 0xfeaca9ab, + 0xff3d393d, 0xff4d484c, 0xe9424242, 0x01010101, 0x04030101, 0x02010000, + 0x00000000, 0x03020202, 0x0a090909, 0x00000000, 0x07060706, 0x0d0c0c0c, + 0x28262727, 0x47454547, 0x08070707, 0x0e0d0d0d, 0x0f0f0d0e, 0x08070707, + 0x07060606, 0x05040405, 0x03020203, 0x02010102, 0x02010201, 0x0a090a09, + 0x09080908, 0x01000100, 0x00000000, 0x00000000, 0x00000000, 0x04030304, + 0x0c0a0b0b, 0x0b0a0909, 0x0b0a0708, 0x09080608, 0x1d17171a, 0xaf6e8f97, + 0xff79c2dd, 0xff49a8d3, 0xff4eaed8, 0xff55b6e2, 0xff6ebbc1, 0xffc9ce64, + 0xffffda26, 0xfffdd927, 0xfffed926, 0xfffcd929, 0xfffddc40, 0xfffdda2f, + 0xfffed926, 0xfffdda2b, 0xfffedc33, 0xfffdda2f, 0xfffdd92b, 0xffffd827, + 0xfffdd827, 0xffffda27, 0xffe7d16d, 0xff959590, 0xff6f6b6e, 0xff8f8d8e, + 0xffb0afaf, 0xffd2d1d1, 0xffdfdfdf, 0xffededed, 0xffc4c4c4, 0xffc8c8c8, + 0xffd6d6d6, 0xffdbdbdb, 0xffd8d8d8, 0xffd4d4d4, 0xff9d9d9d, 0xff8e8d8e, + 0xffb1b0b1, 0xfff7f7f7, 0xffe0e0e0, 0xfffcfdfd, 0xfffdfdfd, 0xffddddde, + 0xfb9f9d9e, 0xfc2d292c, 0xff2e282c, 0xfe5a565b, 0xf63c383b, 0x54333333, + 0x08080708, 0x07060304, 0x02010101, 0x02010201, 0x08070807, 0x06050505, + 0x0d0c0c0c, 0x201e1f1f, 0x34313333, 0x18171617, 0x0e0e0d0e, 0x17171617, + 0x1d1c1b1d, 0x02010101, 0x00000000, 0x02000001, 0x01000001, 0x03010103, + 0x07060706, 0x09080908, 0x04030403, 0x00000000, 0x00000000, 0x00000000, + 0x03020203, 0x09050607, 0x0b090909, 0x13111012, 0x09070607, 0x110e0d0f, + 0xbc7899a6, 0xff77c1dd, 0xff4ba8d4, 0xfe4dadd6, 0xff56b7e1, 0xff78bcbb, + 0xffd8d152, 0xffffda26, 0xffffd827, 0xfffcdd43, 0xfffcdc3f, 0xfffbdf5e, + 0xfff9dd4e, 0xfffddc3f, 0xfffddb38, 0xfffed927, 0xfffdd928, 0xfffdd92d, + 0xfffdd829, 0xfffed829, 0xfffed928, 0xfff5d430, 0xffa39036, 0xff534d49, + 0xffaba8aa, 0xfffdfdfd, 0xffdadada, 0xffd8d7d7, 0xffc4c4c4, 0xffbfbfbf, + 0xffc7c7c7, 0xffcecece, 0xffc3c3c3, 0xffbfbfbf, 0xffb8b8b8, 0xffb1b1b1, + 0xffb8b8b8, 0xffdedede, 0xffb6b6b6, 0xfff1f1f1, 0xffffffff, 0xffc2c1c1, + 0xff757274, 0xfe4b474b, 0xff2b252a, 0xff2e292d, 0xfe332e32, 0xff514c50, + 0xfe514d50, 0xab606262, 0x1a1a1a1a, 0x0b0b0809, 0x04030203, 0x03020302, + 0x0a090a09, 0x0e0d0d0d, 0x13121112, 0x29262729, 0x19171818, 0x06050605, + 0x0d0c0c0d, 0x1b1a1a1a, 0x1a171819, 0x03010103, 0x01000000, 0x01000001, + 0x02000002, 0x02000002, 0x00000000, 0x03020302, 0x06050605, 0x01000000, + 0x00000000, 0x01000000, 0x04000002, 0x06000001, 0x0f07080a, 0x0d0a070a, + 0x03030101, 0xbd789ba2, 0xff75c1de, 0xff4eacd5, 0xfe4bacd2, 0xff56b4e1, + 0xff69bacf, 0xffbecb6f, 0xfffed928, 0xffffd822, 0xfffbdc3c, 0xfffbe068, + 0xfff9e179, 0xfff8e37d, 0xfffcdf65, 0xfffcdd4d, 0xfffbdb37, 0xfffed928, + 0xfffddb31, 0xfffdd932, 0xfffed82a, 0xfffeda29, 0xffffda2b, 0xfff5d333, + 0xff82722d, 0xff2d2925, 0xff959298, 0xffffffff, 0xffffffff, 0xffffffff, + 0xffffffff, 0xfff6f6f6, 0xfff0f0f0, 0xffe6e6e6, 0xffe2e2e2, 0xffe5e5e5, + 0xffededed, 0xfff5f5f5, 0xfff7f7f7, 0xffb9b9b9, 0xffe4e4e3, 0xffe9e8e9, + 0xff5c5a5d, 0xff342f33, 0xff2e292d, 0xff2d282c, 0xff2d282c, 0xff2e292d, + 0xff322d31, 0xff464044, 0xff4a464a, 0xea8f8e8f, 0x5c524f52, 0x0c0c080a, + 0x04030102, 0x07060706, 0x0e0d0e0d, 0x11101010, 0x16151515, 0x14131213, + 0x05040404, 0x00000000, 0x22212121, 0x0e0d0c0e, 0x04020204, 0x01000001, + 0x02000002, 0x01000001, 0x01000001, 0x03000002, 0x04030403, 0x02010101, + 0x00000000, 0x03020302, 0x02000101, 0x05000001, 0x05000001, 0x08010203, + 0x0f060407, 0x0c050407, 0xa069838b, 0xff89c6df, 0xfe59b3d7, 0xff4ba8d3, + 0xff57b5e1, 0xff5ebad5, 0xff9fc28d, 0xfffad930, 0xffffd824, 0xfffdd830, + 0xfffbdf57, 0xfff8e272, 0xfff6e589, 0xfff7e283, 0xfff6e382, 0xfff8df64, + 0xfffddc4a, 0xfffdda2c, 0xfffdd82b, 0xfffacf26, 0xfffbd527, 0xfffed827, + 0xfffed825, 0xfffdd225, 0xff917a39, 0xff352d33, 0xff302e30, 0xff3b373a, + 0xff5b585b, 0xff7e7b7e, 0xffaeaeae, 0xffeaeaea, 0xfff4f3f4, 0xfff6f6f6, + 0xfff5f5f5, 0xfff5f5f5, 0xfff6f6f6, 0xffd8d8d8, 0xffa7a7a7, 0xffefefef, + 0xffe5e4e5, 0xff524d51, 0xff2b262b, 0xff2f2a2e, 0xff2f2a2e, 0xff2f2a2e, + 0xff2e292d, 0xff2e292d, 0xff302b2f, 0xff433d41, 0xff484246, 0xff939192, + 0x8c6c6b6d, 0x10100d0e, 0x07060405, 0x0d0c0c0c, 0x0e0d0e0d, 0x12111111, + 0x11101010, 0x09080808, 0x07060706, 0x08070707, 0x08060608, 0x01000000, + 0x01000001, 0x02000002, 0x01000001, 0x01000001, 0x08050608, 0x06050605, + 0x05040404, 0x00000000, 0x03020202, 0x04020202, 0x05000001, 0x06000001, + 0x0a020304, 0x0c040305, 0x0a030003, 0x764e5e64, 0xff97ccdb, 0xff80c0d6, + 0xff5dafd0, 0xff52b3dd, 0xff53b3df, 0xff6ebac3, 0xffcbd06b, 0xffedd53c, + 0xfffed824, 0xfffcdb3a, 0xfffae36a, 0xfff8e173, 0xfff8e387, 0xfff6e593, + 0xfff7e27b, 0xfff9e170, 0xfffbdf57, 0xfffed92c, 0xfffdd528, 0xffdab123, + 0xffc2a528, 0xfffcd82a, 0xffe1b824, 0xffb89326, 0xff40392e, 0xff726e71, + 0xffbcbbbb, 0xffdad9da, 0xffdbdada, 0xffebeaea, 0xffc9c8c9, 0xffa5a4a4, + 0xffa9a8a8, 0xffacabab, 0xffadabac, 0xffaeacad, 0xffbbbbbb, 0xffdadada, + 0xffffffff, 0xffffffff, 0xff989798, 0xff2f2a2e, 0xff2e292d, 0xff2e292d, + 0xff2f2a2e, 0xff2e292d, 0xff2f2a2e, 0xff2e292d, 0xff312c30, 0xff423d41, + 0xfe474246, 0xff8b898b, 0x71434344, 0x12110e0f, 0x0c0b080a, 0x110f0f10, + 0x0f0e0f0e, 0x13111312, 0x09080908, 0x03020302, 0x06050505, 0x100f0f0f, + 0x01000001, 0x01000001, 0x01000001, 0x02000001, 0x03000103, 0x0d0a0b0d, + 0x13101113, 0x03020202, 0x05040504, 0x02010101, 0x04010101, 0x06000001, + 0x07000002, 0x08000103, 0x0e050406, 0x06000000, 0x50364144, 0xf8a7ccd2, + 0xf599c5d0, 0xfd93c4d4, 0xff57b3d6, 0xff41a0cc, 0xff5ab6dc, 0xff82c4d1, + 0xffafca90, 0xfff5d732, 0xffffd925, 0xfffedb33, 0xfffcdb3f, 0xfff9e170, + 0xfff7e48e, 0xfff8e27c, 0xfff8e16d, 0xfff9df62, 0xfffbde52, 0xfffedb2b, + 0xfffdc825, 0xff9c8129, 0xff74652a, 0xffb79b2e, 0xff7a6026, 0xff4d4533, + 0xff97959a, 0xfffbfdfa, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, + 0xffffffff, 0xffffffff, 0xffffffff, 0xfffefefe, 0xfffbfbfb, 0xfffefffe, + 0xffffffff, 0xffffffff, 0xfffefefe, 0xfff6f6f6, 0xff656365, 0xff2c272b, + 0xff2e282c, 0xff2d282c, 0xff2e292d, 0xff2d282c, 0xff2f2a2e, 0xff2f2a2e, + 0xff302b2f, 0xff423d41, 0xff3d383c, 0xff807e81, 0xa7676969, 0x13100d0e, + 0x04030102, 0x0b0a090b, 0x0f0e0f0e, 0x09080908, 0x04030303, 0x0a090a09, + 0x0f0e0e0e, 0x24222223, 0x01000001, 0x02000002, 0x01000001, 0x00000000, + 0x04030303, 0x02010201, 0x02000002, 0x00000000, 0x05040504, 0x04030303, + 0x04000001, 0x07000002, 0x07000002, 0x0c040404, 0x08020000, 0x01000000, + 0x583b4549, 0xf7a1c8d2, 0xd38cabb5, 0xfc6ebbd8, 0xff3191be, 0xff50b1d8, + 0xff63bde5, 0xff88c3b5, 0xffe4d449, 0xffffda29, 0xffffd826, 0xffffd829, + 0xfffddc3a, 0xfff9e06d, 0xfff8e277, 0xfffbde4f, 0xfffcdb43, 0xfffddd49, + 0xfff5d447, 0xffeaca44, 0xffc19930, 0xff4e4429, 0xff51472d, 0xff433828, + 0xff302c2c, 0xff726d70, 0xfff8f8f8, 0xfffffefe, 0xfffdfdfd, 0xfffefdfe, + 0xfffdfdfd, 0xfffefefe, 0xfffefefe, 0xfffdfdfd, 0xfffefefe, 0xfffefefe, + 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffdfdfd, 0xfffefefe, 0xffe3e2e2, + 0xff524d50, 0xfe2c272b, 0xff2b282b, 0xfe2d282c, 0xff2e282c, 0xff2e292d, + 0xff2e292d, 0xff2f2a2e, 0xff2e292d, 0xff3f3a3e, 0xff353034, 0xff767477, + 0xbb757878, 0x110a0809, 0x05000000, 0x07030103, 0x05030403, 0x01000100, + 0x0d0c0d0c, 0x0d0c0c0c, 0x201e1e1f, 0x25222325, 0x01000001, 0x02000002, + 0x08050607, 0x0f0c0d0f, 0x02000002, 0x02000001, 0x01000000, 0x04000000, + 0x01000000, 0x04030303, 0x04010201, 0x05000001, 0x07000002, 0x09040103, + 0x04010100, 0x0e080709, 0xbf7c9aa0, 0x9b647c82, 0xc46597a8, 0xff3996bd, + 0xff42a3cd, 0xff56b4e0, 0xff6ab9ca, 0xffcdd162, 0xffefd846, 0xfffcd829, + 0xfffed929, 0xfffed925, 0xfffcda3b, 0xfffae273, 0xfffbdc40, 0xffffd929, + 0xfffed927, 0xffffdb33, 0xffe5b32d, 0xffad8e3e, 0xff63583b, 0xff2f2a29, + 0xff383129, 0xff2f292e, 0xff2c2a2b, 0xff817d81, 0xffffffff, 0xfffefefe, + 0xfffefefe, 0xfffdfefd, 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffefefe, + 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffefefe, + 0xfffefefe, 0xffd4d3d3, 0xff413f41, 0xff2f292d, 0xf1464748, 0xff2d292d, + 0xff2c292e, 0xff302b2e, 0xff2d282c, 0xff2e292d, 0xff2e292d, 0xff3a373a, + 0xff332f33, 0xff787376, 0xb9727373, 0x12080508, 0x0a030003, 0x0c030204, + 0x0b060608, 0x1c181a1b, 0x19161718, 0x16161515, 0x19181818, 0x12111111, + 0x04020204, 0x0f0c0d0f, 0x0b08090b, 0x01000001, 0x02000001, 0x04000001, + 0x07000002, 0x07000002, 0x06000002, 0x05000001, 0x03000000, 0x04010102, + 0x05020102, 0x06040203, 0x0e0b0709, 0xae758d92, 0xa96d868c, 0x76445b64, + 0xfa4fa0c4, 0xff399ac6, 0xff4bacd3, 0xff4eadd4, 0xff8bc2a5, 0xffc0cc75, + 0xffe7d440, 0xffffda2b, 0xfffeda28, 0xfffdd927, 0xfffcdf53, 0xfffcdc45, + 0xfffed825, 0xffffd825, 0xfffed629, 0xfffed731, 0xffd8a428, 0xff715b2c, + 0xff372f2b, 0xff2e2a2b, 0xff322b2c, 0xff2f292e, 0xff2d292d, 0xff777576, + 0xfffdfdfd, 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffefefe, + 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffdfdfd, + 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xffc9c8c8, 0xfe363236, 0xff322f32, + 0x583d3a3c, 0xe23d3d3e, 0xff2b272a, 0xfe2c272b, 0xff2d282c, 0xff2e292d, + 0xff2f2a2e, 0xff342f33, 0xfe332f33, 0xff757174, 0xb36c6d6d, 0x12060507, + 0x0b030003, 0x0b020204, 0x07010103, 0x15121314, 0x221f2022, 0x211e1e20, + 0x0f0b0b0d, 0x0e0b0c0d, 0x1a17181a, 0x0c090a0b, 0x03000001, 0x05000001, + 0x06000002, 0x07000002, 0x07000002, 0x07000002, 0x06000001, 0x07000002, + 0x05000001, 0x03010101, 0x04010000, 0x04000000, 0x835a686c, 0xb97d9698, + 0x3822292d, 0xeb62a5c0, 0xff3797c1, 0xfe40a1ca, 0xff48a7d2, 0xff63b9d1, + 0xff98c498, 0xffe7d445, 0xffffda27, 0xfffdd926, 0xffffd826, 0xfffcda38, + 0xfffddd45, 0xfffdda25, 0xfffed826, 0xfffeda2b, 0xfffbc521, 0xffffd12e, + 0xffcf9822, 0xff584a33, 0xff302c2a, 0xff2f2b2c, 0xff2d292d, 0xff2d292c, + 0xff2e282c, 0xff747172, 0xfffbfbfb, 0xfffefefe, 0xfffefefe, 0xfffefefe, + 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffefefe, + 0xfffefefe, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffc3c3c4, + 0xff322e32, 0xff383738, 0x332a2929, 0x7d282829, 0xff312d30, 0xff2d282d, + 0xff2d282c, 0xff2e292d, 0xff2f2a2e, 0xff312c30, 0xfe373135, 0xff6e686b, + 0x8b545354, 0x0d030205, 0x0a030003, 0x0c060508, 0x110d0e10, 0x2a27282a, + 0x1b18181a, 0x08030205, 0x05000001, 0x05000002, 0x0a060608, 0x05000001, + 0x06000001, 0x07000002, 0x07000002, 0x06000001, 0x07000002, 0x06000001, + 0x07000002, 0x06000001, 0x0a020304, 0x08020002, 0x09020002, 0x0a010002, + 0x36222528, 0x150a0a0e, 0xc36994a4, 0xff3896be, 0xff3596c0, 0xfe41a3cd, + 0xff51afd7, 0xff6db9c2, 0xffd5d156, 0xfffbd429, 0xfffed525, 0xfffdd926, + 0xfffed627, 0xfffbca24, 0xffffd82c, 0xfffed925, 0xffffd928, 0xffffd729, + 0xfff9bb1d, 0xfff8c628, 0xffb48320, 0xff594f3e, 0xff2e2929, 0xff2e292d, + 0xff2d282d, 0xff2d292c, 0xff2d282c, 0xff6a686a, 0xfff6f6f6, 0xfffefefe, + 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffefefe, 0xfffefefe, + 0xfffefefe, 0xfffdfdfd, 0xfffefefe, 0xffebebec, 0xff999798, 0xff6e6d6d, + 0xffc1c0c0, 0xffb6b4b6, 0xff312c30, 0xff484748, 0x07070707, 0x38171717, + 0xe5333034, 0xff2d292a, 0xff2d282d, 0xff302b2f, 0xff302b2f, 0xff302b2f, + 0xff363034, 0xff605d5f, 0x5c333334, 0x07010001, 0x06030102, 0x08070707, + 0x0c0b0c0b, 0x0b08090a, 0x07000103, 0x06000001, 0x07000002, 0x06000002, + 0x06000001, 0x07000002, 0x06000001, 0x07000002, 0x07000002, 0x07000002, + 0x07000002, 0x07000002, 0x07000002, 0x09010203, 0x0b030204, 0x0a020002, + 0x0a010002, 0x0a020003, 0x05000000, 0x7142575f, 0xff4ba2c8, 0xfe2a8bb9, + 0xff409fca, 0xff49a6d2, 0xff4ba6c2, 0xffb6cb7b, 0xffe1d048, 0xfff3cc2c, + 0xfffddb25, 0xffffd92a, 0xfffbc822, 0xfffcd026, 0xfffed927, 0xffffd827, + 0xfffed929, 0xfffed328, 0xfff9b81a, 0xfff4bd23, 0xff956f25, 0xff4a4337, + 0xff2d282c, 0xff2d292c, 0xff2d282d, 0xff2e292c, 0xff2d282c, 0xff474446, + 0xffd3d2d3, 0xfffefefe, 0xfffdfdfd, 0xfffefefe, 0xfffefefe, 0xfffefefe, + 0xfffefefe, 0xfffefefe, 0xfffdfdfd, 0xfffefefe, 0xfff4f3f4, 0xff908e91, + 0xff716c71, 0xff575256, 0xff464145, 0xff5c585c, 0xff2e292d, 0xff454547, + 0x24181616, 0x120e0e0d, 0xb73f3e40, 0xff322d31, 0xfe2c272b, 0xff302b2f, + 0xff312c30, 0xff312c30, 0xff342f33, 0xe8434144, 0x3c1f1d1f, 0x09080508, + 0x09090607, 0x05040404, 0x02000000, 0x06000001, 0x07000002, 0x07000002, + 0x06000001, 0x05000002, 0x07000002, 0x06000001, 0x07000002, 0x07000002, + 0x07000002, 0x07000002, 0x07000002, 0x06000001, 0x09010203, 0x10060608, + 0x0a020002, 0x09010002, 0x09020002, 0x09030001, 0x52323e42, 0xfa5facca, + 0xfe2789b4, 0xfe3a99c3, 0xff41a2cd, 0xff3e9dc0, 0xff90c3a3, 0xffb6c976, + 0xffd2c551, 0xfffdd629, 0xfffedb2e, 0xfffcd42a, 0xfffdcb26, 0xfffed72a, + 0xfffdd926, 0xfffed828, 0xfffeda29, 0xfffed52b, 0xfff8b91c, 0xffd9a621, + 0xff5f4723, 0xff302a2b, 0xff2d292d, 0xff2e292c, 0xff2e282d, 0xff2d292c, + 0xff2e292d, 0xff292428, 0xff575457, 0xffdcdbdc, 0xffffffff, 0xfffdfdfd, + 0xfffdfdfd, 0xfffefefe, 0xfffefefe, 0xfffdfdfd, 0xfffdfdfd, 0xffffffff, + 0xffbebdbf, 0xff757074, 0xff4e494d, 0xff2e292d, 0xff2d282c, 0xff322e32, + 0xfe2d292d, 0xff2b282c, 0xe4464546, 0x52232222, 0x241e1d1d, 0xad373839, + 0xff342f33, 0xff2e292d, 0xff302b2f, 0xff312c30, 0xff302c30, 0xce48484a, + 0x231e1c1d, 0x08080506, 0x07050306, 0x08030406, 0x06000001, 0x06000002, + 0x06000002, 0x07000002, 0x05000001, 0x05040403, 0x07000002, 0x07000002, + 0x07000002, 0x07000002, 0x07000002, 0x07000002, 0x07000002, 0x05000001, + 0x0f060608, 0x0b020003, 0x0a020003, 0x0a020002, 0x08000000, 0x3620262b, + 0xde5b97ad, 0xff348fb7, 0xfe2c8bb7, 0xff3799c3, 0xff3b9ac3, 0xff6eb8c2, + 0xff87bfad, 0xff9abb8e, 0xffe8d23d, 0xffffda31, 0xfffcd72b, 0xfffccc25, + 0xfffdd227, 0xfffed829, 0xfffed927, 0xfffdd829, 0xfffed929, 0xfffed52a, + 0xfff5b71c, 0xffb88b24, 0xff564425, 0xff2f2b2b, 0xff2e292e, 0xff2e292d, + 0xff2e282d, 0xff2d292c, 0xff2e292d, 0xff2e282c, 0xff282327, 0xff534f53, + 0xffdddcde, 0xffffffff, 0xfffdfdfd, 0xfffdfdfd, 0xfffefefe, 0xfffdfdfd, + 0xffffffff, 0xffd5d4d6, 0xff767276, 0xff544e53, 0xff322c30, 0xff2e292d, + 0xff2d292d, 0xff2d292c, 0xff2f2a2e, 0xfe2d282c, 0xff312c30, 0xf83a383a, + 0x702f2f2f, 0x16161616, 0xff413f41, 0xff2c272b, 0xff2e292d, 0xfe2d282c, + 0xff322f33, 0xb8545656, 0x0f0e0c0d, 0x05030002, 0x110a090b, 0x06000001, + 0x06000001, 0x07000002, 0x06000001, 0x06000002, 0x03010101, 0x07060706, + 0x07000002, 0x07000002, 0x07000002, 0x07000002, 0x07000002, 0x07000002, + 0x06000001, 0x0e050608, 0x0d040305, 0x0a030002, 0x0b010003, 0x08010001, + 0x170f0e13, 0xc1588798, 0xff338db5, 0xff3090ba, 0xff2b8db8, 0xff3091bc, + 0xff50acce, 0xff6abac6, 0xff83b9ab, 0xffc7ca60, 0xfffdd32a, 0xfffcd329, + 0xfffccb27, 0xfffbc620, 0xfffdd428, 0xfffcda28, 0xffffd82b, 0xfffed928, + 0xfffdd928, 0xffffd72b, 0xfff4b81d, 0xffb98c1f, 0xff664d26, 0xff312c2a, + 0xff2e2a2b, 0xff2d292b, 0xff2d282d, 0xff2e292c, 0xff2e292d, 0xff2e292d, + 0xff2d282c, 0xff2e292d, 0xff4b484c, 0xffd6d6d6, 0xffffffff, 0xfffdfdfd, + 0xfffdfdfd, 0xffffffff, 0xffefefef, 0xff736f73, 0xff443f43, 0xff363034, + 0xff2d282c, 0xff2d282c, 0xff2d282c, 0xff2e292d, 0xff2e292d, 0xff2e292d, + 0xfe2f2a2e, 0xff2f2a2e, 0xfa383637, 0x784d4e4f, 0xff3b393c, 0xff2d272b, + 0xff2e292d, 0xfe2f292d, 0xf239373a, 0x34252124, 0x05020002, 0x09010002, + 0x1209080a, 0x06000001, 0x06000001, 0x06000001, 0x07000002, 0x07000002, + 0x05000001, 0x03020202, 0x07000002, 0x07000002, 0x07000002, 0x07000002, + 0x06000001, 0x06000001, 0x09010203, 0x0f060507, 0x09010002, 0x0a020003, + 0x08000002, 0x0e090505, 0xad608391, 0xff338ab2, 0xfe2b8cb5, 0xff2787b3, + 0xff2d8eb8, 0xff3e9dc8, 0xff5fb8d8, 0xff6bb8c9, 0xffa6c286, 0xffe7c63c, + 0xfff9ca24, 0xfffdca24, 0xfff9bd1c, 0xfffdd52a, 0xfffdd62a, 0xffffd62a, + 0xfffad023, 0xffffd929, 0xfffdd829, 0xffffd92c, 0xfff7b81c, 0xffd39f1e, + 0xff8a6a24, 0xff3c3429, 0xff2f2a2d, 0xff2d292e, 0xff2e282d, 0xff2e282c, + 0xff2d292d, 0xff2e292d, 0xff2e292d, 0xff2d282c, 0xff2e292d, 0xff4c484c, + 0xffd1d0d1, 0xfffefefe, 0xfffefefe, 0xffffffff, 0xff9a9899, 0xff4e494d, + 0xff353034, 0xff2d282c, 0xff2d292c, 0xff2e282d, 0xff2e292d, 0xff2d282c, + 0xff2d282c, 0xff2f2a2e, 0xff2f2a2e, 0xfe322e32, 0xff2c2529, 0xff545657, + 0xfe3f3d40, 0xfe2b262a, 0xfe2d282c, 0xff302d2f, 0x99414342, 0x00000000, + 0x09010002, 0x0a020003, 0x0f060708, 0x06000001, 0x07000002, 0x06000001, + 0x06000001, 0x07000002, 0x06000002, 0x06020204, 0x07000002, 0x07000002, + 0x07000002, 0x06000001, 0x06000001, 0x0a020304, 0x0c020205, 0x0a020002, + 0x0a020002, 0x09010002, 0x06030001, 0x894f6772, 0xff4293b5, 0xfe2181ab, + 0xfe2282ae, 0xff2b88b4, 0xff3695c1, 0xff54b3da, 0xff61b7d7, 0xff98be96, + 0xffd2c251, 0xffefc32d, 0xfffcc41d, 0xfffabe20, 0xffffd428, 0xfffcd128, + 0xfffcd62d, 0xfffbc622, 0xfffdd628, 0xfffed929, 0xfffdda2a, 0xfffdd42a, + 0xfff9ba1b, 0xfffcc023, 0xffdca524, 0xff664f24, 0xff322b28, 0xff2d292c, + 0xff2d292c, 0xff2e292c, 0xff2d292d, 0xff2e292d, 0xff2e292d, 0xff2d292d, + 0xff2d282c, 0xff2a2529, 0xff484547, 0xffc1bfc1, 0xffffffff, 0xffe9e8e9, + 0xff5f5c5e, 0xff312c30, 0xff2e292d, 0xff2d292c, 0xff2d292d, 0xff2e282d, + 0xff2d282c, 0xff2d282c, 0xff2e292d, 0xff2f2a2e, 0xff302b2f, 0xff342f33, + 0xfe2f2b2f, 0xff322d32, 0xfe363437, 0xfe2b262a, 0xff332f32, 0xdf494c4c, + 0x1f161314, 0x06000000, 0x0a020003, 0x10070709, 0x06000001, 0x06000001, + 0x06000001, 0x07000002, 0x07000002, 0x06000002, 0x05000002, 0x05000001, + 0x07000002, 0x06000001, 0x06000001, 0x07000002, 0x0d040506, 0x0d040306, + 0x0b030003, 0x0b020004, 0x09000000, 0x04000000, 0x5f36464b, 0xf64794b4, + 0xff2282ab, 0xfe2383ab, 0xff2b88ae, 0xff3894be, 0xff4ba7d1, 0xff57b3d9, + 0xff80bab0, 0xffbebd67, 0xffedc32e, 0xfffdc115, 0xfffbc01e, 0xfffed22a, + 0xfffcd132, 0xfffccf2a, 0xfffbc525, 0xfffdd027, 0xfffdd829, 0xfffed82b, + 0xfffeda30, 0xfffcc621, 0xfffdcb23, 0xfffcc624, 0xfffebf23, 0xffd3981c, + 0xff7c6026, 0xff433728, 0xff2c292a, 0xff2d292c, 0xff2e292d, 0xff2d292d, + 0xff2d292d, 0xff2e292d, 0xff2d282c, 0xff2d282c, 0xff2b252a, 0xff3d383c, + 0xff959394, 0xffa5a4a6, 0xff373337, 0xff2d282c, 0xff2d282c, 0xff2d282c, + 0xff2e282d, 0xff2d282c, 0xff2d282c, 0xff2d282c, 0xff2d282c, 0xff2e292d, + 0xff2c272b, 0xff302b2f, 0xff312c30, 0xff3c363a, 0xfe2e2a2e, 0xff312c2f, + 0xc7414343, 0x351c1a1d, 0x05000000, 0x09010002, 0x0d040305, 0x09010103, + 0x05000001, 0x03000000, 0x05000001, 0x06000001, 0x05000001, 0x06000002, + 0x06000002, 0x07000002, 0x06000001, 0x07000002, 0x09010203, 0x0f060507, + 0x08010001, 0x06000000, 0x05000000, 0x08000002, 0x08020001, 0x3720262b, + 0xdc4d8ba5, 0xff2a89b2, 0xfe2684af, 0xff3089aa, 0xff469ab2, 0xff49a6ca, + 0xff53b1d8, 0xff73b6c4, 0xffa3b787, 0xffd9c34a, 0xffdeb836, 0xfffdc01b, + 0xfffdd326, 0xfffbc933, 0xfffcd33a, 0xfffcc923, 0xfffac222, 0xfffdd528, + 0xfffed829, 0xfffdd931, 0xfffdd02f, 0xfffbc922, 0xfffed92a, 0xfffdd132, + 0xfffcc223, 0xfffebb21, 0xffcb951d, 0xff705725, 0xff322d2a, 0xff2d282c, + 0xff2e292d, 0xff2e292d, 0xff2d282c, 0xff2d282c, 0xff2e292d, 0xff2e292d, + 0xff2d282c, 0xff2d282c, 0xff332e31, 0xff373337, 0xff2f292d, 0xff2e282c, + 0xff2f2a2e, 0xff2f2a2e, 0xff2d282c, 0xff2e292d, 0xff2e292d, 0xff353034, + 0xff423d41, 0xff3d383c, 0xff423d41, 0xff343134, 0xff3d393d, 0xff3c393c, + 0xff302c30, 0xde3d3d3e, 0x351b1b1d, 0x09010002, 0x0a020002, 0x0a020002, + 0x0a030204, 0x04010102, 0x00000000, 0x01000000, 0x03000001, 0x04000001, + 0x05000001, 0x05000001, 0x05000001, 0x05000001, 0x0d040406, 0x0d050406, + 0x10060407, 0x1f0c0a0d, 0x411c1d21, 0x6134393b, 0x7f4c5356, 0x59343b3f, + 0x3621282d, 0xdb6c9dad, 0xff358fb7, 0xfe2480aa, 0xff3b8faa, 0xff75a588, + 0xff68a9a7, 0xff55aed3, 0xff6eb5c5, 0xff96b798, 0xffaec17c, 0xffc7b24b, + 0xfffbbb16, 0xfffdd025, 0xfffbcc26, 0xfffbcc2f, 0xfffdcc25, 0xfffac31f, + 0xfffbcb23, 0xfffdd727, 0xfffdd82b, 0xfffdd42e, 0xfff8c120, 0xfffcd629, + 0xfffed92a, 0xfffdd62b, 0xfffdce2d, 0xfffac222, 0xfff7b61d, 0xff9f7a25, + 0xff463b29, 0xff2d282c, 0xff2d292d, 0xff2d292d, 0xff2e292d, 0xff2e292d, + 0xff2e292d, 0xff2d282c, 0xff2e292d, 0xff2f2a2e, 0xff2e292d, 0xff2c292c, + 0xff2d292c, 0xff2e282d, 0xfe2c272b, 0xff2f2a2d, 0xff302b2f, 0xff383337, + 0xff555054, 0xff777377, 0xff8a888b, 0xff767477, 0xfb555356, 0xf7555656, + 0xfb636464, 0xff3b3a3b, 0xf4373638, 0x5d2f2f30, 0x02000000, 0x0a000002, + 0x09010002, 0x08020103, 0x06030304, 0x02010101, 0x01000000, 0x01000000, + 0x01000000, 0x01000000, 0x02000000, 0x03000000, 0x03000000, 0x02000000, + 0x09030003, 0x3818191d, 0x972f3439, 0xec465358, 0xff4b6471, 0xff527686, + 0xff548ba4, 0xff6daac3, 0xf961a3bf, 0xff529ebd, 0xfe2682ae, 0xfe2484b0, + 0xff80ad8c, 0xff9db06d, 0xff66adbb, 0xff70b3bf, 0xffbab46a, 0xffa0b887, + 0xffbdb964, 0xfff5bf22, 0xfffccb23, 0xfffcd227, 0xfffbc827, 0xfffbcc27, + 0xfffdca24, 0xfffcc822, 0xfffcd026, 0xfffcd426, 0xfffdd529, 0xfffac021, + 0xfffbcc29, 0xfffdd82b, 0xfffed727, 0xfffed92a, 0xfffdda32, 0xfffdd22c, + 0xfff9c220, 0xffc59826, 0xff584828, 0xff2e292d, 0xff2e292c, 0xff2d292d, + 0xff2d282d, 0xff2d282c, 0xff2e292d, 0xff2e292d, 0xff2d282c, 0xff2e292d, + 0xff2f2a2e, 0xff2e292d, 0xff2d282c, 0xff2e292c, 0xff2f2a2e, 0xf2363335, + 0xd9302e30, 0xdd363437, 0xd7373537, 0xc42c2a2b, 0xc2313031, 0xae3b3c3c, + 0x82343435, 0x7d4d5050, 0xe0626767, 0xff444346, 0x83444445, 0x00000000, + 0x09000002, 0x09020002, 0x08020103, 0x08050606, 0x05040504, 0x01000100, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x01000000, 0x00000000, 0x552c2b2c, 0xe23d454b, 0xff3c6577, 0xff6aa0bb, + 0xff8ec4d8, 0xfe92c9e2, 0xfe70b7d4, 0xff7ebad4, 0xff8bbed6, 0xfe7ab7ce, + 0xff1d7eaa, 0xff2b8dbb, 0xff99af78, 0xffa3b077, 0xff89b19d, 0xffc5b45c, + 0xffecb72f, 0xffbaba65, 0xffecbf2e, 0xfffcca21, 0xfffbce23, 0xfffccc27, + 0xfffbc723, 0xfffcce29, 0xfffac21f, 0xfffdcd25, 0xfffac61a, 0xfffcd125, + 0xfffabc1e, 0xfffabf26, 0xfffcd333, 0xfffbca22, 0xfffdd426, 0xffffd82a, + 0xfffdda2e, 0xfffdd62b, 0xfff5bc20, 0xffa58524, 0xff70572a, 0xff352d2a, + 0xff2d2a2c, 0xff2e282e, 0xff2d292d, 0xff2e292c, 0xff2e292d, 0xff2e292d, + 0xff2e292d, 0xff2d282c, 0xff2d282c, 0xff2e292d, 0xff2d282c, 0xff2d292c, + 0xff312d31, 0xb63c3c3e, 0x26100e11, 0x301b191b, 0x2d202020, 0x1b171616, + 0x13131112, 0x00000000, 0x00000000, 0x5d313031, 0xff616765, 0x6b2e2e32, + 0x06020003, 0x09000001, 0x09010002, 0x07020001, 0x0b08090a, 0x06040505, + 0x01000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xf144484c, 0xff457b94, + 0xff5bb2d8, 0xfe8acae6, 0xff91cde8, 0xff79bfdc, 0xff4e9dc2, 0xff3d92b4, + 0xff9cc3d8, 0xff97b8c5, 0xfe3881a3, 0xff3399c5, 0xfea2b68b, 0xffd3af45, + 0xffecb12c, 0xfff8b821, 0xffd1bb4d, 0xffe5bb36, 0xfffdc722, 0xfff9c21d, + 0xfffdd128, 0xfffbc624, 0xfffcce28, 0xfffabd1c, 0xfffecd25, 0xfffcca1e, + 0xfffcca21, 0xfffabc1e, 0xfffbaf17, 0xfffdce2c, 0xfffabd1d, 0xfffdc722, + 0xfffed82b, 0xfffdd72a, 0xffffd82c, 0xfff9c31e, 0xfffdca25, 0xffdaa421, + 0xff5d4922, 0xff53422a, 0xff2d282c, 0xff2d292b, 0xff2d282d, 0xff2e282c, + 0xff2d282d, 0xff2d282c, 0xff2d282c, 0xff2e292d, 0xff2d282c, 0xff2e292d, + 0xff2e292d, 0xff2c292c, 0xff322b2f, 0xba373739, 0x110c0a0d, 0x0f0c0b0d, + 0x1a18191a, 0x08070807, 0x05040304, 0x02010000, 0x0a060505, 0x94535755, + 0x733c3d3d, 0x0f050306, 0x09010002, 0x09010002, 0x0a040205, 0x07060505, + 0x03000001, 0x02000000, 0x01000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xff4f7484, 0xfe5bb8e0, 0xff5db6dc, 0xff7fc8e6, 0xff68b6da, 0xff5fadca, + 0xff348fb6, 0xff267da2, 0xfe96bccc, 0xfcb2887d, 0xff3b6577, 0xfe8aada6, + 0xffd7af45, 0xfff8ad16, 0xfffcb516, 0xffe9b931, 0xffe8b72e, 0xfffebc1a, + 0xfff9ba18, 0xfffdcd25, 0xfffbc520, 0xfffdca26, 0xfff9bd1e, 0xfffdc21e, + 0xfff8b70f, 0xfffbc71c, 0xfffbc221, 0xfff9ae15, 0xfffcc522, 0xfffbc223, + 0xfffbbc1b, 0xfffbca21, 0xfffccc24, 0xfffdd333, 0xfffbca24, 0xfffcc220, + 0xfff7cf29, 0xfff8b921, 0xffa17520, 0xff463727, 0xff443b2a, 0xff302b2b, + 0xff2e2a2d, 0xff2d292d, 0xff2e2a2c, 0xff2f2a2e, 0xff2e292d, 0xff2d282c, + 0xff2d282c, 0xff2e292d, 0xff2f2a2e, 0xff2f2a2e, 0xff2f2a2d, 0xc6322f31, + 0x1d111013, 0x110e0d10, 0x13131212, 0x0b0a0a0a, 0x01000101, 0x02010000, + 0x07050303, 0x22171717, 0x02000000, 0x08020002, 0x0a020001, 0x0a040404, + 0x05020302, 0x04000001, 0x03000001, 0x02000000, 0x01000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0xff5db1d6, 0xff50add4, 0xff65b7da, 0xff6abcde, + 0xff6db2cf, 0xff469abf, 0xff1d7aa3, 0xff4a97b9, 0xffa37e73, 0xff9d5d35, + 0xfe417280, 0xffdd9c26, 0xfef1a01e, 0xfffcb017, 0xfffaaf16, 0xfff9ae15, + 0xfffbb114, 0xfff9b218, 0xfffcc11e, 0xfffbc622, 0xfffdc724, 0xfff9bd1d, + 0xfffbb91d, 0xfffab313, 0xfffabe17, 0xfffdc61f, 0xfffcb418, 0xfffbbe1e, + 0xfffdc422, 0xfffbbd1f, 0xfffcbe20, 0xfffabd1c, 0xfffabb22, 0xfffcce2b, + 0xfffbc11e, 0xfffcc01c, 0xffeabc25, 0xffb98a1e, 0xffe4a424, 0xff6c5223, + 0xff3f3327, 0xff3c332b, 0xff2e2a2a, 0xff2d282e, 0xff2e292d, 0xff2d282c, + 0xff2d282c, 0xff2e292d, 0xff2e292d, 0xff2d282c, 0xff2e292d, 0xff2f2a2c, + 0xff2e292c, 0xda373638, 0x331b1d1e, 0x09050607, 0x09080908, 0x00000000, + 0x00000000, 0x05030203, 0x07030103, 0x06030102, 0x04010001, 0x08030304, + 0x09040406, 0x0a060709, 0x06000102, 0x06000002, 0x05000001, 0x02000000, + 0x01000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff5fb6dd, 0xff419bc2, + 0xff5ca8ca, 0xff51a3c6, 0xff3e92b9, 0xff2581a9, 0xff1c7ba4, 0xff428bad, + 0xff7c401d, 0xffbc782c, 0xff61624e, 0xffbe6314, 0xfff4a525, 0xfffaac15, + 0xfffaac1a, 0xfffba911, 0xfffab31d, 0xfffcba1a, 0xfffbc321, 0xfffcc122, + 0xfffbb81c, 0xfffbb41a, 0xfffab518, 0xfffcbd1e, 0xfffaba19, 0xfffcbf1d, + 0xfffbbb1e, 0xfffbbf1e, 0xfffcc023, 0xfffbc020, 0xfffbbb1d, 0xfffab51a, + 0xfffcc722, 0xfffbc21e, 0xfffab61b, 0xfffbbb1d, 0xfff9b11a, 0xffbf8b1e, + 0xffbd881f, 0xffd39520, 0xff684f22, 0xff403628, 0xff3b332a, 0xff2c292d, + 0xff2d292d, 0xff2e282c, 0xff2e282d, 0xff2d282d, 0xff2d282c, 0xff2e282d, + 0xff2e292d, 0xff2f2a2e, 0xff2c292d, 0xdc373333, 0x32191818, 0x07020204, + 0x00000000, 0x00000000, 0x00000000, 0x07010203, 0x08010103, 0x0a030305, + 0x02000000, 0x03020202, 0x04010203, 0x03000000, 0x05000001, 0x05000001, + 0x05000001, 0x04000001, 0x02000000, 0x01000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xff5bafd2, 0xff4496b8, 0xff4397bb, 0xff2a85b1, 0xff2083aa, 0xff1d79a1, + 0xff2583ac, 0xff3b6e85, 0xff77331a, 0xff994a26, 0xff52443b, 0xffd98218, + 0xfffcad17, 0xfffbab1f, 0xfffaac1c, 0xfffcb223, 0xfffcb215, 0xfffbb719, + 0xfffbb61a, 0xfffaaf18, 0xfffbb116, 0xfffbb419, 0xfffcbb1d, 0xfffaad17, + 0xfffab41e, 0xfffcb917, 0xfffcba1d, 0xfffbbf1d, 0xfffbc123, 0xfffab91c, + 0xfffab51b, 0xfffcc01d, 0xfffed329, 0xfffbb81c, 0xfffcb51c, 0xfffab117, + 0xfffbac19, 0xfff9ac17, 0xffad7b19, 0xffbb861e, 0xffdd9c1f, 0xff6e5220, + 0xff564a2d, 0xff3a3027, 0xff282528, 0xff2c292e, 0xff2d2a2b, 0xff2e292c, + 0xff2e292d, 0xff2d2a2c, 0xff2d282c, 0xff322c30, 0xff312c2a, 0xe54f412f, + 0x43332b1f, 0x0e050707, 0x0a080709, 0x03000201, 0x04000000, 0x06000001, + 0x05000001, 0x04000001, 0x02000000, 0x01000000, 0x02000000, 0x02000000, + 0x03000000, 0x04000001, 0x05000001, 0x05000001, 0x03000000, 0x01000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x01000000, 0xff2c8ab1, 0xff3f94b8, 0xff2b86ad, 0xff2686ae, + 0xff1e77a1, 0xff207ba3, 0xff3387ab, 0xff3a5363, 0xff903616, 0xff933920, + 0xff64402e, 0xffdf8e19, 0xffde7f0e, 0xffd67a10, 0xfffab12a, 0xfffba919, + 0xfffbab12, 0xfffbaf16, 0xfff9ac16, 0xfffaab14, 0xfffdb318, 0xfffbba1d, + 0xfffaad16, 0xfffcae17, 0xfffab013, 0xfffcbd1e, 0xfffabb19, 0xfffabd20, + 0xfffdb51e, 0xfffcb61a, 0xfffcb918, 0xfff8b012, 0xfff9b71a, 0xfff8ae17, + 0xfffbad16, 0xfffaac16, 0xfffbab15, 0xfff9a816, 0xfffeac1c, 0xffba841c, + 0xffb37d1a, 0xffdda624, 0xff8c6620, 0xff9a6f1f, 0xffb77f2a, 0xff473424, + 0xff302a2d, 0xff2f2a30, 0xff2d292c, 0xff2e292e, 0xff2f2b2c, 0xff37322c, + 0xfe5b4627, 0xffbc8b2a, 0x887c602b, 0x130a0808, 0x1b16171b, 0x02000000, + 0x06000001, 0x05000001, 0x03000000, 0x02000000, 0x01000000, 0x00000000, + 0x00000000, 0x00000000, 0x01000000, 0x02000000, 0x04000001, 0x05000001, + 0x05000001, 0x03000000, 0x02000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x01000000, 0x01000100, 0x03020302, 0xff2f8bb2, 0xff2e89b0, + 0xff2c89b2, 0xff1a799f, 0xff1f759f, 0xff2782a6, 0xff3b84a4, 0xff393c43, + 0xff863519, 0xff7a2f18, 0xff6d301f, 0xffb24e1b, 0xffc65a10, 0xffe59528, + 0xfffdb123, 0xfffba60d, 0xfffbaa12, 0xfffba913, 0xfffaab15, 0xfffcb319, + 0xfffbb519, 0xfffaab16, 0xfffbac16, 0xfffab417, 0xfffcbc1c, 0xfffcb61a, + 0xfff5ac1c, 0xfff8b01d, 0xfffbb419, 0xfffab217, 0xfffbb616, 0xfff19d0b, + 0xfffbaa17, 0xfff9a713, 0xfffaa914, 0xfffbac16, 0xfffaab16, 0xfff6a216, + 0xfff9a512, 0xffffae15, 0xffe7a71e, 0xffc98d19, 0xffda931e, 0xffd48e1b, + 0xffdb8f17, 0xffb5791d, 0xff6e5123, 0xff53432d, 0xff433626, 0xff3c3228, + 0xff564225, 0xff84622c, 0xfe9e7016, 0xffffb424, 0xdab29758, 0x5f3a3c3b, + 0x09060709, 0x04020201, 0x04000001, 0x04000001, 0x02000000, 0x01000000, + 0x01000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x01000000, + 0x03000000, 0x05000001, 0x06000001, 0x04000001, 0x03000001, 0x01000000, + 0x00000000, 0x00000000, 0x01000000, 0x03020302, 0x03020202, 0x08070807, + 0xff348db4, 0xff2a88b0, 0xff257ba2, 0xff1b6e90, 0xff2280a8, 0xff3080a2, + 0xff327692, 0xff42322e, 0xff612b1e, 0xff5f2a1b, 0xff693120, 0xffa3350f, + 0xffc55a0d, 0xfff0a023, 0xfffba913, 0xfffca912, 0xfff8a911, 0xfffaaa13, + 0xfff9aa13, 0xfffbae15, 0xfffba812, 0xfffaac16, 0xfffcb317, 0xfffab518, + 0xfffcb51b, 0xfff3a419, 0xfff39d18, 0xfffbad17, 0xfff9ac17, 0xfffbb215, + 0xfff7aa15, 0xfff8a816, 0xfff9a914, 0xfffaa815, 0xfffaab17, 0xfffbad17, + 0xfff9aa18, 0xfff5a114, 0xfffba815, 0xfff6a714, 0xfffcaf19, 0xfff7a417, + 0xffec9c19, 0xfff7a813, 0xfff8a516, 0xffe99412, 0xffda981d, 0xffcb8f22, + 0xffc18d2a, 0xffb98321, 0xffda9719, 0xfffdb222, 0xffffb01a, 0xffe8a828, + 0xf39c8b63, 0x924a4d4c, 0x05040405, 0x0a080809, 0x01000000, 0x02000000, + 0x01000000, 0x01000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x01000000, 0x03000000, 0x05000001, 0x06000002, 0x06000001, + 0x04000001, 0x02000001, 0x07050506, 0x0b0a0a0b, 0x07050606, 0x04030403, + 0x0a090a09, 0x110f1110, 0xff2885ae, 0xff2586b1, 0xff1c6484, 0xff247393, + 0xff24799c, 0xff25769a, 0xff2b596f, 0xff432e30, 0xff3d2925, 0xff5c2b1c, + 0xff642718, 0xffaf410d, 0xffd16d0e, 0xfffdad1e, 0xfff7a20f, 0xfff8a911, + 0xfffaa913, 0xfffaaa13, 0xfffbaa13, 0xfffca70f, 0xfffcac16, 0xfffcac16, + 0xfffbb015, 0xfffbae16, 0xfff39e15, 0xfff29116, 0xfff9a617, 0xfffbab16, + 0xfffbae14, 0xfff9b118, 0xffef9912, 0xfffaa915, 0xfffba915, 0xfff8a512, + 0xfff6a013, 0xfffaac16, 0xfffaa214, 0xfff39e13, 0xfff7a713, 0xfff9a816, + 0xfff69f15, 0xffef9714, 0xfff9a817, 0xfff8a613, 0xfff19910, 0xfffca817, + 0xfffbae17, 0xfffaab15, 0xfffab126, 0xfffaaa14, 0xfffbab16, 0xfffaad13, + 0xfff8ac16, 0xffaa7f2e, 0xff69695f, 0x9c424244, 0x0b0b0b0a, 0x0a070a09, + 0x02000000, 0x03000001, 0x01000000, 0x01000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x01000000, 0x03000001, 0x05000001, + 0x07000003, 0x06000002, 0x06000001, 0x0c0a0b0a, 0x1b1a1815, 0x2524211e, + 0x0d0b0c0b, 0x0d0c0d0c, 0x15131514, 0x0f0e0e0e, 0xff2988b1, 0xff1d7ca5, + 0xff1e5e79, 0xff216685, 0xff257696, 0xff1b6689, 0xff2c4451, 0xff362f34, + 0xff342927, 0xff5e2b1f, 0xff602a18, 0xffa73705, 0xffed951d, 0xfff6a311, + 0xfff9a412, 0xfff9a910, 0xfffaaa14, 0xfffaaa12, 0xfffaa911, 0xfffbab16, + 0xfff49e15, 0xfffaa30f, 0xfffba813, 0xfff29211, 0xffe57d0b, 0xfff29b11, + 0xfffcad16, 0xfffaac15, 0xfffba415, 0xffef9314, 0xfff59f12, 0xfff9a917, + 0xfffbab16, 0xfff6a114, 0xfff6a014, 0xfff8a112, 0xffe37d12, 0xfff9a516, + 0xfff9a816, 0xfff7a016, 0xffee930e, 0xfff8a216, 0xfff6a212, 0xfff39a12, + 0xfff7a214, 0xfffbab16, 0xfffcab17, 0xfffba911, 0xfffbab14, 0xfffcab14, + 0xfffaab13, 0xfffbad1b, 0xffba8726, 0xff2f393c, 0xff213f4c, 0xab394346, + 0x12121112, 0x1f1c1c1e, 0x0d0a0b0c, 0x04000001, 0x03000000, 0x01000000, + 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x01000000, 0x03000101, + 0x04000102, 0x05000001, 0x07000100, 0x0a020507, 0x18151515, 0x211c1710, + 0x2c21190d, 0x12100b03, 0x17131512, 0x17161616, 0x0c0b0b0b, 0x08070807, + 0xff2382ac, 0xff19759a, 0xff21617e, 0xff24607c, 0xff176e91, 0xff1c5b79, + 0xff2b3c47, 0xff302b2e, 0xff432922, 0xff5e2b1d, 0xff672816, 0xffbc5410, + 0xffcc650b, 0xfff19b17, 0xfff5a011, 0xfffaa813, 0xfff8a714, 0xfff9aa11, + 0xfffcab16, 0xfff6a119, 0xfff08e0d, 0xfff9a412, 0xfff1920d, 0xffed8b0e, + 0xfff49813, 0xfffbad16, 0xfffaab15, 0xfffbad15, 0xfff69d11, 0xfff5a014, + 0xfffaaa16, 0xfffbac15, 0xfff7a216, 0xffea8810, 0xfff39914, 0xffe78010, + 0xfff29515, 0xfffbab15, 0xfff6a314, 0xffef8f15, 0xfff5a015, 0xfff7a417, + 0xffe68913, 0xfff6a113, 0xfffaab16, 0xfffaab16, 0xfffbaa15, 0xfff7a411, + 0xfffbaa14, 0xfffaaa13, 0xfffead15, 0xffbf871e, 0xff383228, 0xff284b5a, + 0xff1e3a49, 0xfc5d747b, 0x87596c6f, 0x523b4346, 0x09060607, 0x05000001, + 0x04000001, 0x04000001, 0x03000000, 0x02000000, 0x02000000, 0x02000000, + 0x04020203, 0x12101111, 0x12101111, 0x0a060607, 0x03000001, 0x0c0a0908, + 0x5a473d24, 0x6a50472a, 0x241e180e, 0x56433b29, 0x664e452d, 0x1814120e, + 0x0d0b0b0b, 0x0a09090a, 0xff1f79a0, 0xff1c749c, 0xff22576e, 0xff235b76, + 0xff106589, 0xff24536a, 0xff2c363d, 0xff31292c, 0xff5f2f22, 0xff58291c, + 0xff6f2b12, 0xffb44f10, 0xffd97612, 0xffe48a17, 0xfff49a0f, 0xfff6a015, + 0xfff9a813, 0xfffdaa17, 0xfff7a41a, 0xffef920e, 0xfff69e0f, 0xffef8e0c, + 0xfff09011, 0xffdf7109, 0xffee8e10, 0xfffcab16, 0xfffbab17, 0xfffba816, + 0xffee9010, 0xfff69e13, 0xfff19b12, 0xfff79e14, 0xffeb8c10, 0xfff09112, + 0xffed8d11, 0xfff09916, 0xfffdac17, 0xfff49a12, 0xffe57913, 0xffed8c13, + 0xfff8a815, 0xffe58513, 0xffe78914, 0xfff5a116, 0xfff7a615, 0xfffbac14, + 0xfffaa915, 0xfff8a716, 0xfffbac18, 0xfffea814, 0xffcc8f23, 0xff443827, + 0xff2b2e35, 0xff2c5363, 0xfe213341, 0xff3d5d6b, 0xeb95bac2, 0x60364348, + 0x00000000, 0x06000002, 0x05000001, 0x05000001, 0x05000001, 0x05000001, + 0x04000001, 0x07040504, 0x110f100f, 0x1a191717, 0x100e0e0f, 0x05040403, + 0x1613100b, 0x93726743, 0xf3b5a057, 0xeeada148, 0xa67b7845, 0x82665d43, + 0xb7897e50, 0x3e302b1e, 0x03030000, 0x4e3b3421, 0xff166c90, 0xff236f90, + 0xff243b47, 0xff1a536c, 0xff1a6e93, 0xff2b4a5b, 0xff333139, 0xff393433, + 0xff7b301a, 0xff55271d, 0xff7a321c, 0xffbf5912, 0xffd77510, 0xffda730b, + 0xfff69d15, 0xfff6a211, 0xfff7a110, 0xfff39813, 0xffeb8d10, 0xfff39613, + 0xffea7e05, 0xfff49312, 0xffda6706, 0xffd05303, 0xffed8910, 0xfffcaf16, + 0xfffaa813, 0xffe8820e, 0xffe87e0f, 0xfff79e14, 0xffe47a0c, 0xffef8d13, + 0xffec860b, 0xfff39a13, 0xfff6a213, 0xfffbab13, 0xfff1910f, 0xffdc6309, + 0xffe88211, 0xfff8a314, 0xfff29619, 0xffda7714, 0xffda6e11, 0xffe38011, + 0xfff9a715, 0xfffbaa16, 0xfff29d12, 0xfff2a016, 0xfffaa814, 0xffdf961f, + 0xff574627, 0xff2b282c, 0xff2e4955, 0xff2a4d5f, 0xff253542, 0xfe2b4f61, + 0xf68fb8c1, 0x59323f45, 0x00000000, 0x06000001, 0x06000001, 0x06000001, + 0x06000001, 0x05000001, 0x0c080a0b, 0x14111313, 0x100e0f0f, 0x09080708, + 0x08040603, 0x28221c14, 0xb0867748, 0xfeb8a84d, 0xe6aaa14e, 0xcc968d41, + 0x44322e15, 0x2019170d, 0x48383021, 0x1e19140e, 0x73584d34, 0xf4b5a566, + 0xff207ba1, 0xff275d75, 0xff272e35, 0xff1d4e65, 0xff287599, 0xff30414c, + 0xff373539, 0xff533835, 0xff8e2e0d, 0xff4d2920, 0xff7d3118, 0xffc5600d, + 0xffbd4c08, 0xffed9010, 0xfff19711, 0xfff79d12, 0xffec8a0d, 0xffec890f, + 0xffeb8110, 0xffe57e08, 0xfff69414, 0xffd55f03, 0xffcf5104, 0xffe2740c, + 0xfff59c10, 0xfff79d14, 0xffef8f11, 0xffe77a0a, 0xfff49d12, 0xffe8850f, + 0xffea800f, 0xffe27708, 0xfff59e15, 0xfff8a813, 0xfff5a215, 0xffe7870f, + 0xffd95a07, 0xffdf710b, 0xfff39614, 0xfff49d14, 0xffd86f0a, 0xffdd7b12, + 0xffd06c0b, 0xffed9114, 0xfff3a114, 0xfff6a514, 0xfff29b14, 0xfff6a214, + 0xffe19318, 0xff664d26, 0xff2a242b, 0xff253d4c, 0xff2b4251, 0xff234656, + 0xff253845, 0xff2a4d5b, 0xf79cc3cb, 0x60374548, 0x01000000, 0x07000002, + 0x07000002, 0x08010103, 0x0b040406, 0x17131211, 0x413c372f, 0x1f1b1713, + 0x08070302, 0x09070301, 0x362c2317, 0xca978b4c, 0xfdb7a94a, 0xb2857e42, + 0x34262009, 0x231b170d, 0x1c181009, 0xb78b7e52, 0xe5ab9b61, 0x7055492d, + 0xe0a8975f, 0x7a605537, 0xff2a81a7, 0xff1e4d63, 0xff282d34, 0xff294c5f, + 0xff185977, 0xff38373c, 0xff34353a, 0xff7e3623, 0xff7e2b11, 0xff4b2a23, + 0xff904618, 0xffb34109, 0xffbf5008, 0xfff29313, 0xfff09511, 0xffe9830c, + 0xffed8811, 0xffe4750d, 0xffe77705, 0xfff19118, 0xffda6807, 0xffcd5105, + 0xffde6909, 0xffe1720b, 0xffe7840c, 0xffed8910, 0xffec830c, 0xffef8f0d, + 0xfff19212, 0xffe77f0d, 0xffe77f0e, 0xffed8c11, 0xfff29813, 0xffeb8f10, + 0xffda740f, 0xffc94f05, 0xffe3720d, 0xffe38110, 0xffe98914, 0xffde790e, + 0xffe88b13, 0xffe38712, 0xffdd7e12, 0xffdb780d, 0xffed9713, 0xffdf8112, + 0xffd66f0d, 0xffe5991b, 0xff6f4e23, 0xff272229, 0xff2b333a, 0xff234b5e, + 0xff29363f, 0xff244556, 0xff263946, 0xff2d4753, 0xf57fabb7, 0x5229373c, + 0x03000000, 0x09010204, 0x08000103, 0x0b060405, 0x372b251a, 0x67534934, + 0x5a463b26, 0x2e252018, 0x0b080300, 0x624a4123, 0xe0a5964a, 0xd99e9240, + 0x77595121, 0x392b2716, 0x27201c15, 0x3b2b2417, 0xc590814f, 0xefb29c5d, + 0x9a73663a, 0x624b4027, 0x71564c30, 0xd48d834b, 0xff1f7194, 0xff1d4b60, + 0xff283038, 0xff213f50, 0xff21495d, 0xff383234, 0xff343134, 0xff9a300f, + 0xff652a19, 0xff563220, 0xff933e16, 0xffa92f04, 0xffce5e0a, 0xffef8d0f, + 0xffe68411, 0xffea8511, 0xffe27008, 0xffe16505, 0xffe67407, 0xffd76308, + 0xffcc4e04, 0xffda5e06, 0xffd55a07, 0xffcb5005, 0xffe1750a, 0xffe87a0d, + 0xffe97d08, 0xffea810b, 0xffe9820f, 0xfff49815, 0xffeb890e, 0xffe87d0e, + 0xffdf770c, 0xffc95d0e, 0xffbe4909, 0xffe17510, 0xffd67110, 0xffd0640d, + 0xffe28211, 0xffea8d16, 0xffd77710, 0xffca610b, 0xffc15109, 0xffd57413, + 0xffc14f0b, 0xffb03604, 0xffd57213, 0xff7f5c28, 0xff2f292b, 0xff2b2b2e, + 0xff283f4d, 0xff273a46, 0xff2a2e36, 0xff244353, 0xff273a44, 0xff344448, + 0xfa6b9aa1, 0x91546b70, 0x17100f0f, 0x04010002, 0x1912110d, 0x664d4731, + 0x80635c41, 0x9e796c47, 0xd39d8c52, 0x98756845, 0x9d746934, 0xf4afa139, + 0xfeb4a836, 0xaa817a44, 0x56433c26, 0x04030000, 0x5d403b24, 0xe891894a, + 0xd98e8546, 0x85645734, 0x73564b2d, 0x79574e2e, 0xdb887f40, 0xc67b753f, + 0xff126488, 0xff1e4c64, 0xff28313b, 0xff253f4c, 0xff2b363c, 0xff333137, + 0xff412a26, 0xffa0300d, 0xff4f2a20, 0xff633820, 0xff94300e, 0xffa72c03, + 0xffd86a0d, 0xffec9014, 0xffc25407, 0xffdb690a, 0xffe06911, 0xffe06706, + 0xffdd680a, 0xffcb5005, 0xffd65a05, 0xffd05206, 0xffc84703, 0xffda6706, + 0xffec8112, 0xffde6604, 0xffe77c09, 0xffe97f12, 0xffed8814, 0xffe2770a, + 0xffe5760d, 0xffd66208, 0xffc7570d, 0xffb03705, 0xffcf5f0d, 0xffc95d09, + 0xffcd6210, 0xffd7750f, 0xffe18213, 0xffba4c07, 0xffc2520b, 0xffac3506, + 0xffbd4f0b, 0xffc2581c, 0xffa72c06, 0xff9c3b15, 0xff80562a, 0xff44392e, + 0xff2d292c, 0xff2d2e34, 0xff293740, 0xff2a3033, 0xff292d35, 0xff234657, + 0xff273d4a, 0xff363f3f, 0xff57888c, 0xea8cb0b4, 0x4d393e3d, 0x7c5e563a, + 0xbe8f825b, 0x7f625a3f, 0x795e5640, 0xbc918258, 0xffbda75f, 0xfab7a444, + 0xfdb5a630, 0xffb5a734, 0xecac9d46, 0xecae9d57, 0x5c463f28, 0x835a5639, + 0xfa938e42, 0xc97c7940, 0x8e615c3e, 0x996b6745, 0x815f5737, 0xe188803e, + 0x8f4e4818, 0x2714120a, 0xff15658a, 0xff224d63, 0xff26333d, 0xff2e353c, + 0xff2f2d33, 0xff312f32, 0xff4c2921, 0xff9a2f10, 0xff382826, 0xff682f1c, + 0xff9e2c08, 0xffa83201, 0xffec8c1a, 0xffcd660f, 0xffcb5309, 0xffdf6b12, + 0xffe17009, 0xffea8010, 0xffcf5506, 0xffd75804, 0xffd15009, 0xffc54704, + 0xffd96206, 0xfff08b11, 0xffdd6507, 0xffe06d07, 0xffe77b0c, 0xffe46e0b, + 0xffdb6704, 0xffe7770d, 0xffcf5707, 0xffbd4506, 0xffae3203, 0xffb74207, + 0xffc35009, 0xffcc5f0e, 0xffca600c, 0xffd46f0d, 0xffb43f04, 0xffb33c09, + 0xffa62c01, 0xffac3406, 0xffb84714, 0xffae3d16, 0xffa62d05, 0xff623324, + 0xff363134, 0xff302c2d, 0xff2e2a2d, 0xff2e2b2f, 0xff2b2b2f, 0xff2c2c30, + 0xff292e35, 0xff1e4d62, 0xff26495c, 0xff36414a, 0xff508a95, 0xea7d988b, + 0xb4817d5e, 0xbf8e8053, 0x5a483e28, 0x44312917, 0x221b150e, 0x382e2618, + 0xf4b8a563, 0xffb8a83d, 0xfbb2a63a, 0x9a756732, 0x78584d25, 0xffafa354, + 0xffa19a4f, 0xfe969145, 0xba787245, 0x8f5e5b36, 0xcc827f49, 0xa76f6b42, + 0xf8948d45, 0x914f4819, 0x1a0b0900, 0x83565744, 0xff1c6c90, 0xff1e4254, + 0xff27333c, 0xff2d2d31, 0xff2f2b2e, 0xff2f2a2e, 0xff5b2b1e, 0xff812d13, + 0xff332727, 0xff6b2c1a, 0xff9f2906, 0xffc45a0f, 0xffe78c1d, 0xffbe4403, + 0xffde6909, 0xffee8711, 0xfff18b13, 0xffd35d06, 0xffd65605, 0xffce4f06, + 0xffc94f02, 0xffdd6307, 0xffed8510, 0xffe57407, 0xffde6a05, 0xffef880b, + 0xffe16908, 0xffdd6707, 0xffe5720e, 0xffce5305, 0xffbb3e06, 0xffad3208, + 0xffaa3206, 0xffbe4507, 0xffba4309, 0xffb13b03, 0xffd1670e, 0xffba4909, + 0xffa82c04, 0xffa72b02, 0xffa82e06, 0xffae3608, 0xffb4431c, 0xffaa2f07, + 0xff9e2f0b, 0xff3c2826, 0xff2e2a2e, 0xff2c2a2d, 0xff2f292e, 0xff2e292d, + 0xff2c292e, 0xff2c2d31, 0xff293038, 0xff195973, 0xff235e79, 0xff314a55, + 0xff48859f, 0xf981a193, 0xb07f7a58, 0x664f4732, 0x4836301f, 0x17140e0b, + 0x45342e1d, 0xc897864d, 0xffbba84f, 0xebad9e45, 0xa1797339, 0xa8776f3b, + 0xffafa356, 0xffaba352, 0xdd8c8644, 0x9a66623d, 0xa96b693a, 0xff9b9045, + 0xff9a8f40, 0xfa938a3d, 0x82474117, 0x1b0d0a03, 0x86504f3d, 0xff7c7b4d, + 0xff146388, 0xff213c4b, 0xff29323a, 0xff2f2c30, 0xff302b2e, 0xff2f282d, + 0xff662c19, 0xff6c2b17, 0xff352728, 0xff712c16, 0xffac3708, 0xffe5851b, + 0xffcc5609, 0xffdf730e, 0xfff9a116, 0xffe9840d, 0xffdd6806, 0xffdf6707, + 0xffcf5205, 0xffd35902, 0xffe77008, 0xffe97d0b, 0xffe47408, 0xffe26f06, + 0xfff7990f, 0xffe97b07, 0xffe36f0a, 0xffe06b09, 0xffcd4e04, 0xffbd4005, + 0xffb03506, 0xffac3005, 0xffae3103, 0xffbb3d08, 0xffa82d03, 0xffbd4909, + 0xffc15009, 0xffa92c04, 0xffa82c04, 0xffa72c02, 0xffa82e06, 0xffad360d, + 0xffad3911, 0xffaa2d07, 0xff9e310c, 0xff342828, 0xff2d292d, 0xff2e292d, + 0xff2d292c, 0xff2d292c, 0xff2c292d, 0xff2a2e33, 0xff2b3039, 0xff185d7c, + 0xff206f92, 0xff325a6d, 0xff377e98, 0xf380abac, 0xe6a59f70, 0x83645838, + 0x16151110, 0x84635a33, 0xecaf9d49, 0xffb7a63c, 0xdfa49a42, 0x96706a34, + 0xdd9d954b, 0xffaea450, 0xffada352, 0xb9807a44, 0x8c65603c, 0xd68b8547, + 0xff999043, 0xff978e3c, 0xf08d853b, 0x6a3a3613, 0x180b0903, 0x97565641, + 0xf9747048, 0xff706a39, 0xff115f80, 0xff253642, 0xff292f36, 0xff2f2b2f, + 0xff2e2a2c, 0xff31292a, 0xff732d16, 0xff552a1d, 0xff41292a, 0xff782d13, + 0xffca5910, 0xffd15809, 0xffc7560a, 0xfffcac1e, 0xfff8a114, 0xffe57b0a, + 0xffe8790a, 0xffd35b04, 0xffd96306, 0xfff18708, 0xffe77505, 0xffe16f07, + 0xffe37106, 0xfffa9c0d, 0xffe87705, 0xffe47409, 0xffdf6908, 0xffce4e05, + 0xffbd4003, 0xffb53904, 0xffaf3405, 0xffa92f07, 0xffb73907, 0xffb13704, + 0xffb13904, 0xffbd4907, 0xffa82c05, 0xffa82c04, 0xffa72d05, 0xffa92b03, + 0xffa92e06, 0xffac360e, 0xffaa3008, 0xffaa2e07, 0xff9e320e, 0xff302728, + 0xff302b2f, 0xff2d2a2e, 0xff2e292d, 0xff2d282c, 0xff2e282c, 0xff2b2e35, + 0xff293239, 0xff1d6280, 0xff227196, 0xff2a6a84, 0xff2e7792, 0xff84aba1, + 0xbb858058, 0x7861573b, 0xbf8b823c, 0xfdb5a736, 0xffb5a734, 0xc28f893f, + 0x916e6931, 0xf4aca23e, 0xffafa347, 0xffada659, 0xa5757247, 0x976e6a44, + 0xed9d944b, 0xff9a9142, 0xff978e3b, 0xe988823b, 0x59332d13, 0x14080501, + 0xb3676951, 0xff726f44, 0xff706a3a, 0xfe716a3a, 0xff145d7b, 0xff25323c, + 0xff2d2d33, 0xff2f2b2e, 0xff2e282d, 0xff31282a, 0xff7c2d16, 0xff422923, + 0xff542c21, 0xff8b4016, 0xffd45d0d, 0xffac3303, 0xfff19519, 0xfff8a311, + 0xffea850c, 0xffec850f, 0xffe47407, 0xffe9810d, 0xfffc9b09, 0xfff18805, + 0xffdf6906, 0xffe47208, 0xfff8950d, 0xffe17203, 0xffe3720a, 0xffe67409, + 0xffcc4e05, 0xffbd3f02, 0xffbd3f07, 0xffb63906, 0xffb03507, 0xffab2f03, + 0xffb33504, 0xffb53b05, 0xffb73f06, 0xffac3406, 0xffad3203, 0xffa92e04, + 0xffa72c04, 0xffaa2d04, 0xffa92f06, 0xffab310a, 0xffa92e04, 0xffab3006, + 0xff9b2f0d, 0xff2d2829, 0xff2f292d, 0xff2d292d, 0xff302a2e, 0xff2d282c, + 0xff2e282c, 0xff2d2e34, 0xff2b353d, 0xff256888, 0xff207293, 0xff287494, + 0xff2b7790, 0xe275a1a2, 0xab7d7d51, 0xf7b0a53c, 0xffb7a52f, 0xfbb4a637, + 0xaa7d7635, 0x9d756f2e, 0xffb4a837, 0xffb2a535, 0xffafa654, 0x74534d26, + 0xb07e794d, 0xffa59b4c, 0xff998f41, 0xff958d3a, 0xe0827b38, 0x4426220e, + 0x1b0c0a07, 0xba65644a, 0xff6f6a3d, 0xfe6f6a39, 0xfe6e6a3b, 0xff6e6b39, + 0xff195773, 0xff273138, 0xff2d2c31, 0xff2e292d, 0xff2d292d, 0xff362828, + 0xff7e2d15, 0xff3a2728, 0xff62311c, 0xff9c501a, 0xffb84709, 0xffc75f1f, + 0xfff79d13, 0xffed8c0f, 0xffeb850f, 0xffea7f0a, 0xffee870a, 0xffffa10c, + 0xfff6920b, 0xffe26f05, 0xffeb7e0b, 0xfff08d10, 0xffe57506, 0xffe87605, + 0xffe46f07, 0xffcd4e04, 0xffc04004, 0xffbf4307, 0xffbc400a, 0xffbf4309, + 0xffaa3002, 0xffb53808, 0xffb33707, 0xffb94005, 0xffb23905, 0xffb13906, + 0xffb53c05, 0xffa62c04, 0xffa82d03, 0xffa82e05, 0xffab3007, 0xffaa2e06, + 0xffa92e06, 0xffac2f03, 0xff923010, 0xff2b282b, 0xff2f292c, 0xff2d292d, + 0xff2f2a2e, 0xff2d282c, 0xff2d292c, 0xff2b3036, 0xff273641, 0xff256b8c, + 0xff1d6e93, 0xff227292, 0xff368098, 0xfe88b398, 0xffb4a838, 0xffb3a72f, + 0xeda99f3a, 0xa2787236, 0xb8867f31, 0xffb5a631, 0xfeb3a52b, 0xffb1a53e, + 0xffaea453, 0xefa19952, 0xffa19646, 0xff958d3c, 0xfe938b3a, 0xff958d38, + 0xc66f692d, 0x53282617, 0xbe5d5c41, 0xff706b3d, 0xfe7b7639, 0xff77713b, + 0xff726b39, 0xff857c3c, 0xff1c5069, 0xff28343c, 0xff2d2d32, 0xff2d2a2d, + 0xff30282d, 0xff3e2825, 0xff762b17, 0xff352925, 0xff6a3321, 0xff984112, + 0xffb7440f, 0xffd56f18, 0xfff19010, 0xffee8b10, 0xffec850e, 0xfff08f0c, + 0xfff4950d, 0xfff39110, 0xffeb7f0c, 0xffef8309, 0xffea800f, 0xffe47407, + 0xffea7b09, 0xffe06805, 0xffd05307, 0xffc04304, 0xffc44604, 0xffc34709, + 0xffc94f11, 0xffaf3404, 0xffaf3204, 0xffb83c07, 0xffaf3404, 0xffbf4507, + 0xffb23804, 0xffbc4607, 0xffa82d04, 0xffaa2f04, 0xffaa2e05, 0xffaa2f06, + 0xffa92e05, 0xffa82d04, 0xffa92f06, 0xffa82d03, 0xff923618, 0xff29272a, + 0xff2d292c, 0xff2d292d, 0xff2d282c, 0xff2e282c, 0xff2d292b, 0xff2b343d, + 0xff263945, 0xff1d698a, 0xff1d6e91, 0xff1d6e91, 0xfe408183, 0xff87ac85, + 0xffb9a43a, 0xfdb7ac49, 0x96726c35, 0xd0958d35, 0xffb4a62e, 0xffb4a729, + 0xfeb2a530, 0xfeafa346, 0xfea69c4a, 0xff9b9240, 0xff958d3b, 0xfe948b3a, + 0xff958c3b, 0xff948c3a, 0xfc8c833a, 0xf56e6b3e, 0xff75703c, 0xff88803a, + 0xff938c3c, 0xff8b843b, 0xff8c833b, 0xff958c3c, 0xff1b495e, 0xff293843, + 0xff2f2c30, 0xff2e2a2e, 0xff30282c, 0xff4a2a22, 0xff6b2b19, 0xff362629, + 0xff6f301c, 0xff98310a, 0xffc9540d, 0xffed9014, 0xfff19614, 0xffee8e14, + 0xfff89a10, 0xffef930f, 0xfff79e16, 0xfff38e12, 0xffef8108, 0xffe57205, + 0xffe37007, 0xffe87c0c, 0xffdf6706, 0xffd15808, 0xffbf4702, 0xffcd4d08, + 0xffc44605, 0xffcf550a, 0xffb23504, 0xffb43906, 0xffbe4408, 0xffbc4105, + 0xffbe4408, 0xffb74006, 0xffbd490a, 0xffab3304, 0xffa92f03, 0xffac3106, + 0xffab3007, 0xffaa2f06, 0xffa72c03, 0xffa82e05, 0xffa92d04, 0xffad3109, + 0xff8a3214, 0xff29282b, 0xff32282b, 0xff2e2a2f, 0xff2e292c, 0xff2d282c, + 0xff2d282c, 0xff2c3741, 0xff233b48, 0xff1b688a, 0xff1d6e8f, 0xff196c8e, + 0xff528b8b, 0xff8cab82, 0xffb8a632, 0xfdb4a833, 0xeeab9e38, 0xfdb3a632, + 0xe1a29b40, 0xf3ada13b, 0xffb2a536, 0xfeada34d, 0xffa19949, 0xff968e3d, + 0xff958d3c, 0xff958d3c, 0xfe978f3e, 0xff988f3d, 0xff958b3d, 0xff89823c, + 0xff978d43, 0xfe9c913f, 0xff958d3b, 0xff948c3c, 0xff948c3b, 0xff958d3b, + 0xff204253, 0xff293e4c, 0xff2e2b2f, 0xff2e292e, 0xff2f2a2b, 0xff582b1f, + 0xff612f22, 0xff392728, 0xff6e2d18, 0xffa63a13, 0xffdc781a, 0xfff7a316, + 0xfff79e17, 0xfffaa214, 0xfff19911, 0xfff49910, 0xfff59a13, 0xffeb7e0a, + 0xffe77504, 0xffe57607, 0xffec7e0f, 0xffdc6205, 0xffd56005, 0xffca5001, + 0xffd75f0a, 0xffc54905, 0xffd0540a, 0xffb53804, 0xffbd3f06, 0xffc74f08, + 0xffc45006, 0xffc34b09, 0xffb93e02, 0xffcc5b10, 0xffb53f0d, 0xffa93002, + 0xffb94008, 0xffac3204, 0xffac3105, 0xffa72c03, 0xffa92e05, 0xffa92d05, + 0xffa82c03, 0xffb0350d, 0xff8d2f10, 0xff2c2729, 0xff3d2c2c, 0xff2f2c2e, + 0xff332b2e, 0xff2d292c, 0xff2d292c, 0xff293640, 0xff223f4e, 0xff1d6c8e, + 0xff206f91, 0xff186b8c, 0xff60938a, 0xfe91a96a, 0xffb5a728, 0xffb5a72b, + 0xf7b0a134, 0xae7e7639, 0x5b43412d, 0xba868342, 0xffb2a538, 0xffb0a449, + 0xffac9f4e, 0xff9c9443, 0xff968e3d, 0xff9e9544, 0xfeab9f4d, 0xec978d45, + 0xe6918a43, 0xffac9f50, 0xffaa9e4c, 0xff9a9040, 0xff948b3c, 0xff958d3c, + 0xff978d3d, 0xff998d3e, 0xff243847, 0xff254250, 0xff2e2b2e, 0xff2f292d, + 0xff312929, 0xff623025, 0xff553027, 0xff412925, 0xff632b1a, 0xffb44117, + 0xffdf7b12, 0xffed9012, 0xfff8a21b, 0xfff6a012, 0xffef8e0e, 0xfff09212, + 0xfff08b10, 0xffe87402, 0xffe97905, 0xffe8790f, 0xffd96002, 0xffe06d05, + 0xffd45c05, 0xffda6405, 0xffd05506, 0xffd2550b, 0xffbf4005, 0xffca4e08, + 0xffc74c09, 0xffd66108, 0xffcc580b, 0xffba4004, 0xffce5906, 0xffc2500b, + 0xffa92e01, 0xffc6530f, 0xffb63d06, 0xffb4390c, 0xffaa2f04, 0xffa82d04, + 0xffa92e05, 0xffa92d04, 0xffab3006, 0xffad3507, 0xff9a300f, 0xff332728, + 0xff46302d, 0xff302c2d, 0xff31282b, 0xff2d2a2d, 0xff2c292e, 0xff2a3944, + 0xff224252, 0xff1e6e91, 0xff227094, 0xff186a8b, 0xfe6d9e89, 0xff9ba958, + 0xffb6a72b, 0xeca99e3b, 0x8c636134, 0x543e3b2b, 0x8f666238, 0xeba49b4a, + 0xffafa44a, 0xfeaea34b, 0xfeaea34e, 0xffaba04e, 0xffa69c4a, 0xfaaa9f4f, + 0xbd7f773a, 0x482c260d, 0x5138341c, 0xf8aba55c, 0xffa59b4a, 0xff978e3e, + 0xff958d3b, 0xff968c3d, 0xff9e9042, 0xffad9c50, 0xff243642, 0xff214150, + 0xff2e2a2c, 0xff2d2a2e, 0xff31292a, 0xff56342d, 0xff452c25, 0xff422b28, + 0xff642e1a, 0xffae3309, 0xffd76a09, 0xffec8e13, 0xfffaa318, 0xffee8d0e, + 0xffec8a0f, 0xfff39111, 0xffe47208, 0xffe47306, 0xffe36c07, 0xffda5c04, + 0xffeb7d07, 0xffda6504, 0xffe77606, 0xffe26e05, 0xffcf5a07, 0xffc44908, + 0xffd35207, 0xffcd4e09, 0xffd05307, 0xffd96609, 0xffc34c08, 0xffcb5306, + 0xffca5808, 0xffb33b04, 0xffc65009, 0xffca5c14, 0xffaf3507, 0xffb93f0c, + 0xffa82c03, 0xffa92e05, 0xffa82d03, 0xffa92e04, 0xffb53707, 0xffa92f06, + 0xff9c2e0a, 0xff352626, 0xff513631, 0xff303c44, 0xff30282a, 0xff2d2b2f, + 0xff2c2a30, 0xff283e4b, 0xff214657, 0xff196c8e, 0xff1f7092, 0xff1c6c8a, + 0xff78a892, 0xffa5a94d, 0xcd938a38, 0x5e444027, 0x58413f2d, 0xb7817e48, + 0xf6a9a04f, 0xffb0a346, 0xfeb0a43d, 0xfeb2a631, 0xffb2a540, 0xffada450, + 0xeda29851, 0x86595328, 0x29171506, 0x452f2c1e, 0xad757140, 0xfdaea356, + 0xffaca14f, 0xff9e9443, 0xff99903f, 0xffa9994b, 0xffb9a459, 0xffbca65d, + 0xff243946, 0xff224456, 0xff2d282b, 0xff2d292d, 0xff362f31, 0xff41312f, + 0xff3c2b29, 0xff3c2b2a, 0xff732a15, 0xffbc4004, 0xffea880f, 0xfff2991b, + 0xffec910f, 0xffea850e, 0xffe9820d, 0xffe26d09, 0xffdf690b, 0xffde650d, + 0xffda610b, 0xffe8750a, 0xffdf6b05, 0xffeb7c09, 0xffed8109, 0xffcc5602, + 0xffd76713, 0xffda5b05, 0xffd35606, 0xffcf5109, 0xffd55a07, 0xffce5b06, + 0xffd46009, 0xffc75105, 0xffbc4805, 0xffc44b05, 0xffe0720e, 0xffae3803, + 0xffc0450d, 0xffaa2e03, 0xffaa2f06, 0xffa92e05, 0xffa82d02, 0xffb13607, + 0xffb33805, 0xffae3006, 0xff9a2e0c, 0xff332625, 0xff523731, 0xff2b4a5b, + 0xff31282a, 0xff2b2c33, 0xff2b2c30, 0xff283f4c, 0xff1f495e, 0xff16678b, + 0xff1a6e93, 0xfe2b7286, 0xfc77a9a2, 0xaf777740, 0x43312d1d, 0x563c3b25, + 0xd2938c49, 0xfeb0a344, 0xffb3a538, 0xffb2a62f, 0xfeb3a52b, 0xffb7a736, + 0xffb9a64e, 0xcc8e844b, 0x4e322e15, 0x25161309, 0x7d565436, 0xe29a9452, + 0xfdada253, 0xffaea351, 0xfeada24a, 0xffb5a351, 0xffbba35b, 0xffbea75e, + 0xffae9c56, 0xff847b43, 0xff233b48, 0xff21465b, 0xff2c282a, 0xff2e292b, + 0xff332c31, 0xff332828, 0xff372a29, 0xff362829, 0xff882e11, 0xffce5f0b, + 0xffe98d15, 0xfff29719, 0xffe8850d, 0xffea810f, 0xffe57511, 0xffde670e, + 0xffdb620b, 0xffda6c1a, 0xffd8600c, 0xffe26c08, 0xffe97d08, 0xffed8a0d, + 0xffd96702, 0xffe78612, 0xffe8790a, 0xffdc6208, 0xffd1570c, 0xffd45406, + 0xffc95106, 0xffd9690c, 0xffca5406, 0xffc04a05, 0xffc74f04, 0xffdc6809, + 0xffc85809, 0xffb13505, 0xffbf410a, 0xffa82c04, 0xffaa3006, 0xffa82c03, + 0xffac3205, 0xffb63a05, 0xffb63906, 0xffae3103, 0xff952e0f, 0xff312727, + 0xff4e3834, 0xff294556, 0xff302b2b, 0xff2a3138, 0xff2a2a30, 0xff284351, + 0xff1c4d64, 0xff13678c, 0xff156d94, 0xff407e7d, 0xec6da6af, 0x291c1b12, + 0x6c4c482e, 0xeea39d58, 0xffafa444, 0xfeb3a62d, 0xfeb4a62c, 0xffb8a637, + 0xfebaa549, 0xf3b39f59, 0xa1756b44, 0x2c1c170c, 0x3a262212, 0xb0797546, + 0xfca8a056, 0xff988f4b, 0xfe867e42, 0xffaa9e40, 0xffb5a730, 0xffb7a63c, + 0xffb3a04b, 0xff938846, 0xff736e3d, 0xff706b3c, 0xff263844, 0xff1e475c, + 0xff2d282c, 0xff2e292c, 0xff312a2e, 0xff30292b, 0xff382a2a, 0xff332728, + 0xffa14017, 0xffe07713, 0xffe78d17, 0xffea890f, 0xffec8711, 0xffe0700a, + 0xffe17015, 0xffd65d09, 0xffdc6b20, 0xffd1590e, 0xffde6307, 0xffe67809, + 0xfff0900e, 0xffe57707, 0xffef8d10, 0xffef8b0a, 0xffe06a05, 0xffe1730e, + 0xffd35d0e, 0xffce5204, 0xffe27b1a, 0xffce5d09, 0xffc24c03, 0xffcf5806, + 0xffd55d05, 0xffdd740a, 0xffb43e06, 0xffbe4009, 0xffae3204, 0xffac3206, + 0xffa92f04, 0xffaf3305, 0xffb23905, 0xffbc3e07, 0xffb93c06, 0xffac2f04, + 0xff8e2e10, 0xff312627, 0xff513a37, 0xff293f4f, 0xff322b2e, 0xff2b2b2f, + 0xff2a2b2f, 0xff24495a, 0xff1d4e68, 0xff136688, 0xff15678a, 0xff4f8b87, + 0xeb7aa7a3, 0x956d6641, 0xf3a79d55, 0xffb0a454, 0xfeb4a450, 0xffb9a644, + 0xffbca64c, 0xffbea75a, 0xf5b39f5c, 0x7b595136, 0x25141109, 0x683f3c1f, + 0xd8908b4c, 0xffa0984f, 0xff867f42, 0xfe6f6b39, 0xff797137, 0xffa2972f, + 0xffb7a92c, 0xffa3982e, 0xff7a7336, 0xff6d6937, 0xff7b743d, 0xff928a47, + 0xff26323d, 0xff20465a, 0xff2d292c, 0xff2d2a2d, 0xff30292b, 0xff30292a, + 0xff392928, 0xff3a2825, 0xffc05a17, 0xffde7b11, 0xffe07710, 0xffe98310, + 0xffe27408, 0xffee8b19, 0xffd95f09, 0xffd15e1b, 0xffd05711, 0xffd95a08, + 0xffe37106, 0xfff18e0d, 0xffe87b08, 0xffe67d0b, 0xfff49814, 0xffe47705, + 0xffe57409, 0xffe97d07, 0xffd35b04, 0xffeb7c0e, 0xffdc7115, 0xffc24b02, + 0xffd05d05, 0xffd35705, 0xffe67e0d, 0xffc95b08, 0xffb33805, 0xffb83c05, + 0xffaf3404, 0xffb23707, 0xffa82c04, 0xffb94206, 0xffb63b05, 0xffc14409, + 0xffb63b05, 0xffaa2f04, 0xff8d3214, 0xff362725, 0xff523d3b, 0xff283d49, + 0xff302e32, 0xff2c292d, 0xff2b2d32, 0xff24495d, 0xff1e4b5d, 0xff185e80, + 0xff185873, 0xff5e9799, 0xfa95a784, 0xfbbba658, 0xffb3a356, 0xff9d904e, + 0xffb59f58, 0xffbfa75b, 0xffbca55c, 0xffad9b4e, 0xf89c9148, 0xa96d693f, + 0xaf6a6635, 0xf38f8741, 0xff8b8344, 0xff78723d, 0xfe6f683a, 0xff877f36, + 0xffa69a30, 0xffb5a42e, 0xffb6a82b, 0xffa39937, 0xff7c7437, 0xff938a35, + 0xffab9e33, 0xffb0a54d, 0xff283038, 0xff234658, 0xff2d292b, 0xff2c2a2d, + 0xff31292b, 0xff33282b, 0xff3b2827, 0xff442822, 0xffd46b15, 0xffd2670b, + 0xffe0710c, 0xffde6b06, 0xfff59d1b, 0xffe2710f, 0xffcb520e, 0xffcf5513, + 0xffd35406, 0xffe16c08, 0xffed8008, 0xffe67b08, 0xffe77708, 0xffef8910, + 0xffe77f0a, 0xffe6780a, 0xffeb8310, 0xffe77707, 0xffe5730c, 0xffe57c12, + 0xffc74f03, 0xffd05d05, 0xffd35804, 0xffe4770a, 0xffda7409, 0xffbf490a, + 0xffb83a06, 0xffb23504, 0xffbb3f08, 0xffad3004, 0xffac3105, 0xffb23904, + 0xffc84a07, 0xffbe4206, 0xffb43a08, 0xffab2c05, 0xff933818, 0xff372523, + 0xff503f3f, 0xff283c47, 0xff2d3036, 0xff2d292c, 0xff2a2e34, 0xff24485b, + 0xff1d495c, 0xff1d546d, 0xff21566d, 0xfe65a1a9, 0xffada971, 0xffb39d57, + 0xfe877b44, 0xfe706b3a, 0xff8c7e46, 0xffc1a95f, 0xffb39f52, 0xfe998f3f, + 0xfe958b3c, 0xfd958d3f, 0xfe8c833e, 0xff78713a, 0xff706b3b, 0xfe7a713f, + 0xffa29353, 0xffb5a449, 0xffb6a72f, 0xffb3a72f, 0xffb2a43c, 0xffb2a537, + 0xffafa32f, 0xffb4a72a, 0xffb4a739, 0xffa79c4c +}; diff --git a/src/pixman/demos/parrot.jpg b/src/pixman/demos/parrot.jpg Binary files differnew file mode 100644 index 0000000..e7727f3 --- /dev/null +++ b/src/pixman/demos/parrot.jpg diff --git a/src/pixman/demos/quad2quad.c b/src/pixman/demos/quad2quad.c new file mode 100644 index 0000000..66b838f --- /dev/null +++ b/src/pixman/demos/quad2quad.c @@ -0,0 +1,2183 @@ +#include <math.h> +#include <stdio.h> +#include <pixman.h> + +/* This code is basically the output of Maxima translated into C. + * + * See http://maxima.sourceforge.net/ + */ +static void +quad_to_quad (double x0, double y0, + double x1, double y1, + double x2, double y2, + double x3, double y3, + + double px0, double py0, + double px1, double py1, + double px2, double py2, + double px3, double py3, + + struct pixman_f_transform *trans) +{ + double + t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18, + t19, t20, t21, t22, t23, t24, t25, t26, t27, t28, t29, t30, t31, t32, t33, t34, + t35, t36, t37, t38, t39, t40, t41, t42, t43, t44, t45, t46, t47, t48, t49, t50, + t51, t52, t53, t54, t55, t56, t57, t58, t59, t60, t61, t62, t63, t64, t65, t66, + t67, t68, t69, t70, t71, t72, t73, t74, t75, t76, t77, t78, t79, t80, t81, t82, + t83, t84, t85, t86, t87, t88, t89, t90, t91, t92, t93, t94, t95, t96, t97, t98, + t99, t100, t101, t102, t103, t104, t105, t106, t107, t108, t109, t110, t111, + t112, t113, t114, t115, t116, t117, t118, t119, t120, t121, t122, t123, + t124, t125, t126, t127, t128, t129, t130, t131, t132, t133, t134, t135, + t136, t137, t138, t139, t140, t141, t142, t143, t144, t145, t146, t147, + t148, t149, t150, t151, t152, t153, t154, t155, t156, t157, t158, t159, + t160, t161, t162, t163, t164, t165, t166, t167, t168, t169, t170, t171, + t172, t173, t174, t175, t176, t177, t178, t179, t180, t181, t182, t183, + t184, t185, t186, t187, t188, t189, t190, t191, t192, t193, t194, t195, + t196, t197, t198, t199, t200, t201, t202, t203, t204, t205, t206, t207, + t208, t209, t210, t211, t212, t213, t214, t215, t216, t217, t218, t219, + t220, t221, t222, t223, t224, t225, t226, t227, t228, t229, t230, t231, + t232, t233, t234, t235, t236, t237, t238, t239, t240, t241, t242, t243, + t244, t245, t246, t247, t248, t249, t250, t251, t252, t253, t254, t255, + t256, t257, t258, t259, t260, t261, t262, t263, t264, t265, t266, t267, + t268, t269, t270, t271, t272, t273, t274, t275, t276, t277, t278, t279, + t280, t281, t282, t283, t284, t285, t286, t287, t288, t289, t290, t291, + t292, t293, t294, t295, t296, t297, t298, t299, t300, t301, t302, t303, + t304, t305, t306, t307, t308, t309, t310, t311, t312, t313, t314, t315, + t316, t317, t318, t319, t320, t321, t322, t323, t324, t325, t326, t327, + t328, t329, t330, t331, t332, t333, t334, t335, t336, t337, t338, t339, + t340, t341, t342, t343, t344, t345, t346, t347, t348, t349, t350, t351, + t352, t353, t354, t355, t356, t357, t358, t359, t360, t361, t362, t363, + t364, t365, t366, t367, t368, t369, t370, t371, t372, t373, t374, t375, + t376, t377, t378, t379, t380, t381, t382, t383, t384, t385, t386, t387, + t388, t389, t390, t391, t392, t393, t394, t395, t396, t397, t398, t399, + t400, t401, t402, t403, t404, t405, t406, t407, t408, t409, t410, t411, + t412, t413, t414, t415, t416, t417, t418, t419, t420, t421, t422, t423, + t424, t425, t426, t427, t428, t429, t430, t431, t432, t433, t434, t435, + t436, t437, t438, t439, t440, t441, t442, t443, t444, t445, t446, t447, + t448, t449, t450, t451, t452, t453, t454, t455, t456, t457, t458, t459, + t460, t461, t462, t463, t464, t465, t466, t467, t468, t469, t470, t471, + t472, t473, t474, t475, t476, t477, t478, t479, t480, t481, t482, t483, + t484, t485, t486, t487, t488, t489, t490, t491, t492, t493, t494, t495, + t496, t497, t498, t499, t500, t501, t502, t503, t504, t505, t506, t507, + t508, t509, t510, t511, t512, t513, t514, t515, t516, t517, t518, t519, + t520, t521, t522, t523, t524, t525, t526, t527, t528, t529, t530, t531, + t532, t533, t534, t535, t536, t537, t538, t539, t540, t541, t542, t543, + t544, t545, t546, t547, t548, t549, t550, t551, t552, t553, t554, t555, + t556, t557, t558, t559, t560, t561, t562, t563, t564, t565, t566, t567, + t568, t569, t570, t571, t572, t573, t574, t575, t576, t577, t578, t579, + t580, t581, t582, t583, t584, t585, t586, t587, t588, t589, t590, t591, + t592, t593, t594, t595, t596, t597, t598, t599, t600, t601, t602, t603, + t604, t605, t606, t607, t608, t609, t610, t611, t612, t613, t614, t615, + t616, t617, t618, t619, t620, t621, t622, t623, t624, t625, t626, t627, + t628, t629, t630, t631, t632, t633, t634, t635, t636, t637, t638, t639, + t640, t641, t642, t643, t644, t645, t646, t647, t648, t649, t650, t651, + t652, t653, t654, t655, t656, t657, t658, t659, t660, t661, t662, t663, + t664, t665, t666, t667, t668, t669, t670, t671, t672, t673, t674, t675, + t676, t677, t678, t679, t680, t681, t682, t683, t684, t685, t686, t687, + t688, t689, t690, t691, t692, t693, t694, t695, t696, t697, t698, t699, + t700, t701, t702, t703, t704, t705, t706, t707, t708, t709, t710, t711, + t712, t713, t714, t715, t716, t717, t718, t719, t720, t721, t722, t723, + t724, t725, t726, t727, t728, t729, t730, t731, t732, t733, t734, t735, + t736, t737, t738, t739, t740, t741, t742, t743, t744, t745, t746, t747, + t748, t749, t750, t751, t752, t753, t754, t755, t756, t757, t758, t759, + t760, t761, t762, t763, t764, t765, t766, t767, t768, t769, t770, t771, + t772, t773, t774, t775, t776, t777, t778, t779, t780, t781, t782, t783, + t784, t785, t786, t787, t788, t789, t790, t791, t792, t793, t794, t795, + t796, t797, t798, t799, t800, t801, t802, t803, t804, t805, t806, t807, + t808, t809, t810, t811, t812, t813, t814, t815, t816, t817, t818, t819, + t820, t821, t822, t823, t824, t825, t826, t827, t828, t829, t830, t831, + t832, t833, t834, t835, t836, t837, t838, t839, t840, t841, t842, t843, + t844, t845, t846, t847, t848, t849, t850, t851, t852, t853, t854, t855, + t856, t857, t858, t859, t860, t861, t862, t863, t864, t865, t866, t867, + t868, t869, t870, t871, t872, t873, t874, t875, t876, t877, t878, t879, + t880, t881, t882, t883, t884, t885, t886, t887, t888, t889, t890, t891, + t892, t893, t894, t895, t896, t897, t898, t899, t900, t901, t902, t903, + t904, t905, t906, t907, t908, t909, t910, t911, t912, t913, t914, t915, + t916, t917, t918, t919, t920, t921, t922, t923, t924, t925, t926, t927, + t928, t929, t930, t931, t932, t933, t934, t935, t936, t937, t938, t939, + t940, t941, t942, t943, t944, t945, t946, t947, t948, t949, t950, t951, + t952, t953, t954, t955, t956, t957, t958, t959, t960, t961, t962, t963, + t964, t965, t966, t967, t968, t969, t970, t971, t972, t973, t974, t975, + t976, t977, t978, t979, t980, t981, t982, t983, t984, t985, t986, t987, + t988, t989, t990, t991, t992, t993, t994, t995, t996, t997, t998, t999, + t1000, t1001, t1002, t1003, t1004, t1005, t1006, t1007, t1008, t1009, + t1010, t1011, t1012, t1013, t1014, t1015, t1016, t1017, t1018, t1019, + t1020, t1021, t1022, t1023, t1024, t1025, t1026, t1027, t1028, t1029, + t1030, t1031, t1032, t1033, t1034, t1035, t1036, t1037, t1038, t1039, + t1040, t1041, t1042, t1043, t1044, t1045, t1046, t1047, t1048, t1049, + t1050, t1051, t1052, t1053, t1054, t1055, t1056, t1057, t1058, t1059, + t1060, t1061, t1062, t1063, t1064, t1065, t1066, t1067, t1068, t1069, + t1070, t1071, t1072, t1073; + + t1 = y1 * y1; + t2 = x3 * x3; + t3 = px2 * px3 * t2; + t4 = (t3 - px2 * px3 * x2 * x3) * y2; + t5 = x2 * x2; + t6 = px2 * px3 * t5 * y3; + + t7 = - px2 * px3 * x2 * x3 * y3; + t8 = py1 * (t7 + t6 + t4); + t9 = px3 * py2 * x2 * x3; + + t10 = - px3 * py2 * t2; + t11 = (t10 + t9) * y2; + t12 = - px2 * py3 * t5 * y3; + + t13 = px2 * py3 * x2 * x3 * y3; + t14 = y0 * y0; + t15 = - px3 * py2; + t16 = px2 * py3; + + t17 = t16 + t15; + t18 = t17 * x2; + t19 = px3 * py2 * x3; + t20 = - px2 * py3 * x3; + + t21 = t20 + t19 + t18; + t22 = px2 * px3 * t5; + t23 = - 2 * px2 * px3 * x2 * x3; + + t24 = py1 * (t3 + t23 + t22); + t25 = - px2 * py3 * t5; + t26 = px2 * py3 * x3; + + t27 = x2 * (t26 + t19); + t28 = t10 + t27 + t25; + t29 = x1 * x1; + t30 = px3 * py2; + + t31 = - px2 * py3; + t32 = t31 + t30; + t33 = t32 * y2; + t34 = - px3 * py2 * y3; + + t35 = px2 * py3 * y3; + t36 = t35 + t34 + t33; + t37 = - px2 * px3 * t2; + + t38 = (t37 + px2 * px3 * x2 * x3) * y2; + t39 = - px2 * px3 * t5 * y3; + + t40 = px2 * px3 * x2 * x3 * y3; + t41 = py1 * (t40 + t39 + t38); + t42 = - px2 * py3 * x2 * x3; + + t43 = px3 * py2 * t2; + t44 = (t43 + t42) * y2; + t45 = px2 * py3 * t5 * y3; + + t46 = - px3 * py2 * x2 * x3 * y3; + t47 = (px2 * px3 * x3 - px2 * px3 * x2) * y2; + + t48 = px2 * px3 * x2 * y3; + t49 = - px2 * px3 * x3 * y3; + t50 = py1 * (t49 + t48 + t47); + + t51 = px2 * py3 * x2; + t52 = - 2 * px3 * py2 * x3; + t53 = (t26 + t52 + t51) * y2; + + t54 = px3 * py2 * x3 * y3; + t55 = px3 * py2 * y3; + t56 = - 2 * px2 * py3 * y3; + t57 = t56 + t55; + + t58 = x2 * t57; + t59 = - px2 * px3 * t5; + t60 = 2 * px2 * px3 * x2 * x3; + t61 = - px2; + + t62 = px3 + t61; + t63 = t62 * x2; + t64 = px2 * x3; + t65 = - px3 * x3; + t66 = t65 + t64 + t63; + + t67 = px2 * t5; + t68 = - px2 * x3; + t69 = x2 * (t65 + t68); + t70 = px3 * t2; + + t71 = t70 + t69 + t67; + t72 = - px3; + t73 = t72 + px2; + t74 = - px2 * y3; + t75 = px3 * y3; + + t76 = t75 + t74 + t73 * y2; + t77 = px2 * x2 * x3; + t78 = - px3 * t2; + t79 = - px2 * t5 * y3; + + t80 = px3 * x2 * x3 * y3; + t81 = t80 + t79 + (t78 + t77) * y2; + + t82 = (px2 * px3 * x2 - px2 * px3 * x3) * y2; + t83 = - px2 * px3 * x2 * y3; + + t84 = px2 * px3 * x3 * y3; + t85 = - px2 * x2; + t86 = 2 * px3 * x3; + t87 = - px3 * x3 * y3; + + t88 = 2 * px2 * y3; + t89 = - px3 * y3; + t90 = t89 + t88; + t91 = x2 * t90; + + t92 = t91 + t87 + (t86 + t68 + t85) * y2; + t93 = px2 * py3 * t5; + t94 = - px3 * py2 * x3; + + t95 = x2 * (t20 + t94); + t96 = t32 * x2; + t97 = t73 * x2; + t98 = px3 * x3; + + t99 = t98 + t68 + t97; + t100 = py1 * t99; + t101 = - px2 * t5; + t102 = x2 * (t98 + t64); + + t103 = t78 + t102 + t101; + t104 = py1 * t103; + t105 = - py2; + t106 = py3 + t105; + + t107 = py2 * y3; + t108 = - py3 * y3; + t109 = t108 + t107 + t106 * y2; + t110 = - px3 * x2 * x3; + + t111 = px2 * t5 * y3; + t112 = - px2 * x2 * x3 * y3; + t113 = t112 + t111 + (t70 + t110) * y2; + + t114 = - py2 * x3; + t115 = py3 * x3; + t116 = t115 + t114; + t117 = py2 * x3 * y3; + + t118 = - py3 * x3 * y3; + t119 = t118 + t117; + t120 = x2 * t119; + + t121 = px1 * (t120 + x2 * t116 * y2); + t122 = - px3 * py2 * x2; + t123 = (t19 + t122) * y2; + + t124 = px2 * py3 * x2 * y3; + t125 = - px2 * py3 * x3 * y3; + t126 = px3 * x2; + + t127 = - px2 * x2 * y3; + t128 = px2 * x3 * y3; + t129 = t128 + t127 + (t65 + t126) * y2; + + t130 = - py3; + t131 = t130 + py2; + t132 = t131 * x2; + t133 = py2 * x3; + t134 = - py3 * x3; + + t135 = - py2 * x3 * y3; + t136 = py3 * x3 * y3; + t137 = - py2 * y3; + t138 = py3 * y3; + + t139 = t138 + t137; + t140 = x2 * t139; + + t141 = px1 * (t140 + t136 + t135 + (t134 + t133 + t132) * y2); + t142 = y2 * y2; + + t143 = - px3 * py2 * x3 * y3; + t144 = px2 * py3 * x3 * y3; + t145 = t144 + t143; + + t146 = t142 * t145; + t147 = y3 * y3; + t148 = px3 * py2 * t147; + t149 = - px2 * py3 * t147; + + t150 = t149 + t148; + t151 = x2 * y2 * t150; + t152 = t151 + t146; + t153 = - px2 * py3 * y3; + + t154 = t153 + t55; + t155 = t142 * t154; + t156 = - px3 * py2 * t147; + + t157 = px2 * py3 * t147; + t158 = t157 + t156; + t159 = y2 * t158; + t160 = t159 + t155; + + t161 = x0 * x0; + t162 = py1 * t76; + t163 = px1 * t109; + t164 = px2 * y3; + t165 = t89 + t164; + + t166 = - px2 * t147; + t167 = px3 * t147; + t168 = t167 + t166; + + t169 = y2 * t168 + t142 * t165; + t170 = py1 * t169; + t171 = py2 * t147; + + t172 = - py3 * t147; + t173 = t172 + t171; + t174 = y2 * t173 + t142 * t139; + + t175 = px1 * t174; + t176 = t17 * t142; + t177 = px2 * t147; + t178 = - px3 * t147; + + t179 = t178 + t177 + t62 * t142; + t180 = - py2 * t147; + t181 = py3 * t147; + + t182 = t181 + t180 + t131 * t142; + + t183 = y1 * (px1 * t182 + py1 * t179 + t149 + t148 + t176) + + t175 + t170 + t159 + t1 * (t163 + t162 + t35 + t34 + t33) + t155; + + t184 = - px2 * px3 * t2 * t142; + t185 = 2 * px2 * px3 * x2 * x3 * y2 * y3; + + t186 = - px2 * px3 * t5 * t147; + t187 = py1 * (t186 + t185 + t184); + + t188 = px3 * py2 * t2 * t142; + t189 = x2 * y2 * (t125 + t143); + t190 = px2 * py3 * t5 * t147; + + t191 = t190 + t189 + t188; + t192 = px2 * px3 * x3 * t142; + t193 = y2 * (t49 + t83); + + t194 = px2 * px3 * x2 * t147; + t195 = py1 * (t194 + t193 + t192); + + t196 = - px3 * py2 * x3 * t142; + t197 = 2 * px3 * py2 * x3 * y3; + t198 = 2 * px2 * py3 * y3; + + t199 = t198 + t34; + t200 = x2 * t199; + t201 = y2 * (t200 + t125 + t197); + + t202 = - px2 * py3 * x2 * t147; + t203 = - px2 * x3 * y3; + t204 = px3 * x3 * y3; + + t205 = t204 + t203; + t206 = t142 * t205; + t207 = t178 + t177; + t208 = x2 * y2 * t207; + + t209 = t208 + t206; + t210 = px2 * px3 * t2 * t142; + t211 = - 2 * px2 * px3 * x2 * x3 * y2 * y3; + + t212 = px2 * px3 * t5 * t147; + t213 = - px3 * t2 * t142; + t214 = x2 * y2 * (t204 + t128); + + t215 = - px2 * t5 * t147; + t216 = t215 + t214 + t213; + t217 = - px2 * px3 * x3 * t142; + + t218 = y2 * (t84 + t48); + t219 = - px2 * px3 * x2 * t147; + t220 = px3 * x3 * t142; + + t221 = - 2 * px3 * x3 * y3; + t222 = - 2 * px2 * y3; + t223 = t75 + t222; + t224 = x2 * t223; + + t225 = y2 * (t224 + t221 + t128); + t226 = px2 * x2 * t147; + t227 = t226 + t225 + t220; + + t228 = t125 + t54; + t229 = t142 * t228; + t230 = x2 * y2 * t158; + t231 = t87 + t128; + + t232 = t142 * t231; + t233 = x2 * y2 * t168; + t234 = t233 + t232; + t235 = py1 * t234; + + t236 = - px3 * py2 * t2 * t142; + t237 = x2 * y2 * (t144 + t54); + + t238 = - px2 * py3 * t5 * t147; + t239 = px3 * t2 * t142; + t240 = x2 * y2 * (t87 + t203); + + t241 = px2 * t5 * t147; + t242 = t241 + t240 + t239; + t243 = py1 * t242; + + t244 = px2 * py3 * x3 * t142; + t245 = - px2 * py3 * x2 * y3; + t246 = y2 * (t143 + t245); + + t247 = px3 * py2 * x2 * t147; + t248 = - px2 * x3 * t142; + t249 = px2 * x2 * y3; + + t250 = y2 * (t204 + t249); + t251 = - px3 * x2 * t147; + t252 = t251 + t250 + t248; + + t253 = t134 + t133; + t254 = t253 * t142; + t255 = t108 + t107; + t256 = x2 * t255; + + t257 = t256 + t136 + t135; + t258 = y2 * t257; + t259 = t181 + t180; + t260 = x2 * t259; + + t261 = px1 * (t260 + t258 + t254); + t262 = py1 * (t37 + t60 + t59); + + t263 = t43 + t95 + t93; + t264 = px1 * t263; + t265 = t26 + t94; + t266 = x2 * t265 * y2; + + t267 = x2 * t228; + t268 = t267 + t266; + t269 = py1 * (t84 + t83 + t82); + + t270 = - 2 * px2 * py3; + t271 = (t26 + (t270 + t30) * x2) * y2; + t272 = px3 * py2 * x2 * y3; + + t273 = - 2 * px3 * py2 * x3 * y3; + t274 = t149 + t148 + t176; + + t275 = py1 * (t212 + t211 + t210); + t276 = t238 + t237 + t236; + t277 = px1 * t276; + + t278 = py1 * (t219 + t218 + t217); + t279 = 2 * px3 * py2 * x3; + t280 = t20 + t279; + + t281 = t280 * t142; + t282 = - px3 * py2 * x2 * y3; + t283 = y2 * (t125 + t282); + + t284 = 2 * px2 * py3 * t147; + t285 = x2 * (t284 + t156); + t286 = px1 * t103; + + t287 = t98 + t68; + t288 = x2 * t287 * y2; + t289 = x2 * t231; + t290 = t289 + t288; + + t291 = 2 * px2; + t292 = - px3 * x2 * y3; + t293 = 2 * px3 * x3 * y3; + + t294 = t293 + t203 + t292 + (t68 + (t72 + t291) * x2) * y2; + t295 = px1 * t242; + + t296 = - 2 * px3 * x3; + t297 = t296 + t64; + t298 = px3 * x2 * y3; + t299 = y2 * (t128 + t298); + + t300 = - 2 * px2 * t147; + t301 = x2 * (t167 + t300) + t299 + t297 * t142; + t302 = py1 * t71; + + t303 = py1 * t290; + t304 = 2 * py2 * x3; + t305 = - 2 * py3 * x3; + t306 = - 2 * py2 * x3 * y3; + + t307 = 2 * py3 * x3 * y3; + t308 = t307 + t306; + t309 = - 2 * px2 * py3 * x3; + + t310 = (t309 + t19 + t51) * y2; + t311 = - 2 * px3 * py2 * y3; + t312 = t35 + t311; + + t313 = x2 * t312; + t314 = 2 * px2 * x3; + t315 = 2 * px3 * y3; + t316 = t315 + t74; + + t317 = x2 * t316; + t318 = t317 + t87 + (t65 + t314 + t85) * y2; + t319 = t106 * x2; + + t320 = px1 * (t256 + t118 + t117 + (t115 + t114 + t319) * y2); + t321 = py1 * t216; + + t322 = 2 * px2 * py3 * x3 * y3; + t323 = 2 * px3 * py2 * y3; + t324 = t153 + t323; + + t325 = x2 * t324; + t326 = y2 * (t325 + t322 + t143); + t327 = - 2 * px2 * x3 * y3; + + t328 = - 2 * px3 * y3; + t329 = t328 + t164; + t330 = x2 * t329; + + t331 = y2 * (t330 + t204 + t327); + t332 = t226 + t331 + t220; + t333 = t116 * t142; + + t334 = t140 + t118 + t117; + t335 = y2 * t334; + t336 = x2 * t173; + + t337 = px1 * (t336 + t335 + t333); + t338 = t26 + t94 + t96; + t339 = t17 * y2; + + t340 = t153 + t55 + t339; + t341 = px2 * px3 * t142; + t342 = - 2 * px2 * px3 * y2 * y3; + + t343 = px2 * px3 * t147; + t344 = py1 * (t343 + t342 + t341); + t345 = - px2 * py3 * t142; + + t346 = y2 * (t35 + t55); + t347 = t156 + t346 + t345; + t348 = px1 * t347 + t344; + + t349 = t89 + t164 + t62 * y2; + t350 = - px2 * px3 * t142; + t351 = 2 * px2 * px3 * y2 * y3; + + t352 = - px2 * px3 * t147; + t353 = px2 * t142; + t354 = y2 * (t89 + t74); + + t355 = t167 + t354 + t353; + t356 = px1 * t355 + t352 + t351 + t350; + t357 = py1 * t66; + + t358 = py1 * t349; + t359 = 2 * py2; + t360 = - 2 * py3; + t361 = - 2 * py2 * y3; + + t362 = 2 * py3 * y3; + t363 = px3 * py2 * t142; + t364 = y2 * (t153 + t34); + + t365 = - px3 * t142; + t366 = y2 * (t75 + t164); + t367 = t166 + t366 + t365; + + t368 = py1 * t367; + t369 = px1 * (t172 + t171 + t106 * t142); + t370 = t35 + t34; + + t371 = t142 * t370; + t372 = y2 * t150; + t373 = t372 + t371; + t374 = t230 + t229; + + t375 = py1 * (t352 + t351 + t350); + t376 = t157 + t364 + t363; + t377 = px1 * t376 + t375; + + t378 = t75 + t74; + t379 = y2 * t207 + t142 * t378; + t380 = px1 * t367 + t343 + t342 + t341; + + t381 = py1 * t209; + t382 = py1 * t355; + t383 = py1 * t379; + t384 = 2 * py2 * y3; + + t385 = - 2 * py3 * y3; + t386 = t385 + t384; + t387 = - 2 * py2 * t147; + t388 = 2 * py3 * t147; + + t389 = px2 * py3 * t2; + t390 = t389 + t10; + t391 = x2 * t390 * y2; + t392 = t5 * t228; + + t393 = - px2 * t2; + t394 = t70 + t393; + t395 = x2 * t394 * y2; + t396 = t5 * t231; + + t397 = t396 + t395; + t398 = py1 * t397; + t399 = py2 * t2; + t400 = - py3 * t2; + + t401 = t400 + t399; + t402 = x2 * t401 * y2; + t403 = t136 + t135; + t404 = t5 * t403; + + t405 = t404 + t402; + t406 = px1 * t405; + t407 = t1 * (t406 + t398 + t392 + t391); + + t408 = t65 + t64; + t409 = t5 * t408; + t410 = x2 * t394; + t411 = t410 + t409; + + t412 = py1 * t411; + t413 = t5 * t116; + t414 = x2 * t401; + t415 = t414 + t413; + + t416 = px1 * t415; + t417 = py2 * t5; + t418 = x2 * (t134 + t114); + t419 = py3 * t2; + + t420 = t419 + t418 + t417; + t421 = px1 * t420; + t422 = t265 * y2; + t423 = x2 * t154; + + t424 = px2 * x2; + t425 = (t68 + t424) * y2; + t426 = - py2 * x2; + t427 = (t133 + t426) * y2; + + t428 = py3 * x2 * y3; + t429 = t20 + t19; + t430 = x2 * t429; + t431 = - px2 * py3 * t2; + + t432 = (t431 + t43 + t430) * y2; + t433 = t5 * t370; + t434 = x2 * t145; + + t435 = - px2 * x2 * x3; + t436 = px2 * t2; + t437 = (t436 + t435) * y2; + t438 = px3 * t5 * y3; + + t439 = - px3 * x2 * x3 * y3; + t440 = py2 * x2 * x3; + t441 = - py2 * t2; + + t442 = (t441 + t440) * y2; + t443 = - py3 * t5 * y3; + t444 = py3 * x2 * x3 * y3; + + t445 = t5 * t287; + t446 = t78 + t436; + t447 = x2 * t446; + t448 = - t2; + + t449 = t448 + 2 * x2 * x3 - t5; + t450 = px1 * t449; + t451 = (t98 + t85) * y2; + t452 = - x2 * y3; + + t453 = x3 * y3; + t454 = t453 + t452 + (x2 - x3) * y2; + t455 = px1 * t454; + t456 = t65 + t314; + + t457 = x2 * t456; + t458 = (t78 + t457) * y2; + t459 = x2 * (t293 + t203); + + t460 = - x2 * x3 * y3 + t5 * y3 + (t2 - x2 * x3) * y2; + t461 = px1 * t460; + t462 = t5 * t253; + + t463 = t419 + t441; + t464 = x2 * t463; + t465 = - py2 * t5; + t466 = x2 * (t115 + t133); + + t467 = t2 - 2 * x2 * x3 + t5; + t468 = py1 * t467; + t469 = py2 * x2; + t470 = (t134 + t469) * y2; + + t471 = - py2 * x2 * y3; + t472 = x2 * y3; + t473 = - x3 * y3; + t474 = t473 + t472 + (x3 - x2) * y2; + + t475 = py1 * t474; + t476 = - 2 * py2 * x3; + t477 = t115 + t476; + t478 = x2 * t477; + + t479 = (t419 + t478) * y2; + t480 = py2 * t5 * y3; + t481 = - 2 * py3 * x3 * y3; + + t482 = x2 * (t481 + t117); + t483 = x2 * x3 * y3 - t5 * y3 + (t448 + x2 * x3) * y2; + + t484 = py1 * t483; + t485 = t431 + t43; + t486 = t485 * t142; + t487 = t5 * t158; + + t488 = t446 * t142; + t489 = t5 * t168; + t490 = t489 + t488; + t491 = py1 * t490; + + t492 = t463 * t142; + t493 = t5 * t173; + t494 = t493 + t492; + t495 = px1 * t494; + + t496 = x1 * y1 * (t495 + t491 + t487 + t486); + t497 = t142 * t119; + t498 = x2 * y2 * t259; + + t499 = t498 + t497; + t500 = px1 * t499; + t501 = t29 * (t500 + t381 + t151 + t146); + + t502 = t429 * t142; + t503 = x2 * t370; + t504 = y2 * (t503 + t125 + t54); + t505 = x2 * t158; + + t506 = - px3 * x3 * t142; + t507 = - px2 * x2 * t147; + t508 = py3 * x3 * t142; + + t509 = y2 * (t118 + t471); + t510 = py2 * x2 * t147; + t511 = - py2 * t142; + + t512 = y2 * (t138 + t107); + t513 = t172 + t512 + t511; + t514 = px1 * t513; + + t515 = y2 * t259 + t142 * t255; + t516 = px1 * t515; + t517 = py1 * t454; + + t518 = - py2 * x3 * t142; + t519 = t108 + t384; + t520 = x2 * t519; + + t521 = y2 * (t520 + t307 + t135); + t522 = - py3 * x2 * t147; + t523 = py2 * t142; + + t524 = y2 * (t108 + t137); + t525 = - t147 + 2 * y2 * y3 - t142; + t526 = py1 * t525; + + t527 = x2 * t147 + y2 * (t473 + t452) + x3 * t142; + t528 = py1 * t527; + t529 = px1 * t474; + + t530 = px2 * x3 * t142; + t531 = px3 * x2 * t147; + + t532 = - x2 * t147 + y2 * (t453 + t472) - x3 * t142; + t533 = px1 * t532; + + t534 = - px2 * t142; + t535 = t147 - 2 * y2 * y3 + t142; + t536 = px1 * t535; + + t537 = t447 + t445; + t538 = py1 * t537; + t539 = t464 + t462; + t540 = px1 * t539; + + t541 = 2 * px3 * py2 * t2; + t542 = - 2 * px2 * py3 * t2; + t543 = x2 * t446 * y2; + + t544 = t5 * t205; + t545 = t544 + t543; + t546 = py1 * t545; + t547 = x2 * t463 * y2; + + t548 = t5 * t119; + t549 = t548 + t547; + t550 = px1 * t549; + t551 = x2 * t265; + + t552 = (t389 + t10 + t551) * y2; + t553 = t5 * t154; + t554 = 2 * px3 * t2; + + t555 = (t554 + t393 + t110) * y2; + t556 = t5 * t90; + t557 = py3 * x2 * x3; + + t558 = - 2 * py3 * t2; + t559 = (t558 + t399 + t557) * y2; + t560 = py2 * x2 * x3 * y3; + + t561 = t138 + t361; + t562 = t5 * t561; + t563 = t390 * t142; + t564 = t5 * t150; + + t565 = - px2 * t2 * t142; + t566 = - px3 * t5 * t147; + t567 = t566 + t214 + t565; + + t568 = py1 * t567; + t569 = py2 * t2 * t142; + t570 = x2 * y2 * (t118 + t135); + + t571 = py3 * t5 * t147; + t572 = t571 + t570 + t569; + t573 = px1 * t572; + t574 = t86 + t68; + + t575 = x2 * t574; + t576 = (t78 + t575) * y2; + t577 = 2 * px2 * x3 * y3; + + t578 = x2 * (t87 + t577); + t579 = px1 * t527; + + t580 = - t5 * t147 + 2 * x2 * x3 * y2 * y3 - t2 * t142; + t581 = px1 * t580; + t582 = t305 + t133; + + t583 = x2 * t582; + t584 = (t419 + t583) * y2; + t585 = x2 * (t136 + t306); + + t586 = py1 * t532; + t587 = - py3 * t2 * t142; + t588 = x2 * y2 * (t136 + t117); + + t589 = - py2 * t5 * t147; + t590 = t5 * t147 - 2 * x2 * x3 * y2 * y3 + t2 * t142; + + t591 = py1 * t590; + t592 = t400 + t466 + t465; + t593 = px1 * t592; + t594 = t309 + t279; + + t595 = t198 + t311; + t596 = x2 * t378; + t597 = t596 + t408 * y2; + t598 = py1 * t597; + + t599 = t256 + t116 * y2; + t600 = px1 * t599; + t601 = t178 + t366 + t534; + + t602 = py1 * t601; + t603 = t181 + t524 + t523; + t604 = px1 * t603; + t605 = t265 * t142; + + t606 = t423 + t144 + t143; + t607 = y2 * t606; + t608 = x2 * t150; + t609 = 2 * py2 * x3 * y3; + + t610 = t362 + t137; + t611 = x2 * t610; + t612 = y2 * (t611 + t118 + t609); + + t613 = py1 * t449; + t614 = t419 + t613 + t418 + t417; + t615 = py1 * t460; + + t616 = py1 * t535; + t617 = t616 + t172 + t512 + t511; + t618 = t134 + t304; + + t619 = t618 * t142; + t620 = - py3 * x2 * y3; + t621 = y2 * (t135 + t620); + + t622 = x2 * (t388 + t180); + t623 = px1 * t467; + t624 = t623 + t78 + t102 + t101; + + t625 = px1 * t483; + t626 = px1 * t525; + t627 = t167 + t626 + t354 + t353; + + t628 = - 2 * px2 * x3; + t629 = t98 + t628; + t630 = t629 * t142; + t631 = - 2 * px3 * t147; + + t632 = x2 * (t631 + t177); + t633 = - 2 * px2 * py3 * x3 * y3; + t634 = t633 + t197; + + t635 = - 2 * px3 * py2 * t147; + t636 = t142 * t403; + t637 = x2 * y2 * t173; + + t638 = t637 + t636; + t639 = px1 * t638; + t640 = t589 + t588 + t587; + t641 = px1 * t640; + + t642 = px1 * t590; + t643 = py1 * t580; + + t644 = (x0 * (px0 * (y1 * (x1 * (t528 + t522 + t612 + t518) + + t643 + t571 + t570 + t569) + + t29 * t515 + x1 * t638 + t1 * (t615 + t444 + t443 + t442)) + + py0 * (y1 * (x1 * (t533 + t531 + t331 + t530) + + t642 + t566 + t214 + t565) + + x1 * t234 + t29 * t379 + t1 * (t625 + t439 + t438 + t437)) + + y1 * (x1 * (px1 * (t622 + t621 + t619) + py1 * (t632 + t299 + t630) + + t608 + t607 + t605) + + t641 + t243 + t564 + t563) + + x1 * (t639 + t235 + x2 * y2 * (t284 + t635) + t142 * t634) + + t29 * (t175 + t170) + + t1 * (px1 * (t482 + t480 + t479) + py1 * (t459 + t79 + t458) + t434 + + t433 + t432)) + + y0 * (x0 * (py0 * (x1 * (t579 + t632 + t299 + t630) + + t489 + t29 * t627 + + y1 * (x1 * t597 + t625 + t556 + t112 + t555) + t488 + + t624 * t1) + + px0 * (x1 * (t586 + t622 + t621 + t619) + + t29 * t617 + t493 + + y1 * (x1 * t599 + t615 + t562 + t560 + t559) + t492 + + t614 * t1) + + x1 * (px1 * (t522 + t612 + t518) + py1 * (t531 + t331 + t530) + + t608 + t607 + t605) + + t29 * (t604 + t602) + t487 + + y1 * (x1 * (t600 + t598 + x2 * t595 + t594 * y2) + + px1 * (t585 + t480 + t584) + py1 * (t578 + t79 + t576) + t267 + + t553 + t552) + t486 + (t593 + t302) * t1) + + px0 * (x1 * (t591 + t589 + t588 + t587) + + t29 * (t586 + t510 + t509 + t508) + + y1 * (x1 * (t484 + t585 + t480 + t584) + t548 + t547) + t415 * t1) + + py0 * (x1 * (t581 + t241 + t240 + t239) + + t29 * (t579 + t507 + t250 + t506) + + y1 * (x1 * (t461 + t578 + t79 + t576) + t544 + t543) + t411 * t1) + + x1 * (t573 + t568 + t564 + t563) + + t29 * (px1 * (t522 + t521 + t518) + py1 * (t531 + t225 + t530) + t505 + + t504 + t502) + + y1 * (x1 * (px1 * (t562 + t560 + t559) + py1 * (t556 + t112 + t555) + + t267 + t553 + t552) + + t550 + t546 + t5 * (t322 + t273) + x2 * (t542 + t541) * y2) + + (t540 + t538) * t1) + + t161 * (py0 * (y1 * (x1 * (t536 + t178 + t366 + t534) + + t533 + t531 + t225 + t530) + + x1 * t169 + t208 + t1 * (t529 + t204 + t292 + t425) + t206) + + px0 * (y1 * (t528 + x1 * (t181 + t526 + t524 + t523) + t522 + t521 + + t518) + + x1 * t174 + t498 + t1 * (t517 + t118 + t428 + t427) + t497) + + x1 * (t516 + t383) + + y1 * (x1 * (t514 + t382) + px1 * (t510 + t509 + t508) + + py1 * (t507 + t250 + t506) + t505 + t504 + + t502) + t151 + + t1 * (px1 * (t136 + t471 + t470) + py1 * (t87 + t249 + t451) + t423 + + t422) + t146) + t501 + t496 + + t14 * (px0 * (x1 * (t484 + t482 + t480 + t479) + + t29 * (t475 + t136 + t471 + t470) + t404 + t402 + + (x1 * (t468 + t400 + t466 + t465) + t464 + t462) * y1) + + py0 * (x1 * (t461 + t459 + t79 + t458) + + t29 * (t455 + t87 + t249 + t451) + t396 + t395 + + (x1 * (t70 + t450 + t69 + t67) + t447 + t445) * y1) + + x1 * (px1 * (t444 + t443 + t442) + py1 * (t439 + t438 + t437) + t434 + + t433 + t432) + + t29 * (px1 * (t118 + t428 + t427) + py1 * (t204 + t292 + t425) + t423 + + t422) + t392 + t391 + + (x1 * (t421 + t104) + t416 + t412) * y1) + t407); + t645 = t5 * t265; + + t646 = t115 + t114 + t132; + t647 = px1 * t646; + t648 = x2 * t485; + t649 = t32 * t5; + + t650 = t70 + t393 + t73 * t5; + t651 = t400 + t399 + t106 * t5; + + t652 = t540 + x1 * (px1 * t651 + py1 * t650 + t389 + t10 + t649) + t538 + t648 + + t29 * (t647 + t357 + t20 + t19 + t18) + t645; + t653 = t648 + t645; + + t654 = t392 + t391; + t655 = px1 * t654; + t656 = t309 + t19; + t657 = x2 * t656; + + t658 = (t389 + t657) * y2; + t659 = px3 * py2 * t5 * y3; + t660 = x2 * (t144 + t273); + + t661 = - px3 * py2 * t5; + t662 = t431 + t27 + t661; + t663 = px1 * t662 + t24; + + t664 = t5 * t429; + t665 = x2 * t390; + t666 = t665 + t664; + t667 = px3 * py2 * x2; + + t668 = (t20 + t667) * y2; + t669 = x2 * t485 * y2; + t670 = t5 * t145; + t671 = t670 + t669; + + t672 = px1 * t671; + t673 = t26 + t52; + t674 = x2 * t673; + t675 = (t389 + t674) * y2; + + t676 = x2 * (t633 + t54); + t677 = px3 * t5; + t678 = t436 + t69 + t677; + + t679 = px1 * t678 + t37 + t60 + t59; + t680 = - px3 * x2; + + t681 = t203 + t298 + (t64 + t680) * y2; + t682 = px1 * t545; + t683 = - px3 * t5 * y3; + + t684 = t578 + t683 + (t393 + t575) * y2; + t685 = 2 * py3 * x3; + t686 = t685 + t476; + + t687 = 2 * py2 * t2; + t688 = px1 * (t419 + t441 + t131 * t5); + t689 = - px2 * py3 * x2; + + t690 = 2 * px2 * py3 * x3; + t691 = (t690 + t94 + t689) * y2; + + t692 = t330 + t204 + (t98 + t628 + t424) * y2; + t693 = t134 + t133 + t319; + + t694 = px1 * (t140 + t118 + t117 + t693 * y2); + t695 = (t542 + t43 + t9) * y2; + + t696 = t5 * t312; + t697 = 2 * px2 * t2; + t698 = t5 * t316 + t112 + (t78 + t697 + t110) * y2; + + t699 = x2 * t253; + t700 = t5 * t255; + t701 = x2 * t403; + + t702 = px1 * (t701 + t700 + (t419 + t441 + t699) * y2); + t703 = px2 * py3 * x2 * x3; + + t704 = (t10 + t703) * y2; + t705 = px3 * py2 * x2 * x3 * y3; + t706 = (t20 + t279 + t689) * y2; + + t707 = t439 + t111 + (t70 + t435) * y2; + t708 = t224 + t204 + (t296 + t64 + t424) * y2; + + t709 = - 2 * py2; + t710 = 2 * py3; + t711 = py1 * t678; + + t712 = t459 + t683 + (t393 + t457) * y2; + t713 = x2 * t116; + t714 = t5 * t139; + + t715 = px1 * (t120 + t714 + (t400 + t399 + t713) * y2); + t716 = 2 * px2 * py3; + + t717 = (t94 + (t716 + t15) * x2) * y2; + t718 = - 2 * px2; + + t719 = t221 + t128 + t249 + (t98 + (px3 + t718) * x2) * y2; + + t720 = px1 * (t256 + t136 + t135 + t646 * y2); + t721 = - px2 * py3 * t2 * t142; + + t722 = - px3 * py2 * t5 * t147; + t723 = t722 + t237 + t721; + t724 = - px2 * py3 * x3 * t142; + + t725 = y2 * (t54 + t124); + t726 = px1 * y2 * t257; + t727 = - px3 * py2 * x2 * t147; + + t728 = y2 * (t87 + t127); + t729 = t531 + t728 + t530; + t730 = px2 * py3 * t2 * t142; + + t731 = px3 * py2 * t5 * t147; + t732 = px1 * t397; + t733 = t251 + t299 + t248; + + t734 = px2 * t2 * t142; + t735 = px3 * t5 * t147; + t736 = t735 + t240 + t734; + + t737 = t389 + t10 + t649; + t738 = t731 + t189 + t730; + t739 = px1 * t738; + + t740 = x2 * t165; + t741 = t740 + t204 + t203; + t742 = py1 * y2 * t741; + t743 = py1 * t736; + + t744 = px2 * py3 * t142; + t745 = px1 * t567; + t746 = t148 + t364 + t744; + + t747 = px3 * py2 * t5; + t748 = t389 + t95 + t747; + t749 = (t26 + t122) * y2; + + t750 = x2 * t280; + t751 = (t431 + t750) * y2; + t752 = - px3 * py2 * t5 * y3; + + t753 = x2 * (t322 + t143); + t754 = - px3 * t5; + t755 = t393 + t102 + t754; + + t756 = t128 + t292 + (t68 + t126) * y2; + t757 = x2 * t297; + t758 = x2 * (t204 + t327); + + t759 = t758 + t438 + (t436 + t757) * y2; + t760 = (t94 + t667) * y2; + + t761 = t203 + t249 + (t98 + t680) * y2; + t762 = px1 * (t140 + t253 * y2); + + t763 = - px3 * py2 * x2 * x3; + t764 = (t43 + t763) * y2; + t765 = - px2 * py3 * x2 * x3 * y3; + + t766 = px3 * x2 * x3; + t767 = px2 * x2 * x3 * y3; + t768 = t767 + t79 + (t78 + t766) * y2; + + t769 = px1 * (t120 + t700 + (t419 + t441 + t713) * y2); + t770 = t501 + t496 + t407; + + t771 = px3 * py2 * x3 * t142; + t772 = y2 * (t313 + t633 + t54); + + t773 = px2 * py3 * x2 * t147; + t774 = - px3 * py2 * t142; + t775 = t149 + t346 + t774; + + t776 = y2 * (t317 + t87 + t577); + t777 = t507 + t776 + t506; + t778 = px3 * t142; + + t779 = t177 + t354 + t778; + t780 = y2 * (t144 + t272); + t781 = y2 * (t203 + t292); + + t782 = t531 + t781 + t530; + t783 = px1 * (t336 + t258 + t333); + t784 = t690 + t94; + + t785 = x2 * t784; + t786 = (t431 + t785) * y2; + t787 = x2 * (t125 + t197); + + t788 = x2 * t629; + t789 = x2 * (t221 + t128); + t790 = t789 + t438 + (t436 + t788) * y2; + + t791 = - 2 * py2 * t2; + t792 = 2 * py3 * t2; + t793 = 2 * px2 * py3 * t2; + + t794 = (t793 + t10 + t42) * y2; + t795 = t5 * t324; + t796 = - 2 * px2 * t2; + + t797 = t5 * t329 + t80 + (t70 + t796 + t77) * y2; + + t798 = px1 * (t701 + t714 + (t400 + t399 + t699) * y2); + + t799 = px1 * (t5 * t259 + t401 * t142); + t800 = t429 * y2; + t801 = t503 + t800; + + t802 = t487 + t486; + t803 = t673 * t142; + t804 = - 2 * px2 * py3 * t147; + + t805 = x2 * (t804 + t148); + t806 = 2 * px2 * t147; + + t807 = x2 * (t178 + t806) + t728 + t574 * t142; + t808 = py1 * t755; + t809 = py1 * t779; + + t810 = y2 * (t58 + t144 + t273); + t811 = y2 * (t91 + t293 + t203); + + t812 = t507 + t811 + t506; + t813 = px1 * (t260 + t335 + t254); + t814 = 2 * py2 * t147; + + t815 = - 2 * py3 * t147; + t816 = (t389 + t42) * y2; + t817 = - py2 * py3 * t2; + + t818 = (t817 + py2 * py3 * x2 * x3) * y2; + t819 = - py2 * py3 * t5 * y3; + + t820 = py2 * py3 * x2 * x3 * y3; + t821 = px1 * (t820 + t819 + t818); + t822 = - py2 * py3 * t5; + + t823 = 2 * py2 * py3 * x2 * x3; + t824 = px1 * (t817 + t823 + t822); + t825 = (t431 + t9) * y2; + + t826 = py2 * py3 * t2; + t827 = (t826 - py2 * py3 * x2 * x3) * y2; + t828 = py2 * py3 * t5 * y3; + + t829 = - py2 * py3 * x2 * x3 * y3; + t830 = px1 * (t829 + t828 + t827); + + t831 = (py2 * py3 * x2 - py2 * py3 * x3) * y2; + t832 = - py2 * py3 * x2 * y3; + + t833 = py2 * py3 * x3 * y3; + t834 = px1 * (t833 + t832 + t831); + + t835 = (t690 + t94 + t122) * y2; + t836 = px1 * t693; + t837 = - py2 * t5 * y3; + + t838 = t560 + t837 + (t400 + t557) * y2; + t839 = x2 * t205; + + t840 = py1 * (t839 + x2 * t408 * y2); + t841 = (t20 + t51) * y2; + t842 = - py3 * x2; + + t843 = py2 * x2 * y3; + t844 = t135 + t843 + (t115 + t842) * y2; + + t845 = py1 * (t740 + t87 + t128 + (t98 + t68 + t63) * y2); + t846 = py2 * py3 * t5; + + t847 = - 2 * py2 * py3 * x2 * x3; + t848 = - py2 * x2 * x3; + t849 = - py3 * x2 * x3 * y3; + + t850 = t849 + t480 + (t419 + t848) * y2; + t851 = (py2 * py3 * x3 - py2 * py3 * x2) * y2; + + t852 = py2 * py3 * x2 * y3; + t853 = - py2 * py3 * x3 * y3; + t854 = x2 * t561; + + t855 = t854 + t136 + (t305 + t133 + t469) * y2; + t856 = py2 * py3 * t2 * t142; + + t857 = - 2 * py2 * py3 * x2 * x3 * y2 * y3; + t858 = py2 * py3 * t5 * t147; + + t859 = px1 * (t858 + t857 + t856); + t860 = - py2 * py3 * x3 * t142; + + t861 = y2 * (t833 + t852); + t862 = - py2 * py3 * x2 * t147; + + t863 = px1 * (t862 + t861 + t860); + t864 = - py2 * py3 * t2 * t142; + + t865 = 2 * py2 * py3 * x2 * x3 * y2 * y3; + t866 = - py2 * py3 * t5 * t147; + + t867 = py3 * t2 * t142; + t868 = py2 * t5 * t147; + t869 = t868 + t570 + t867; + + t870 = py2 * py3 * x3 * t142; + t871 = y2 * (t853 + t832); + t872 = py2 * py3 * x2 * t147; + + t873 = - py3 * x3 * t142; + t874 = - py2 * x2 * t147; + t875 = t874 + t521 + t873; + + t876 = py2 * x3 * t142; + t877 = py3 * x2 * t147; + t878 = t877 + t509 + t876; + + t879 = t287 * t142; + t880 = t596 + t87 + t128; + t881 = y2 * t880; + t882 = x2 * t207; + + t883 = py1 * (t882 + t881 + t879); + t884 = py1 * t662; + + t885 = px1 * (t826 + t847 + t846); + t886 = 2 * px3 * py2; + + t887 = (t94 + (t31 + t886) * x2) * y2; + t888 = px1 * (t853 + t852 + t851); + + t889 = py1 * t738; + t890 = px1 * (t866 + t865 + t864); + + t891 = px1 * (t872 + t871 + t870); + t892 = t656 * t142; + t893 = x2 * (t157 + t635); + + t894 = t221 + t577; + t895 = x2 * t253 * y2; + t896 = t701 + t895; + t897 = px1 * t896; + + t898 = (t20 + t279 + t122) * y2; + + t899 = py1 * (t596 + t204 + t203 + (t65 + t64 + t97) * y2); + t900 = t385 + t107; + + t901 = x2 * t900; + t902 = t901 + t136 + (t115 + t476 + t469) * y2; + t903 = px1 * t869; + + t904 = t874 + t612 + t873; + t905 = t408 * t142; + t906 = y2 * t741; + t907 = x2 * t168; + + t908 = py1 * (t907 + t906 + t905); + t909 = - py2 * py3 * t142; + + t910 = 2 * py2 * py3 * y2 * y3; + t911 = - py2 * py3 * t147; + + t912 = px1 * (t911 + t910 + t909); + t913 = t912 + py1 * t376; + + t914 = t481 + t117 + t428 + (t133 + (py3 + t709) * x2) * y2; + t915 = 2 * px3; + + t916 = t138 + t137 + t131 * y2; + t917 = px1 * t916; + + t918 = py1 * (t167 + t166 + t73 * t142); + t919 = py3 * t142; + t920 = t171 + t524 + t919; + + t921 = px1 * t920; + t922 = py2 * py3 * t142; + t923 = - 2 * py2 * py3 * y2 * y3; + + t924 = py2 * py3 * t147; + t925 = py1 * t513 + t924 + t923 + t922; + t926 = py1 * t420; + + t927 = py1 * t640; + t928 = t685 + t114; + t929 = x2 * (t172 + t814) + t621 + t928 * t142; + + t930 = px1 * (t924 + t923 + t922); + t931 = t930 + py1 * t347; + + t932 = py1 * t920 + t911 + t910 + t909; + t933 = t315 + t222; + t934 = py1 * t654; + + t935 = (t10 + t750) * y2; + t936 = t824 + py1 * t263; + t937 = py1 * t671; + + t938 = (t19 + t689) * y2; + t939 = (t10 + t785) * y2; + t940 = t296 + t314; + + t941 = py1 * (t78 + t436 + t62 * t5); + t942 = (t26 + t52 + t667) * y2; + + t943 = py1 * (t740 + t204 + t203 + t99 * y2); + + t944 = t611 + t118 + (t134 + t304 + t426) * y2; + t945 = (t431 + t541 + t42) * y2; + + t946 = t5 * t199; + t947 = t5 * t900 + t560 + (t419 + t791 + t557) * y2; + t948 = x2 * t287; + + t949 = t5 * t378; + t950 = py1 * (t289 + t949 + (t78 + t436 + t948) * y2); + + t951 = - py3 * t5; + t952 = t441 + t466 + t951; + t953 = py1 * t952 + t826 + t847 + t846; + + t954 = py3 * x2; + t955 = t117 + t620 + (t114 + t954) * y2; + t956 = py1 * t549; + + t957 = py3 * t5 * y3; + t958 = t585 + t957 + (t399 + t583) * y2; + t959 = (t389 + t763) * y2; + + t960 = (t309 + t19 + t667) * y2; + t961 = - 2 * px3; + t962 = px1 * t952; + t963 = x2 * t408; + + t964 = t5 * t165; + t965 = py1 * (t839 + t964 + (t70 + t393 + t963) * y2); + + t966 = t482 + t957 + (t399 + t478) * y2; + t967 = - 2 * px3 * py2; + + t968 = (t26 + (t16 + t967) * x2) * y2; + + t969 = t307 + t135 + t471 + (t134 + (t130 + t359) * x2) * y2; + + t970 = py1 * (t596 + t87 + t128 + t66 * y2); + t971 = t444 + t837 + (t400 + t440) * y2; + + t972 = t520 + t118 + (t685 + t114 + t426) * y2; + t973 = py1 * t405; + + t974 = t877 + t621 + t876; + t975 = - py2 * t2 * t142; + t976 = - py3 * t5 * t147; + + t977 = t976 + t588 + t975; + t978 = py1 * y2 * t880; + t979 = y2 * (t136 + t843); + + t980 = t522 + t979 + t518; + t981 = py1 * t276; + t982 = py1 * t572; + t983 = px1 * y2 * t334; + + t984 = px1 * t977; + t985 = (t94 + t51) * y2; + t986 = (t43 + t657) * y2; + + t987 = (t26 + t689) * y2; + t988 = t117 + t471 + (t134 + t954) * y2; + + t989 = py1 * (t740 + t287 * y2); + t990 = (t431 + t703) * y2; + t991 = - py3 * x2 * x3; + + t992 = - py2 * x2 * x3 * y3; + t993 = t992 + t480 + (t419 + t991) * y2; + + t994 = py1 * (t839 + t949 + (t78 + t436 + t963) * y2); + t995 = py3 * t5; + + t996 = t399 + t418 + t995; + t997 = t135 + t428 + (t133 + t842) * y2; + t998 = x2 * t928; + + t999 = x2 * (t118 + t609); + t1000 = t999 + t443 + (t441 + t998) * y2; + + t1001 = y2 * (t901 + t136 + t306); + t1002 = t510 + t1001 + t508; + t1003 = - py3 * t142; + + t1004 = t180 + t512 + t1003; + t1005 = y2 * (t117 + t428); + t1006 = t522 + t1005 + t518; + + t1007 = py1 * (t907 + t881 + t905); + t1008 = y2 * (t854 + t481 + t117); + + t1009 = t510 + t1008 + t508; + t1010 = 2 * px3 * t147; + + t1011 = py1 * (t5 * t207 + t394 * t142); + t1012 = t784 * t142; + + t1013 = 2 * px3 * py2 * t147; + t1014 = x2 * (t149 + t1013); + + t1015 = py1 * (t882 + t906 + t879); + t1016 = x2 * (t181 + t387) + t979 + t582 * t142; + + t1017 = (t43 + t674) * y2; + t1018 = x2 * t618; + t1019 = x2 * (t307 + t135); + + t1020 = t1019 + t443 + (t441 + t1018) * y2; + t1021 = - 2 * px3 * t2; + + t1022 = - 2 * px3 * py2 * t2; + t1023 = (t389 + t1022 + t9) * y2; + t1024 = t5 * t57; + + t1025 = t5 * t610 + t849 + (t400 + t687 + t848) * y2; + + t1026 = py1 * (t289 + t964 + (t70 + t393 + t948) * y2); + t1027 = px1 * t996; + + t1028 = px1 * t1004; + t1029 = x2 * t429 * y2; + t1030 = (t436 + t110) * y2; + + t1031 = (t441 + t557) * y2; + t1032 = (t393 + t77) * y2; + t1033 = (t399 + t848) * y2; + + t1034 = (t26 + t94 + t18) * y2; + t1035 = (t64 + t85) * y2; + t1036 = (t114 + t469) * y2; + + t1037 = (t98 + t628 + t126) * y2; + t1038 = (t134 + t304 + t842) * y2; + + t1039 = (t20 + t19 + t96) * y2; + t1040 = (t296 + t64 + t126) * y2; + + t1041 = (t685 + t114 + t842) * y2; + t1042 = (t98 + (t961 + px2) * x2) * y2; + + t1043 = t456 * t142; + t1044 = x2 * (t1010 + t166); + + t1045 = (t134 + (t710 + t105) * x2) * y2; + t1046 = t477 * t142; + + t1047 = x2 * (t815 + t171); + t1048 = t32 * t142; + t1049 = t171 + t526 + t524 + t919; + + t1050 = t536 + t166 + t366 + t365; + t1051 = (t389 + t10 + t430) * y2; + + t1052 = (t393 + t766) * y2; + t1053 = (t399 + t991) * y2; + t1054 = t17 * t5; + + t1055 = (t431 + t43 + t551) * y2; + t1056 = (t1021 + t436 + t77) * y2; + t1057 = t5 * t223; + + t1058 = (t792 + t441 + t848) * y2; + t1059 = t5 * t519; + t1060 = t338 * y2; + + t1061 = (t86 + t68 + t680) * y2; + t1062 = (t305 + t133 + t954) * y2; + + t1063 = (t115 + t426) * y2; + t1064 = (t400 + t1018) * y2; + t1065 = (t65 + t424) * y2; + + t1066 = (t70 + t788) * y2; + t1067 = (t70 + t757) * y2; + t1068 = (t400 + t998) * y2; + + t1069 = t21 * y2; + t1070 = (t68 + (t915 + t61) * x2) * y2; + + t1071 = (t133 + (t360 + py2) * x2) * y2; + t1072 = (t115 + t476 + t954) * y2; + + t1073 = (t65 + t314 + t680) * y2; + + trans->m[0][0] + = (x0 * (px0 * (x1 * (px1 * (y2 * (t388 + t387) + t142 * t386) + + t383 + t372 + t371) + + y1 * (x1 * (t369 + t382 + t156 + t346 + t345) + + t337 + py1 * t301 + t285 + t283 + t281) + t381 + t151 + + t1 * (t141 + py1 * t92 + t58 + t54 + t53) + t146) + + py0 * (y1 * (x1 * t380 + px1 * t332 + t219 + t218 + t217) + + px1 * t234 + px1 * x1 * t379 + t1 * (px1 * t129 + t49 + t48 + t47)) + + y1 * (x1 * t377 + px1 * (t202 + t326 + t196) + t195) + px1 * t374 + + px1 * x1 * t373 + t1 * (px1 * (t125 + t124 + t123) + t269)) + + y0 * (x0 * (px0 * (t261 + x1 * (t369 + t368 + t157 + t364 + t363) + py1 * t227 + + t202 + + y1 + * (x1 + * (px1 * (t362 + t361 + (t360 + t359) * y2) + + t358 + t153 + t55 + t339) + + t320 + py1 * t294 + t144 + t273 + t272 + t271) + + t201 + t196 + (t357 + t20 + t19 + t18) * t1) + + py0 * (x1 * t356 + px1 * t252 + t194 + + y1 * (px1 * t318 + px1 * x1 * t349 + t84 + t83 + t82) + + t193 + t192 + px1 * t99 * t1) + x1 * t348 + + px1 * (t247 + t246 + t244) + t278 + + y1 * (px1 * (t313 + t54 + t310) + t50 + px1 * x1 * t340) + + px1 * t338 * t1) + + px0 * (x1 * (t337 + py1 * t332 + t202 + t326 + t196) + + t321 + px1 * t29 * t182 + t190 + + y1 * (x1 * (t320 + py1 * t318 + t313 + t54 + t310) + + px1 * (x2 * t308 + x2 * (t305 + t304) * y2) + t303 + t267 + + t266) + t189 + t188 + (t302 + t10 + t27 + t25) * t1) + + py0 * (x1 * (px1 * t301 + t194 + t193 + t192) + + t295 + px1 * t29 * t179 + t186 + + y1 * (x1 * (px1 * t294 + t49 + t48 + t47) + px1 * t290) + t185 + t184 + + (t286 + t3 + t23 + t22) * t1) + + x1 * (px1 * (t285 + t283 + t281) + t278) + t277 + t275 + px1 * t29 * t274 + + y1 * (x1 * (px1 * (t144 + t273 + t272 + t271) + t269) + px1 * t268) + + (t264 + t262) * t1) + + px0 * (y1 * (x1 * (t261 + py1 * t252 + t247 + t246 + t244) + + t243 + t238 + t237 + t236) + + x1 * (t235 + t230 + t229) + px1 * t29 * t174 + + t1 * (t121 + py1 * t81 + t46 + t45 + t44)) + + py0 * (y1 * (x1 * (px1 * t227 + t219 + t218 + t217) + + px1 * t216 + t212 + t211 + t210) + + px1 * t29 * t169 + px1 * x1 * t209 + t1 * (px1 * t113 + t40 + t39 + t38)) + + y1 * (x1 * (px1 * (t202 + t201 + t196) + t195) + px1 * t191 + t187) + + px0 * t161 * t183 + px1 * t29 * t160 + px1 * x1 * t152 + + t14 * (px0 * (x1 * (t141 + py1 * t129 + t125 + t124 + t123) + + t121 + py1 * t113 + px1 * t29 * t109 + t13 + t12 + t11 + + (t104 + t43 + x1 * (t100 + t26 + t94 + t96) + t95 + t93) * y1) + + py0 * (x1 * (px1 * t92 + t84 + t83 + t82) + + px1 * t81 + px1 * t29 * t76 + t7 + t6 + t4 + + (px1 * t71 + t37 + px1 * x1 * t66 + t60 + t59) * y1) + + x1 * (px1 * (t58 + t54 + t53) + t50) + px1 * (t46 + t45 + t44) + t41 + + px1 * t29 * t36 + (px1 * t28 + t24 + px1 * x1 * t21) * y1) + + t1 * (px1 * (t13 + t12 + t11) + t8)); + + trans->m[0][1] = + (t161 * (px0 * (x1 * (t382 + t156 + t346 + t345) + + py1 * t733 + t247 + + y1 * (t694 + x1 * (t358 + t153 + t55 + t339) + py1 * t681 + + t144 + t282 + t668) + t726 + t283 + t244 + + px1 * t646 * t1) + + py0 * (x1 * (px1 * t601 + t343 + t342 + t341) + + px1 * t729 + t219 + + y1 * (px1 * t692 + px1 * x1 * t76 + t49 + t48 + t47) + t218 + + t217 + px1 * t66 * t1) + x1 * (px1 * t746 + t375) + + px1 * (t727 + t725 + t724) + t195 + + y1 * (px1 * (t325 + t143 + t691) + t269 + px1 * x1 * t36) + + px1 * t21 * t1) + + x0 * (py0 * (t29 * t356 + t745 + t212 + + y1 + * (x1 * (px1 * t719 + t84 + t83 + t82) + + px1 * t698 + t40 + t39 + t38) + px1 * x1 * y2 * t741 + + t211 + t210 + px1 * t650 * t1) + + px0 * (t29 * (t602 + t148 + t364 + t744) + + t743 + t722 + + y1 * (x1 * (t720 + py1 * t708 + t200 + t143 + t706) + + t702 + py1 * t684 + t676 + t659 + t675) + + x1 * (t607 + px1 * y2 * (x2 * (t362 + t361) + t481 + t609) + t742) + + t237 + t721 + px1 * t651 * t1) + t29 * t348 + t739 + t187 + + y1 * (x1 * (px1 * (t125 + t197 + t245 + t717) + t50) + + px1 * (t696 + t13 + t695) + t8) + px1 * x1 * y2 * t606 + + px1 * t737 * t1) + + py0 * (x1 * (px1 * t736 + t186 + t185 + t184) + + t29 * (px1 * t733 + t194 + t193 + t192) + + y1 * (x1 * (px1 * t712 + t7 + t6 + t4) + t732) + px1 * t537 * t1) + + px0 * (x1 * (t568 + t731 + t189 + t730) + + t29 * (py1 * t729 + t727 + t726 + t725 + t724) + + y1 * (x1 * (t715 + py1 * t707 + t705 + t12 + t704) + t546 + t670 + t669) + + px1 * t539 * t1) + x1 * (px1 * t723 + t275) + + t29 * (px1 * (t247 + t283 + t244) + t278) + + y0 * (x0 * (px0 * (x1 * (t720 + py1 * t719 + t125 + t197 + t245 + t717) + + t715 + py1 * t712 + t29 * (t162 + t35 + t34 + t33) + t660 + + t659 + t658 + + (t688 + t711 + t431 + + x1 + * (px1 * (t305 + t304 + (t710 + t709) * x2) + + t100 + t26 + t94 + t96) + t27 + t661) + * y1) + + py0 * (x1 * (px1 * t708 + t49 + t48 + t47) + + px1 * t707 + px1 * t29 * t349 + t40 + t39 + t38 + + (t286 + t3 + px1 * x1 * t99 + t23 + t22) * y1) + + x1 * (px1 * (t200 + t143 + t706) + t269) + px1 * (t705 + t12 + t704) + + t8 + px1 * t29 * t340 + (t264 + t262 + px1 * x1 * t338) * y1) + + px0 * (x1 * (t702 + py1 * t698 + t696 + t13 + t695) + + t29 * (t694 + py1 * t692 + t325 + t143 + t691) + t398 + t392 + t391 + + (x1 * (t688 + t104 + t43 + t95 + t93) + + px1 * (x2 * (t558 + t687) + t5 * t686) + t412 + t665 + t664) + * y1) + + py0 * (x1 * (px1 * t684 + t7 + t6 + t4) + t682 + + t29 * (px1 * t681 + t84 + t83 + t82) + + (px1 * t411 + x1 * t679) * y1) + + x1 * (px1 * (t676 + t659 + t675) + t41) + t672 + + t29 * (px1 * (t144 + t282 + t668) + t50) + (px1 * t666 + x1 * t663) * y1) + + y1 * (x1 * (px1 * (t660 + t659 + t658) + t41) + t655) + px1 * t653 * t1 + + px0 * t652 * t14) + ; + + trans->m[0][2] = + (x0 * (px0 * (y1 * (x1 * (t813 + py1 * t807 + t805 + t725 + t803) + + t799 + t568 + t731 + t189 + t730) + + x1 * (px1 * (x2 * y2 * (t815 + t814) + t142 * t308) + + t235 + t230 + t229) + t29 * (t170 + t159 + t155) + + t1 * (t769 + py1 * t759 + t753 + t752 + t751)) + + py0 * (y1 * (x1 * (px1 * t812 + t194 + t193 + t192) + + t295 + t186 + t185 + t184) + + px1 * x1 * t234 + px1 * t29 * t379 + + t1 * (px1 * t768 + t7 + t6 + t4)) + + y1 * (x1 * (px1 * (t773 + t810 + t771) + t278) + t277 + t275) + + px1 * x1 * t374 + px1 * t29 * t373 + + t1 * (px1 * (t765 + t45 + t764) + t41)) + + y0 * (x0 * (px0 * (x1 * (t813 + py1 * t812 + t773 + t810 + t771) + + t495 + t29 * (t809 + t149 + t346 + t774) + + y1 + * (x1 + * (px1 * (x2 * t386 + t686 * y2) + + t598 + t503 + t800) + + t798 + py1 * t790 + t787 + t752 + t786) + + (t808 + t389 + t95 + t747) * t1) + + py0 * (x1 * (px1 * t807 + t219 + t218 + t217) + + px1 * t490 + t29 * t380 + + y1 * (px1 * x1 * t597 + px1 * t797 + t7 + t6 + t4) + + t679 * t1) + + x1 * (px1 * (t805 + t725 + t803) + t195) + px1 * t802 + + t29 * t377 + + y1 * (px1 * x1 * t801 + px1 * (t795 + t46 + t794) + t41) + + t663 * t1) + + px0 * (x1 * (t799 + t243 + t238 + t237 + t236) + + t29 * (t783 + py1 * t777 + t773 + t772 + t771) + + y1 * (x1 * (t798 + py1 * t797 + t795 + t46 + t794) + + px1 * (t5 * (t481 + t609) + x2 * (t792 + t791) * y2) + + t546 + t670 + t669) + (t538 + t648 + t645) * t1) + + py0 * (x1 * (t745 + t212 + t211 + t210) + + t29 * (px1 * t782 + t219 + t218 + t217) + + y1 * (x1 * (px1 * t790 + t40 + t39 + t38) + t682) + + px1 * t411 * t1) + x1 * (t739 + t187) + + t29 * (px1 * (t727 + t780 + t724) + t195) + + y1 * (x1 * (px1 * (t787 + t752 + t786) + t8) + t672) + + px1 * t666 * t1) + + t161 * (px0 * (y1 + * (t783 + x1 * (t368 + t157 + t364 + t363) + py1 * t782 + + t727 + t780 + t724) + + x1 * (t383 + t372 + t371) + t500 + + t1 * (t762 + py1 * t756 + t125 + t272 + t749)) + + py0 * (y1 + * (x1 * (px1 * t779 + t352 + t351 + t350) + + px1 * t777 + t194 + t193 + t192) + + px1 * x1 * t169 + px1 * t209 + + t1 * (px1 * t761 + t84 + t83 + t82)) + + y1 * (x1 * (px1 * t775 + t344) + px1 * (t773 + t772 + t771) + + t278) + px1 * x1 * t160 + + px1 * t152 + t1 * (px1 * (t144 + t245 + t760) + t50)) + + px0 * t770 + + t14 * (px0 * (x1 * (t769 + py1 * t768 + t765 + t45 + t764) + + t29 * (t762 + py1 * t761 + t144 + t245 + t760) + t406 + + (t412 + x1 * (t711 + t431 + t27 + t661) + t665 + t664) + * y1) + + py0 * (x1 * (px1 * t759 + t40 + t39 + t38) + + t732 + t29 * (px1 * t756 + t49 + t48 + t47) + + (px1 * t537 + x1 * (px1 * t755 + t3 + t23 + t22)) * y1) + + x1 * (px1 * (t753 + t752 + t751) + t8) + t655 + + t29 * (px1 * (t125 + t272 + t749) + t269) + + (x1 * (px1 * t748 + t262) + px1 * t653) * y1)); + + trans->m[1][0] = (x0 * (py0 * (x1 * (t516 + py1 * (y2 * (t631 + t806) + t142 * t933) + t372 + + t371) + + y1 * (px1 * t929 + x1 * (t514 + t918 + t157 + t364 + t363) + t908 + + t893 + t725 + t892) + t500 + t151 + + t1 * (px1 * t855 + t845 + t325 + t125 + t835) + t146) + + px0 * (y1 * (x1 * t932 + py1 * t904 + t872 + t871 + t870) + + py1 * x1 * t515 + py1 * t638 + + t1 * (py1 * t844 + t833 + t832 + t831)) + + y1 * (x1 * t931 + t863 + py1 * (t247 + t810 + t244)) + py1 * t374 + + py1 * x1 * t373 + t1 * (t888 + py1 * (t54 + t282 + t841))) + + y0 * (px0 * (x1 * (py1 * t929 + t862 + t861 + t860) + + t927 + py1 * t29 * t182 + t858 + + y1 * (py1 * t896 + x1 * (py1 * t914 + t833 + t832 + t831)) + t857 + + t856 + (t926 + t817 + t823 + t822) * t1) + + x0 * (px0 * (x1 * t925 + py1 * t878 + t862 + + y1 + * (py1 * t902 + py1 * x1 * t916 + t853 + t852 + + t851) + t861 + t860 + + py1 * t693 * t1) + + py0 * (x1 * (t921 + t918 + t156 + t346 + t345) + + t883 + px1 * t875 + t247 + + y1 * (x1 * (t917 + py1 * (t328 + t88 + (t915 + t718) * y2) + + t153 + t55 + t339) + + t899 + px1 * t914 + t322 + t143 + t245 + t887) + t772 + + t244 + (t647 + t20 + t19 + t18) * t1) + x1 * t913 + t891 + + py1 * (t202 + t780 + t196) + + y1 * (py1 * (t200 + t125 + t898) + t834 + py1 * x1 * t340) + + py1 * t338 * t1) + + py0 * (x1 * (t908 + px1 * t904 + t247 + t810 + t244) + + t903 + py1 * t29 * t179 + t722 + + y1 * (x1 * (px1 * t902 + t899 + t200 + t125 + t898) + + t897 + py1 * (x2 * t894 + x2 * (t86 + t628) * y2) + t267 + + t266) + t237 + t721 + (t593 + t389 + t95 + t747) * t1) + + x1 * (py1 * (t893 + t725 + t892) + t891) + t890 + t889 + py1 * t29 * t274 + + y1 * (x1 * (t888 + py1 * (t322 + t143 + t245 + t887)) + py1 * t268) + + (t885 + t884) * t1) + + py0 * (y1 * (x1 * (t883 + px1 * t878 + t202 + t780 + t196) + + t641 + t731 + t189 + t730) + + x1 * (t639 + t230 + t229) + py1 * t29 * t169 + + t1 * (t840 + px1 * t850 + t13 + t752 + t825)) + + px0 * (y1 * (x1 * (py1 * t875 + t872 + t871 + t870) + + py1 * t869 + t866 + t865 + t864) + + py1 * x1 * t499 + py1 * t29 * t174 + + t1 * (py1 * t838 + t829 + t828 + t827)) + + y1 * (x1 * (t863 + py1 * (t247 + t772 + t244)) + t859 + py1 * t723) + + py0 * t161 * t183 + py1 * t29 * t160 + py1 * x1 * t152 + + t14 * (px0 * (x1 * (py1 * t855 + t853 + t852 + t851) + + py1 * t850 + py1 * t29 * t109 + t820 + t819 + t818 + + (py1 * t592 + t826 + py1 * x1 * t646 + t847 + t846) * y1) + + py0 * (x1 * (t845 + px1 * t844 + t54 + t282 + t841) + + t840 + px1 * t838 + py1 * t29 * t76 + t46 + t659 + t816 + + (t421 + t431 + x1 * (t836 + t26 + t94 + t96) + t27 + t661) * y1) + + x1 * (py1 * (t325 + t125 + t835) + t834) + t830 + + py1 * (t13 + t752 + t825) + py1 * t29 * t36 + + (t824 + py1 * t748 + py1 * x1 * t21) * y1) + + t1 * (t821 + py1 * (t46 + t659 + t816))) + ; + + trans->m[1][1] = (t161 * (px0 * (x1 * (py1 * t603 + t911 + t910 + t909) + + py1 * t980 + t872 + + y1 * (py1 * t944 + py1 * x1 * t109 + t833 + t832 + t831) + t871 + + t870 + py1 * t646 * t1) + + py0 * (x1 * (t514 + t157 + t364 + t363) + + px1 * t974 + t202 + + y1 * (x1 * (t917 + t153 + t55 + t339) + + t943 + px1 * t955 + t143 + t124 + t938) + t978 + t725 + + t196 + py1 * t66 * t1) + x1 * (t930 + py1 * t775) + t863 + + py1 * (t773 + t283 + t771) + + y1 * (py1 * (t58 + t144 + t942) + t888 + py1 * x1 * t36) + + py1 * t21 * t1) + + x0 * (py0 * (t29 * (t604 + t149 + t346 + t774) + + t984 + t190 + + y1 * (x1 * (px1 * t972 + t970 + t313 + t144 + t960) + + px1 * t958 + t950 + t787 + t12 + t939) + + x1 * (t607 + t983 + py1 * y2 * (x2 * (t328 + t88) + t293 + t327)) + + t189 + t188 + py1 * t650 * t1) + + px0 * (t29 * t925 + t982 + t866 + + y1 + * (x1 * (py1 * t969 + t853 + t852 + t851) + + py1 * t947 + t829 + t828 + t827) + + py1 * x1 * y2 * t334 + t865 + t864 + py1 * t651 * t1) + + t29 * t913 + t859 + t981 + + y1 * (x1 * (t834 + py1 * (t633 + t54 + t272 + t968)) + + py1 * (t946 + t46 + t945) + t821) + py1 * x1 * y2 * t606 + + py1 * t737 * t1) + + py0 * (x1 * (t573 + t238 + t237 + t236) + + t29 * (px1 * t980 + t773 + t978 + t283 + t771) + + y1 * (x1 * (t965 + px1 * t971 + t765 + t659 + t959) + t550 + t670 + t669) + + py1 * t537 * t1) + + px0 * (x1 * (py1 * t977 + t858 + t857 + t856) + + t29 * (py1 * t974 + t862 + t861 + t860) + + y1 * (x1 * (py1 * t966 + t820 + t819 + t818) + t973) + py1 * t539 * t1) + + x1 * (t890 + py1 * t191) + t29 * (t891 + py1 * (t202 + t725 + t196)) + + y0 * (x0 * (px0 * (x1 * (py1 * t972 + t833 + t832 + t831) + + py1 * t971 + py1 * t29 * t916 + t829 + t828 + t827 + + (t926 + t817 + py1 * x1 * t693 + t823 + t822) * y1) + + py0 * (x1 * (t970 + px1 * t969 + t633 + t54 + t272 + t968) + + px1 * t966 + t965 + t29 * (t163 + t35 + t34 + t33) + t753 + t12 + + t935 + + (t962 + t941 + t43 + + x1 + * (t836 + py1 * (t86 + t628 + (t961 + t291) * x2) + + t26 + t94 + t96) + t95 + t93) + * y1) + x1 * (py1 * (t313 + t144 + t960) + t888) + t821 + + py1 * (t765 + t659 + t959) + py1 * t29 * t340 + + (t885 + t884 + py1 * x1 * t338) * y1) + + px0 * (x1 * (py1 * t958 + t820 + t819 + t818) + + t956 + t29 * (py1 * t955 + t853 + t852 + t851) + + (py1 * t415 + x1 * t953) * y1) + + py0 * (x1 * (t950 + px1 * t947 + t946 + t46 + t945) + + t29 * (px1 * t944 + t943 + t58 + t144 + t942) + t406 + t392 + t391 + + (x1 * (t421 + t941 + t431 + t27 + t661) + + t416 + py1 * (x2 * (t554 + t796) + t5 * t940) + t665 + t664) + * y1) + x1 * (py1 * (t787 + t12 + t939) + t830) + + t29 * (t834 + py1 * (t143 + t124 + t938)) + t937 + + (x1 * t936 + py1 * t666) * y1) + + y1 * (x1 * (py1 * (t753 + t12 + t935) + t830) + t934) + py1 * t653 * t1 + + py0 * t652 * t14) + ; + + trans->m[1][2] = (y0 * (x0 * (px0 * (x1 * (py1 * t1016 + t872 + t871 + t870) + + py1 * t494 + t29 * t932 + + y1 + * (py1 * t1025 + py1 * x1 * t599 + t820 + t819 + + t818) + t953 * t1) + + py0 * (x1 * (t1015 + px1 * t1009 + t727 + t326 + t724) + + t29 * (t1028 + t148 + t364 + t744) + t491 + + y1 + * (x1 + * (t600 + py1 * (x2 * t933 + t940 * y2) + t503 + + t800) + + px1 * t1020 + t1026 + t676 + t45 + t1017) + + (t1027 + t10 + t27 + t25) * t1) + + x1 * (py1 * (t1014 + t283 + t1012) + t863) + t29 * t931 + + py1 * t802 + + y1 * (py1 * x1 * t801 + py1 * (t1024 + t13 + t1023) + t830) + + t936 * t1) + + py0 * (t29 * (t1007 + px1 * t1002 + t727 + t201 + t724) + + x1 * (t1011 + t641 + t731 + t189 + t730) + + y1 * (x1 * (t1026 + px1 * t1025 + t1024 + t13 + t1023) + + t550 + + py1 + * (t5 * (t293 + t327) + x2 * (t1021 + t697) * y2) + + t670 + t669) + (t540 + t648 + t645) * t1) + + px0 * (x1 * (t982 + t866 + t865 + t864) + + t29 * (py1 * t1006 + t872 + t871 + t870) + + y1 * (x1 * (py1 * t1020 + t829 + t828 + t827) + t956) + + py1 * t415 * t1) + x1 * (t859 + t981) + + t29 * (t863 + py1 * (t773 + t246 + t771)) + + y1 * (x1 * (py1 * (t676 + t45 + t1017) + t821) + t937) + + py1 * t666 * t1) + + x0 * (py0 * (y1 * (x1 * (px1 * t1016 + t1015 + t1014 + t283 + t1012) + + t1011 + t573 + t238 + t237 + t236) + + x1 * (t639 + + py1 * (x2 * y2 * (t1010 + t300) + t142 * t894) + + t230 + t229) + t29 * (t175 + t159 + t155) + + t1 * (px1 * t1000 + t994 + t660 + t45 + t986)) + + px0 * (y1 * (x1 * (py1 * t1009 + t862 + t861 + t860) + + t927 + t858 + t857 + t856) + + py1 * t29 * t515 + py1 * x1 * t638 + + t1 * (py1 * t993 + t820 + t819 + t818)) + + y1 * (x1 * (t891 + py1 * (t727 + t326 + t724)) + t890 + t889) + + py1 * x1 * t374 + py1 * t29 * t373 + + t1 * (t830 + py1 * (t705 + t752 + t990))) + + t161 * (py0 * (x1 * (t516 + t372 + t371) + + y1 + * (x1 * (t921 + t156 + t346 + t345) + + t1007 + px1 * t1006 + t773 + t246 + t771) + t381 + + t1 * (t989 + px1 * t997 + t54 + t245 + t985)) + + px0 * (y1 + * (x1 * (py1 * t1004 + t924 + t923 + t922) + + py1 * t1002 + t862 + t861 + t860) + + py1 * t499 + py1 * x1 * t174 + + t1 * (py1 * t988 + t853 + t852 + t851)) + + y1 * (x1 * (t912 + py1 * t746) + t891 + + py1 * (t727 + t201 + t724)) + + py1 * x1 * t160 + py1 * t152 + + t1 * (t834 + py1 * (t143 + t272 + t987))) + py0 * t770 + + t14 * (px0 * (x1 * (py1 * t1000 + t829 + t828 + t827) + + t973 + t29 * (py1 * t997 + t833 + t832 + t831) + + (py1 * t539 + x1 * (py1 * t996 + t817 + t823 + t822)) + * y1) + + py0 * (x1 * (t994 + px1 * t993 + t705 + t752 + t990) + + t29 * (t989 + px1 * t988 + t143 + t272 + t987) + t398 + + (t416 + x1 * (t962 + t43 + t95 + t93) + t665 + t664) + * y1) + x1 * (py1 * (t660 + t45 + t986) + t821) + + t29 * (t888 + py1 * (t54 + t245 + t985)) + t934 + + (x1 * (t885 + py1 * t28) + py1 * t653) * y1)); + + trans->m[2][0] = (x0 * (px0 * (y1 * (x1 * t617 + t586 + t877 + t1008 + t876) + + x1 * t515 + t637 + t1 * (t475 + t136 + t620 + t1036) + t636) + + py0 * (y1 * (t579 + x1 * t627 + t251 + t811 + t248) + + x1 * t379 + t233 + t1 * (t455 + t87 + t298 + t1035) + t232) + + x1 * (t516 + t383 + y2 * (t804 + t1013) + t142 * t595) + + y1 * (px1 * (t1047 + t979 + t1046) + + x1 * (t921 + t368 + t157 + t156 + t1048) + + py1 * (t1044 + t728 + t1043) + t505 + t607 + t502) + t500 + t381 + + t1 * (px1 * (t611 + t135 + t1038) + py1 * (t330 + t128 + t1037) + t423 + + t125 + t54 + t1034)) + + y0 * (x0 * (py0 * (x1 * t1050 + t533 + t226 + + y1 * (t529 + t224 + x1 * t349 + t128 + t1040) + + t781 + t220 + t99 * t1) + + px0 * (t528 + x1 * t1049 + t874 + + y1 * (t517 + x1 * t916 + t520 + t135 + t1041) + t1005 + + t873 + t693 * t1) + + x1 * (t514 + t382 + t157 + t156 + t1048) + + px1 * (t877 + t1001 + t876) + py1 * (t251 + t776 + t248) + t608 + + y1 * (x1 * (t917 + t358 + t56 + t323 + (t716 + t967) * y2) + + px1 * (t118 + t609 + t471 + t1045) + + py1 * (t204 + t327 + t249 + t1042) + t503 + t144 + t143 + + t1039) + t504 + t605 + (t647 + t357) * t1) + + px0 * (x1 * (t528 + t1047 + t979 + t1046) + + t643 + t29 * t182 + t571 + + y1 * (x1 * (t475 + t118 + t609 + t471 + t1045) + t701 + t895) + + t570 + t569 + (t468 + t441 + t466 + t951) * t1) + + py0 * (x1 * (t533 + t1044 + t728 + t1043) + + t642 + t29 * t179 + t566 + + y1 * (x1 * (t455 + t204 + t327 + t249 + t1042) + t289 + t288) + + t214 + t565 + (t436 + t450 + t69 + t677) * t1) + + x1 * (px1 * (t877 + t1008 + t876) + py1 * (t251 + t811 + t248) + t505 + + t607 + t502) + t984 + t743 + + t29 * t274 + + y1 * (x1 * (px1 * (t520 + t135 + t1041) + py1 * (t224 + t128 + t1040) + + t503 + t144 + t143 + t1039) + + t897 + t303 + x2 * t634 + x2 * (t690 + t52) * y2) + + (t1027 + t808) * t1) + + py0 * (y1 * (x1 * (t579 + t251 + t776 + t248) + t581 + t735 + t240 + t734) + + t29 * t169 + x1 * t209 + t1 * (t461 + t80 + t683 + t1032)) + + px0 * (y1 * (x1 * (t586 + t877 + t1001 + t876) + t591 + t976 + t588 + t975) + + x1 * t499 + t29 * t174 + t1 * (t484 + t849 + t957 + t1033)) + + y1 * (x1 * (px1 * (t874 + t1005 + t873) + py1 * (t226 + t781 + t220) + t608 + + t504 + t605) + + t573 + t568) + t161 * t183 + x1 * (t639 + t235) + t29 * t160 + + t14 * (px0 * (x1 * (t517 + t611 + t135 + t1038) + + t615 + t29 * t109 + t560 + t443 + t1031 + + (t399 + t613 + x1 * t646 + t418 + t995) * y1) + + py0 * (x1 * (t529 + t330 + t128 + t1037) + + t625 + t29 * t76 + t112 + t438 + t1030 + + (t623 + t393 + t102 + x1 * t66 + t754) * y1) + + x1 * (px1 * (t136 + t620 + t1036) + py1 * (t87 + t298 + t1035) + t423 + + t125 + t54 + t1034) + + px1 * (t849 + t957 + t1033) + py1 * (t80 + t683 + t1032) + t434 + + t29 * t36 + t1029 + (t962 + t711 + x1 * (t836 + t100)) * y1) + + t1 * (px1 * (t560 + t443 + t1031) + py1 * (t112 + t438 + t1030) + t434 + + t1029)) + ; + + trans->m[2][1] = (t161 * (px0 * (x1 * (t616 + t180 + t512 + t1003) + + t586 + t510 + y1 * (t475 + t854 + x1 * t109 + t117 + t1062) + + t621 + t508 + t646 * t1) + + py0 * (t579 + x1 * (t177 + t626 + t354 + t778) + t507 + + y1 * (t455 + x1 * t76 + t91 + t203 + t1061) + t299 + + t506 + t66 * t1) + x1 * (t921 + t368) + + px1 * (t874 + t979 + t873) + py1 * (t226 + t728 + t220) + + y1 * (x1 * (t917 + t358) + px1 * (t118 + t843 + t1063) + + py1 * (t204 + t127 + t1065) + t423 + t144 + + t143 + t1060) + t504 + t21 * t1) + + x0 * (py0 * (t29 * t1050 + t581 + t241 + + y1 + * (x1 * (t529 + t87 + t577 + t292 + t1070) + + t461 + t1057 + t80 + t1056) + x1 * y2 * t741 + t240 + + t239 + t650 * t1) + + px0 * (t591 + t29 * t1049 + t589 + + y1 * (x1 * (t517 + t136 + t306 + t428 + t1071) + + t484 + t1059 + t849 + t1058) + x1 * y2 * t334 + t588 + + t587 + t651 * t1) + t29 * (t1028 + t809) + t903 + t321 + + y1 * (x1 * (px1 * (t901 + t117 + t1072) + py1 * (t317 + t203 + t1073) + + t503 + t125 + t54 + t1069) + + px1 * (t1019 + t837 + t1064) + py1 * (t789 + t111 + t1066) + t267 + + t433 + t1055) + + x1 * (y2 * (x2 * (t56 + t323) + t322 + t273) + t983 + t742) + t737 * t1) + + py0 * (x1 * (t642 + t215 + t214 + t213) + t29 * (t533 + t226 + t728 + t220) + + y1 + * (x1 * (t625 + t758 + t111 + t1067) + + t396 + t395) + t537 * t1) + + px0 * (x1 * (t643 + t868 + t570 + t867) + t29 * (t528 + t874 + t979 + t873) + + y1 + * (x1 * (t615 + t999 + t837 + t1068) + + t404 + t402) + t539 * t1) + + x1 * (t641 + t243) + + t29 * (px1 * (t510 + t621 + t508) + py1 * (t507 + t299 + t506) + t504) + + y0 * (x0 * (py0 * (x1 * (t455 + t317 + t203 + t1073) + + t461 + t29 * t349 + t767 + t683 + t1052 + + (t436 + t450 + x1 * t99 + t69 + t677) * y1) + + px0 * (x1 * (t475 + t901 + t117 + t1072) + + t484 + t29 * t916 + t992 + t957 + t1053 + + (t468 + t441 + t466 + x1 * t693 + t951) * y1) + + x1 * (px1 * (t136 + t306 + t428 + t1071) + + py1 * (t87 + t577 + t292 + t1070) + t503 + t125 + t54 + t1069) + + px1 * (t999 + t837 + t1068) + py1 * (t758 + t111 + t1067) + + t29 * (t163 + t162) + t434 + t553 + t1051 + + (t421 + t104 + t431 + t43 + + x1 * (t836 + t100 + t690 + t52 + (t270 + t886) * x2) + + t1054) + * y1) + + py0 * (x1 * (t625 + t789 + t111 + t1066) + + t29 * (t529 + t204 + t127 + t1065) + t544 + t543 + + (x1 * t624 + t410 + t409) * y1) + + px0 * (x1 * (t615 + t1019 + t837 + t1064) + + t29 * (t517 + t118 + t843 + t1063) + t548 + t547 + + (x1 * t614 + t414 + t413) * y1) + + t29 * (px1 * (t854 + t117 + t1062) + py1 * (t91 + t203 + t1061) + t423 + + t144 + t143 + t1060) + + x1 * (px1 * (t1059 + t849 + t1058) + py1 * (t1057 + t80 + t1056) + t267 + + t433 + t1055) + t406 + t398 + + (t416 + x1 * (t962 + t711 + t431 + t43 + t1054) + t412 + + x2 * (t793 + t1022) + t5 * t594) + * y1) + + y1 * (x1 * (px1 * (t992 + t957 + t1053) + py1 * (t767 + t683 + t1052) + t434 + + t553 + t1051) + + t550 + t546) + t653 * t1 + t652 * t14) + ; + trans->m[2][2] = t644; +} + +static void +print_trans (const char *header, struct pixman_f_transform *trans) +{ + int i, j; + double max; + + max = 0; + + printf ("%s\n", header); + + for (i = 0; i < 3; ++i) + { + for (j = 0; j < 3; ++j) + { + double a = fabs (trans->m[i][j]); + + if (a > max) + max = a; + } + } + + if (max == 0.0) + max = 1.0; + + for (i = 0; i < 3; ++i) + { + printf ("{ "); + for (j = 0; j < 3; ++j) + { + printf ("D2F (%.5f)%s", 16384 * (trans->m[i][j] / max), j == 2 ? "" : ", "); + } + + printf ("},\n"); + } +} + +int +main () +{ + struct pixman_f_transform t; + +#if 0 + quad_to_quad (75, 200, + 325, 200, + 450, 335, + -50, 335, + + 0, 0, + 400, 0, + 400, 400, + 0, 400, + + &t); +#endif + quad_to_quad ( + 1, 0, + 1, 2, + 2, 2, + 2, 0, + + 1, 0, + 1, 112, + 2, 2, + 2, 0, + + &t); + + print_trans ("0->0", &t); + + return 0; +} diff --git a/src/pixman/demos/radial-test.c b/src/pixman/demos/radial-test.c new file mode 100644 index 0000000..08a367c --- /dev/null +++ b/src/pixman/demos/radial-test.c @@ -0,0 +1,208 @@ +#include "../test/utils.h" +#include "gtk-utils.h" + +#define NUM_GRADIENTS 9 +#define NUM_STOPS 3 +#define NUM_REPEAT 4 +#define SIZE 128 +#define WIDTH (SIZE * NUM_GRADIENTS) +#define HEIGHT (SIZE * NUM_REPEAT) + +/* + * We want to test all the possible relative positions of the start + * and end circle: + * + * - The start circle can be smaller/equal/bigger than the end + * circle. A radial gradient can be classified in one of these + * three cases depending on the sign of dr. + * + * - The smaller circle can be completely inside/internally + * tangent/outside (at least in part) of the bigger circle. This + * classification is the same as the one which can be computed by + * examining the sign of a = (dx^2 + dy^2 - dr^2). + * + * - If the two circles have the same size, neither can be inside or + * internally tangent + * + * This test draws radial gradients whose circles always have the same + * centers (0, 0) and (1, 0), but with different radiuses. From left + * to right: + * + * - Degenerate start circle completely inside the end circle + * 0.00 -> 1.75; dr = 1.75 > 0; a = 1 - 1.75^2 < 0 + * + * - Small start circle completely inside the end circle + * 0.25 -> 1.75; dr = 1.5 > 0; a = 1 - 1.50^2 < 0 + * + * - Small start circle internally tangent to the end circle + * 0.50 -> 1.50; dr = 1.0 > 0; a = 1 - 1.00^2 = 0 + * + * - Small start circle outside of the end circle + * 0.50 -> 1.00; dr = 0.5 > 0; a = 1 - 0.50^2 > 0 + * + * - Start circle with the same size as the end circle + * 1.00 -> 1.00; dr = 0.0 = 0; a = 1 - 0.00^2 > 0 + * + * - Small end circle outside of the start circle + * 1.00 -> 0.50; dr = -0.5 > 0; a = 1 - 0.50^2 > 0 + * + * - Small end circle internally tangent to the start circle + * 1.50 -> 0.50; dr = -1.0 > 0; a = 1 - 1.00^2 = 0 + * + * - Small end circle completely inside the start circle + * 1.75 -> 0.25; dr = -1.5 > 0; a = 1 - 1.50^2 < 0 + * + * - Degenerate end circle completely inside the start circle + * 0.00 -> 1.75; dr = 1.75 > 0; a = 1 - 1.75^2 < 0 + * + */ + +const static double radiuses[NUM_GRADIENTS] = { + 0.00, + 0.25, + 0.50, + 0.50, + 1.00, + 1.00, + 1.50, + 1.75, + 1.75 +}; + +#define double_to_color(x) \ + (((uint32_t) ((x)*65536)) - (((uint32_t) ((x)*65536)) >> 16)) + +#define PIXMAN_STOP(offset,r,g,b,a) \ + { pixman_double_to_fixed (offset), \ + { \ + double_to_color (r), \ + double_to_color (g), \ + double_to_color (b), \ + double_to_color (a) \ + } \ + } + +static const pixman_gradient_stop_t stops[NUM_STOPS] = { + PIXMAN_STOP (0.0, 1, 0, 0, 0.75), + PIXMAN_STOP (0.70710678, 0, 1, 0, 0), + PIXMAN_STOP (1.0, 0, 0, 1, 1) +}; + +static pixman_image_t * +create_radial (int index) +{ + pixman_point_fixed_t p0, p1; + pixman_fixed_t r0, r1; + double x0, x1, radius0, radius1, left, right, center; + + x0 = 0; + x1 = 1; + radius0 = radiuses[index]; + radius1 = radiuses[NUM_GRADIENTS - index - 1]; + + /* center the gradient */ + left = MIN (x0 - radius0, x1 - radius1); + right = MAX (x0 + radius0, x1 + radius1); + center = (left + right) * 0.5; + x0 -= center; + x1 -= center; + + /* scale to make it fit within a 1x1 rect centered in (0,0) */ + x0 *= 0.25; + x1 *= 0.25; + radius0 *= 0.25; + radius1 *= 0.25; + + p0.x = pixman_double_to_fixed (x0); + p0.y = pixman_double_to_fixed (0); + + p1.x = pixman_double_to_fixed (x1); + p1.y = pixman_double_to_fixed (0); + + r0 = pixman_double_to_fixed (radius0); + r1 = pixman_double_to_fixed (radius1); + + return pixman_image_create_radial_gradient (&p0, &p1, + r0, r1, + stops, NUM_STOPS); +} + +static const pixman_repeat_t repeat[NUM_REPEAT] = { + PIXMAN_REPEAT_NONE, + PIXMAN_REPEAT_NORMAL, + PIXMAN_REPEAT_REFLECT, + PIXMAN_REPEAT_PAD +}; + +int +main (int argc, char **argv) +{ + pixman_transform_t transform; + pixman_image_t *src_img, *dest_img; + int i, j; + + enable_divbyzero_exceptions (); + + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + NULL, 0); + + draw_checkerboard (dest_img, 25, 0xffaaaaaa, 0xffbbbbbb); + + pixman_transform_init_identity (&transform); + + /* + * The create_radial() function returns gradients centered in the + * origin and whose interesting part fits a 1x1 square. We want to + * paint these gradients on a SIZExSIZE square and to make things + * easier we want the origin in the top-left corner of the square + * we want to see. + */ + pixman_transform_translate (NULL, &transform, + pixman_double_to_fixed (0.5), + pixman_double_to_fixed (0.5)); + + pixman_transform_scale (NULL, &transform, + pixman_double_to_fixed (SIZE), + pixman_double_to_fixed (SIZE)); + + /* + * Gradients are evaluated at the center of each pixel, so we need + * to translate by half a pixel to trigger some interesting + * cornercases. In particular, the original implementation of PDF + * radial gradients tried to divide by 0 when using this transform + * on the "tangent circles" cases. + */ + pixman_transform_translate (NULL, &transform, + pixman_double_to_fixed (0.5), + pixman_double_to_fixed (0.5)); + + for (i = 0; i < NUM_GRADIENTS; i++) + { + src_img = create_radial (i); + pixman_image_set_transform (src_img, &transform); + + for (j = 0; j < NUM_REPEAT; j++) + { + pixman_image_set_repeat (src_img, repeat[j]); + + pixman_image_composite32 (PIXMAN_OP_OVER, + src_img, + NULL, + dest_img, + 0, 0, + 0, 0, + i * SIZE, j * SIZE, + SIZE, SIZE); + + } + + pixman_image_unref (src_img); + } + + show_image (dest_img); + + pixman_image_unref (dest_img); + + return 0; +} diff --git a/src/pixman/demos/scale.c b/src/pixman/demos/scale.c new file mode 100644 index 0000000..d00307e --- /dev/null +++ b/src/pixman/demos/scale.c @@ -0,0 +1,436 @@ +/* + * Copyright 2012, Red Hat, Inc. + * Copyright 2012, Soren Sandmann + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Soren Sandmann <soren.sandmann@gmail.com> + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include <math.h> +#include <gtk/gtk.h> +#include <pixman.h> +#include <stdlib.h> +#include "gtk-utils.h" + +typedef struct +{ + GtkBuilder * builder; + pixman_image_t * original; + GtkAdjustment * scale_x_adjustment; + GtkAdjustment * scale_y_adjustment; + GtkAdjustment * rotate_adjustment; + GtkAdjustment * subsample_adjustment; + int scaled_width; + int scaled_height; +} app_t; + +static GtkWidget * +get_widget (app_t *app, const char *name) +{ + GtkWidget *widget = GTK_WIDGET (gtk_builder_get_object (app->builder, name)); + + if (!widget) + g_error ("Widget %s not found\n", name); + + return widget; +} + +static double +min4 (double a, double b, double c, double d) +{ + double m1, m2; + + m1 = MIN (a, b); + m2 = MIN (c, d); + return MIN (m1, m2); +} + +static double +max4 (double a, double b, double c, double d) +{ + double m1, m2; + + m1 = MAX (a, b); + m2 = MAX (c, d); + return MAX (m1, m2); +} + +static void +compute_extents (pixman_f_transform_t *trans, double *sx, double *sy) +{ + double min_x, max_x, min_y, max_y; + pixman_f_vector_t v[4] = + { + { { 1, 1, 1 } }, + { { -1, 1, 1 } }, + { { -1, -1, 1 } }, + { { 1, -1, 1 } }, + }; + + pixman_f_transform_point (trans, &v[0]); + pixman_f_transform_point (trans, &v[1]); + pixman_f_transform_point (trans, &v[2]); + pixman_f_transform_point (trans, &v[3]); + + min_x = min4 (v[0].v[0], v[1].v[0], v[2].v[0], v[3].v[0]); + max_x = max4 (v[0].v[0], v[1].v[0], v[2].v[0], v[3].v[0]); + min_y = min4 (v[0].v[1], v[1].v[1], v[2].v[1], v[3].v[1]); + max_y = max4 (v[0].v[1], v[1].v[1], v[2].v[1], v[3].v[1]); + + *sx = (max_x - min_x) / 2.0; + *sy = (max_y - min_y) / 2.0; +} + +typedef struct +{ + char name [20]; + int value; +} named_int_t; + +static const named_int_t filters[] = +{ + { "Box", PIXMAN_KERNEL_BOX }, + { "Impulse", PIXMAN_KERNEL_IMPULSE }, + { "Linear", PIXMAN_KERNEL_LINEAR }, + { "Cubic", PIXMAN_KERNEL_CUBIC }, + { "Lanczos2", PIXMAN_KERNEL_LANCZOS2 }, + { "Lanczos3", PIXMAN_KERNEL_LANCZOS3 }, + { "Lanczos3 Stretched", PIXMAN_KERNEL_LANCZOS3_STRETCHED }, + { "Gaussian", PIXMAN_KERNEL_GAUSSIAN }, +}; + +static const named_int_t repeats[] = +{ + { "None", PIXMAN_REPEAT_NONE }, + { "Normal", PIXMAN_REPEAT_NORMAL }, + { "Reflect", PIXMAN_REPEAT_REFLECT }, + { "Pad", PIXMAN_REPEAT_PAD }, +}; + +static int +get_value (app_t *app, const named_int_t table[], const char *box_name) +{ + GtkComboBox *box = GTK_COMBO_BOX (get_widget (app, box_name)); + + return table[gtk_combo_box_get_active (box)].value; +} + +static void +copy_to_counterpart (app_t *app, GObject *object) +{ + static const char *xy_map[] = + { + "reconstruct_x_combo_box", "reconstruct_y_combo_box", + "sample_x_combo_box", "sample_y_combo_box", + "scale_x_adjustment", "scale_y_adjustment", + }; + GObject *counterpart = NULL; + int i; + + for (i = 0; i < G_N_ELEMENTS (xy_map); i += 2) + { + GObject *x = gtk_builder_get_object (app->builder, xy_map[i]); + GObject *y = gtk_builder_get_object (app->builder, xy_map[i + 1]); + + if (object == x) + counterpart = y; + if (object == y) + counterpart = x; + } + + if (!counterpart) + return; + + if (GTK_IS_COMBO_BOX (counterpart)) + { + gtk_combo_box_set_active ( + GTK_COMBO_BOX (counterpart), + gtk_combo_box_get_active ( + GTK_COMBO_BOX (object))); + } + else if (GTK_IS_ADJUSTMENT (counterpart)) + { + gtk_adjustment_set_value ( + GTK_ADJUSTMENT (counterpart), + gtk_adjustment_get_value ( + GTK_ADJUSTMENT (object))); + } +} + +static double +to_scale (double v) +{ + return pow (1.15, v); +} + +static void +rescale (GtkWidget *may_be_null, app_t *app) +{ + pixman_f_transform_t ftransform; + pixman_transform_t transform; + double new_width, new_height; + double fscale_x, fscale_y; + double rotation; + pixman_fixed_t *params; + int n_params; + double sx, sy; + + pixman_f_transform_init_identity (&ftransform); + + if (may_be_null && gtk_toggle_button_get_active ( + GTK_TOGGLE_BUTTON (get_widget (app, "lock_checkbutton")))) + { + copy_to_counterpart (app, G_OBJECT (may_be_null)); + } + + fscale_x = gtk_adjustment_get_value (app->scale_x_adjustment); + fscale_y = gtk_adjustment_get_value (app->scale_y_adjustment); + rotation = gtk_adjustment_get_value (app->rotate_adjustment); + + fscale_x = to_scale (fscale_x); + fscale_y = to_scale (fscale_y); + + new_width = pixman_image_get_width (app->original) * fscale_x; + new_height = pixman_image_get_height (app->original) * fscale_y; + + pixman_f_transform_scale (&ftransform, NULL, fscale_x, fscale_y); + + pixman_f_transform_translate (&ftransform, NULL, - new_width / 2.0, - new_height / 2.0); + + rotation = (rotation / 360.0) * 2 * M_PI; + pixman_f_transform_rotate (&ftransform, NULL, cos (rotation), sin (rotation)); + + pixman_f_transform_translate (&ftransform, NULL, new_width / 2.0, new_height / 2.0); + + pixman_f_transform_invert (&ftransform, &ftransform); + + compute_extents (&ftransform, &sx, &sy); + + pixman_transform_from_pixman_f_transform (&transform, &ftransform); + pixman_image_set_transform (app->original, &transform); + + params = pixman_filter_create_separable_convolution ( + &n_params, + sx * 65536.0 + 0.5, + sy * 65536.0 + 0.5, + get_value (app, filters, "reconstruct_x_combo_box"), + get_value (app, filters, "reconstruct_y_combo_box"), + get_value (app, filters, "sample_x_combo_box"), + get_value (app, filters, "sample_y_combo_box"), + gtk_adjustment_get_value (app->subsample_adjustment), + gtk_adjustment_get_value (app->subsample_adjustment)); + + pixman_image_set_filter (app->original, PIXMAN_FILTER_SEPARABLE_CONVOLUTION, params, n_params); + + pixman_image_set_repeat ( + app->original, get_value (app, repeats, "repeat_combo_box")); + + free (params); + + app->scaled_width = ceil (new_width); + app->scaled_height = ceil (new_height); + + gtk_widget_set_size_request ( + get_widget (app, "drawing_area"), new_width + 0.5, new_height + 0.5); + + gtk_widget_queue_draw ( + get_widget (app, "drawing_area")); +} + +static gboolean +on_expose (GtkWidget *da, GdkEvent *event, gpointer data) +{ + app_t *app = data; + GdkRectangle *area = &event->expose.area; + cairo_surface_t *surface; + pixman_image_t *tmp; + cairo_t *cr; + uint32_t *pixels; + + pixels = calloc (1, area->width * area->height * 4); + tmp = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, area->width, area->height, pixels, area->width * 4); + + if (area->x < app->scaled_width && area->y < app->scaled_height) + { + pixman_image_composite ( + PIXMAN_OP_SRC, + app->original, NULL, tmp, + area->x, area->y, 0, 0, 0, 0, + app->scaled_width - area->x, app->scaled_height - area->y); + } + + surface = cairo_image_surface_create_for_data ( + (uint8_t *)pixels, CAIRO_FORMAT_ARGB32, + area->width, area->height, area->width * 4); + + cr = gdk_cairo_create (da->window); + + cairo_set_source_surface (cr, surface, area->x, area->y); + + cairo_paint (cr); + + cairo_destroy (cr); + cairo_surface_destroy (surface); + free (pixels); + pixman_image_unref (tmp); + + return TRUE; +} + +static void +set_up_combo_box (app_t *app, const char *box_name, + int n_entries, const named_int_t table[]) +{ + GtkWidget *widget = get_widget (app, box_name); + GtkListStore *model; + GtkCellRenderer *cell; + int i; + + model = gtk_list_store_new (1, G_TYPE_STRING); + + cell = gtk_cell_renderer_text_new (); + gtk_cell_layout_pack_start (GTK_CELL_LAYOUT (widget), cell, TRUE); + gtk_cell_layout_set_attributes (GTK_CELL_LAYOUT (widget), cell, + "text", 0, + NULL); + + gtk_combo_box_set_model (GTK_COMBO_BOX (widget), GTK_TREE_MODEL (model)); + + for (i = 0; i < n_entries; ++i) + { + const named_int_t *info = &(table[i]); + GtkTreeIter iter; + + gtk_list_store_append (model, &iter); + gtk_list_store_set (model, &iter, 0, info->name, -1); + } + + gtk_combo_box_set_active (GTK_COMBO_BOX (widget), 0); + + g_signal_connect (widget, "changed", G_CALLBACK (rescale), app); +} + +static void +set_up_filter_box (app_t *app, const char *box_name) +{ + set_up_combo_box (app, box_name, G_N_ELEMENTS (filters), filters); +} + +static char * +format_value (GtkWidget *widget, double value) +{ + return g_strdup_printf ("%.4f", to_scale (value)); +} + +static app_t * +app_new (pixman_image_t *original) +{ + GtkWidget *widget; + app_t *app = g_malloc (sizeof *app); + GError *err = NULL; + + app->builder = gtk_builder_new (); + app->original = original; + + if (!gtk_builder_add_from_file (app->builder, "scale.ui", &err)) + g_error ("Could not read file scale.ui: %s", err->message); + + app->scale_x_adjustment = + GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "scale_x_adjustment")); + app->scale_y_adjustment = + GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "scale_y_adjustment")); + app->rotate_adjustment = + GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "rotate_adjustment")); + app->subsample_adjustment = + GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "subsample_adjustment")); + + g_signal_connect (app->scale_x_adjustment, "value_changed", G_CALLBACK (rescale), app); + g_signal_connect (app->scale_y_adjustment, "value_changed", G_CALLBACK (rescale), app); + g_signal_connect (app->rotate_adjustment, "value_changed", G_CALLBACK (rescale), app); + g_signal_connect (app->subsample_adjustment, "value_changed", G_CALLBACK (rescale), app); + + widget = get_widget (app, "scale_x_scale"); + gtk_scale_add_mark (GTK_SCALE (widget), 0.0, GTK_POS_LEFT, NULL); + g_signal_connect (widget, "format_value", G_CALLBACK (format_value), app); + widget = get_widget (app, "scale_y_scale"); + gtk_scale_add_mark (GTK_SCALE (widget), 0.0, GTK_POS_LEFT, NULL); + g_signal_connect (widget, "format_value", G_CALLBACK (format_value), app); + widget = get_widget (app, "rotate_scale"); + gtk_scale_add_mark (GTK_SCALE (widget), 0.0, GTK_POS_LEFT, NULL); + + widget = get_widget (app, "drawing_area"); + g_signal_connect (widget, "expose_event", G_CALLBACK (on_expose), app); + + set_up_filter_box (app, "reconstruct_x_combo_box"); + set_up_filter_box (app, "reconstruct_y_combo_box"); + set_up_filter_box (app, "sample_x_combo_box"); + set_up_filter_box (app, "sample_y_combo_box"); + + set_up_combo_box ( + app, "repeat_combo_box", G_N_ELEMENTS (repeats), repeats); + + g_signal_connect ( + gtk_builder_get_object (app->builder, "lock_checkbutton"), + "toggled", G_CALLBACK (rescale), app); + + rescale (NULL, app); + + return app; +} + +int +main (int argc, char **argv) +{ + GtkWidget *window; + pixman_image_t *image; + app_t *app; + + gtk_init (&argc, &argv); + + if (argc < 2) + { + printf ("%s <image file>\n", argv[0]); + return -1; + } + + if (!(image = pixman_image_from_file (argv[1], PIXMAN_a8r8g8b8))) + { + printf ("Could not load image \"%s\"\n", argv[1]); + return -1; + } + + app = app_new (image); + + window = get_widget (app, "main"); + + g_signal_connect (window, "delete_event", G_CALLBACK (gtk_main_quit), NULL); + + gtk_window_set_default_size (GTK_WINDOW (window), 1024, 768); + + gtk_widget_show_all (window); + + gtk_main (); + + return 0; +} diff --git a/src/pixman/demos/scale.ui b/src/pixman/demos/scale.ui new file mode 100644 index 0000000..ee985dd --- /dev/null +++ b/src/pixman/demos/scale.ui @@ -0,0 +1,332 @@ +<?xml version="1.0" encoding="UTF-8"?> +<interface> + <!-- interface-requires gtk+ 2.12 --> + <!-- interface-naming-policy toplevel-contextual --> + <object class="GtkAdjustment" id="rotate_adjustment"> + <property name="lower">-180</property> + <property name="upper">190</property> + <property name="step_increment">1</property> + <property name="page_increment">10</property> + <property name="page_size">10</property> + </object> + <object class="GtkAdjustment" id="scale_y_adjustment"> + <property name="lower">-32</property> + <property name="upper">42</property> + <property name="step_increment">1</property> + <property name="page_increment">10</property> + <property name="page_size">10</property> + </object> + <object class="GtkAdjustment" id="scale_x_adjustment"> + <property name="lower">-32</property> + <property name="upper">42</property> + <property name="step_increment">1</property> + <property name="page_increment">10</property> + <property name="page_size">10</property> + </object> + <object class="GtkAdjustment" id="subsample_adjustment"> + <property name="lower">0</property> + <property name="upper">12</property> + <property name="step_increment">1</property> + <property name="page_increment">1</property> + <property name="page_size">0</property> + <property name="value">4</property> + </object> + <object class="GtkWindow" id="main"> + <child> + <object class="GtkHBox" id="u"> + <property name="visible">True</property> + <property name="spacing">12</property> + <child> + <object class="GtkScrolledWindow" id="scrolledwindow1"> + <property name="visible">True</property> + <property name="can_focus">True</property> + <property name="shadow_type">in</property> + <child> + <object class="GtkViewport" id="viewport1"> + <property name="visible">True</property> + <child> + <object class="GtkDrawingArea" id="drawing_area"> + <property name="visible">True</property> + </object> + </child> + </object> + </child> + </object> + <packing> + <property name="position">0</property> + </packing> + </child> + <child> + <object class="GtkVBox" id="box1"> + <property name="visible">True</property> + <property name="spacing">12</property> + <child> + <object class="GtkHBox" id="box2"> + <property name="visible">True</property> + <property name="homogeneous">True</property> + <child> + <object class="GtkVBox" id="box3"> + <property name="visible">True</property> + <property name="spacing">6</property> + <child> + <object class="GtkLabel" id="label1"> + <property name="visible">True</property> + <property name="label" translatable="yes"><b>Scale X</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property name="expand">False</property> + <property name="position">0</property> + </packing> + </child> + <child> + <object class="GtkVScale" id="scale_x_scale"> + <property name="visible">True</property> + <property name="can_focus">True</property> + <property name="adjustment">scale_x_adjustment</property> + <property name="fill_level">32</property> + <property name="value_pos">right</property> + </object> + <packing> + <property name="position">1</property> + </packing> + </child> + </object> + <packing> + <property name="expand">False</property> + <property name="position">0</property> + </packing> + </child> + <child> + <object class="GtkVBox" id="box4"> + <property name="visible">True</property> + <property name="spacing">6</property> + <child> + <object class="GtkLabel" id="label2"> + <property name="visible">True</property> + <property name="label" translatable="yes"><b>Scale Y</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property name="expand">False</property> + <property name="position">0</property> + </packing> + </child> + <child> + <object class="GtkVScale" id="scale_y_scale"> + <property name="visible">True</property> + <property name="can_focus">True</property> + <property name="adjustment">scale_y_adjustment</property> + <property name="fill_level">32</property> + <property name="value_pos">right</property> + </object> + <packing> + <property name="position">1</property> + </packing> + </child> + </object> + <packing> + <property name="expand">False</property> + <property name="position">1</property> + </packing> + </child> + <child> + <object class="GtkVBox" id="box5"> + <property name="visible">True</property> + <property name="spacing">6</property> + <child> + <object class="GtkLabel" id="label3"> + <property name="visible">True</property> + <property name="label" translatable="yes"><b>Rotate</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property name="expand">False</property> + <property name="position">0</property> + </packing> + </child> + <child> + <object class="GtkVScale" id="rotate_scale"> + <property name="visible">True</property> + <property name="can_focus">True</property> + <property name="adjustment">rotate_adjustment</property> + <property name="fill_level">180</property> + <property name="value_pos">right</property> + </object> + <packing> + <property name="position">1</property> + </packing> + </child> + </object> + <packing> + <property name="expand">False</property> + <property name="position">2</property> + </packing> + </child> + </object> + <packing> + <property name="padding">6</property> + <property name="position">0</property> + </packing> + </child> + <child> + <object class="GtkVBox" id="box6"> + <property name="visible">True</property> + <child> + <object class="GtkCheckButton" + id="lock_checkbutton"> + <property name="label" translatable="yes">Lock X and Y Dimensions</property> + <property name="xalign">0.0</property> + </object> + <packing> + <property name="expand">False</property> + <property name="fill">False</property> + <property name="padding">6</property> + <property name="position">1</property> + </packing> + </child> + <child> + <object class="GtkTable" id="grid1"> + <property name="visible">True</property> + <property name="column_spacing">8</property> + <property name="row_spacing">6</property> + <child> + <object class="GtkLabel" id="label4"> + <property name="visible">True</property> + <property name="xalign">1</property> + <property name="label" translatable="yes"><b>Reconstruct X:</b></property> + <property name="use_markup">True</property> + </object> + </child> + <child> + <object class="GtkLabel" id="label5"> + <property name="visible">True</property> + <property name="xalign">1</property> + <property name="label" translatable="yes"><b>Reconstruct Y:</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property name="top_attach">1</property> + </packing> + </child> + <child> + <object class="GtkLabel" id="label6"> + <property name="visible">True</property> + <property name="xalign">1</property> + <property name="label" translatable="yes"><b>Sample X:</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property name="top_attach">2</property> + </packing> + </child> + <child> + <object class="GtkLabel" id="label7"> + <property name="visible">True</property> + <property name="xalign">1</property> + <property name="label" translatable="yes"><b>Sample Y:</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property name="top_attach">3</property> + </packing> + </child> + <child> + <object class="GtkLabel" id="label8"> + <property name="visible">True</property> + <property name="xalign">1</property> + <property name="label" translatable="yes"><b>Repeat:</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property name="top_attach">4</property> + </packing> + </child> + <child> + <object class="GtkLabel" id="label9"> + <property name="visible">True</property> + <property name="xalign">1</property> + <property name="label" translatable="yes"><b>Subsample:</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property name="top_attach">5</property> + </packing> + </child> + <child> + <object class="GtkComboBox" id="reconstruct_x_combo_box"> + <property name="visible">True</property> + </object> + <packing> + <property name="left_attach">1</property> + </packing> + </child> + <child> + <object class="GtkComboBox" id="reconstruct_y_combo_box"> + <property name="visible">True</property> + </object> + <packing> + <property name="left_attach">1</property> + <property name="top_attach">1</property> + </packing> + </child> + <child> + <object class="GtkComboBox" id="sample_x_combo_box"> + <property name="visible">True</property> + </object> + <packing> + <property name="left_attach">1</property> + <property name="top_attach">2</property> + </packing> + </child> + <child> + <object class="GtkComboBox" id="sample_y_combo_box"> + <property name="visible">True</property> + </object> + <packing> + <property name="left_attach">1</property> + <property name="top_attach">3</property> + </packing> + </child> + <child> + <object class="GtkComboBox" id="repeat_combo_box"> + <property name="visible">True</property> + </object> + <packing> + <property name="left_attach">1</property> + <property name="top_attach">4</property> + </packing> + </child> + <child> + <object class="GtkSpinButton" id="subsample_spin_button"> + <property name="visible">True</property> + <property name="adjustment">subsample_adjustment</property> + </object> + <packing> + <property name="left_attach">1</property> + <property name="top_attach">5</property> + </packing> + </child> + </object> + <packing> + <property name="expand">False</property> + <property name="padding">6</property> + <property name="position">1</property> + </packing> + </child> + </object> + <packing> + <property name="expand">False</property> + <property name="position">0</property> + </packing> + </child> + </object> + <packing> + <property name="expand">False</property> + <property name="position">1</property> + </packing> + </child> + </object> + </child> + </object> +</interface> diff --git a/src/pixman/demos/screen-test.c b/src/pixman/demos/screen-test.c new file mode 100644 index 0000000..e69dba3 --- /dev/null +++ b/src/pixman/demos/screen-test.c @@ -0,0 +1,44 @@ +#include <stdio.h> +#include <stdlib.h> +#include "pixman.h" +#include "gtk-utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 40 +#define HEIGHT 40 + + uint32_t *src1 = malloc (WIDTH * HEIGHT * 4); + uint32_t *src2 = malloc (WIDTH * HEIGHT * 4); + uint32_t *src3 = malloc (WIDTH * HEIGHT * 4); + uint32_t *dest = malloc (3 * WIDTH * 2 * HEIGHT * 4); + pixman_image_t *simg1, *simg2, *simg3, *dimg; + + int i; + + for (i = 0; i < WIDTH * HEIGHT; ++i) + { + src1[i] = 0x7ff00000; + src2[i] = 0x7f00ff00; + src3[i] = 0x7f0000ff; + } + + for (i = 0; i < 3 * WIDTH * 2 * HEIGHT; ++i) + { + dest[i] = 0x0; + } + + simg1 = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src1, WIDTH * 4); + simg2 = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src2, WIDTH * 4); + simg3 = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src3, WIDTH * 4); + dimg = pixman_image_create_bits (PIXMAN_a8r8g8b8, 3 * WIDTH, 2 * HEIGHT, dest, 3 * WIDTH * 4); + + pixman_image_composite (PIXMAN_OP_SCREEN, simg1, NULL, dimg, 0, 0, 0, 0, WIDTH, HEIGHT / 4, WIDTH, HEIGHT); + pixman_image_composite (PIXMAN_OP_SCREEN, simg2, NULL, dimg, 0, 0, 0, 0, (WIDTH/2), HEIGHT / 4 + HEIGHT / 2, WIDTH, HEIGHT); + pixman_image_composite (PIXMAN_OP_SCREEN, simg3, NULL, dimg, 0, 0, 0, 0, (4 * WIDTH) / 3, HEIGHT, WIDTH, HEIGHT); + + show_image (dimg); + + return 0; +} diff --git a/src/pixman/demos/srgb-test.c b/src/pixman/demos/srgb-test.c new file mode 100644 index 0000000..681d521 --- /dev/null +++ b/src/pixman/demos/srgb-test.c @@ -0,0 +1,87 @@ +#include <math.h> + +#include "pixman.h" +#include "gtk-utils.h" + +static uint32_t +linear_argb_to_premult_argb (float a, + float r, + float g, + float b) +{ + r *= a; + g *= a; + b *= a; + return (uint32_t) (a * 255.0f + 0.5f) << 24 + | (uint32_t) (r * 255.0f + 0.5f) << 16 + | (uint32_t) (g * 255.0f + 0.5f) << 8 + | (uint32_t) (b * 255.0f + 0.5f) << 0; +} + +static float +lin2srgb (float linear) +{ + if (linear < 0.0031308f) + return linear * 12.92f; + else + return 1.055f * powf (linear, 1.0f/2.4f) - 0.055f; +} + +static uint32_t +linear_argb_to_premult_srgb_argb (float a, + float r, + float g, + float b) +{ + r = lin2srgb (r * a); + g = lin2srgb (g * a); + b = lin2srgb (b * a); + return (uint32_t) (a * 255.0f + 0.5f) << 24 + | (uint32_t) (r * 255.0f + 0.5f) << 16 + | (uint32_t) (g * 255.0f + 0.5f) << 8 + | (uint32_t) (b * 255.0f + 0.5f) << 0; +} + +int +main (int argc, char **argv) +{ +#define WIDTH 400 +#define HEIGHT 200 + int y, x, p; + float alpha; + + uint32_t *dest = malloc (WIDTH * HEIGHT * 4); + uint32_t *src1 = malloc (WIDTH * HEIGHT * 4); + pixman_image_t *dest_img, *src1_img; + + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8_sRGB, + WIDTH, HEIGHT, + dest, + WIDTH * 4); + src1_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + src1, + WIDTH * 4); + + for (y = 0; y < HEIGHT; y ++) + { + p = WIDTH * y; + for (x = 0; x < WIDTH; x ++) + { + alpha = (float) x / WIDTH; + src1[p + x] = linear_argb_to_premult_argb (alpha, 1, 0, 1); + dest[p + x] = linear_argb_to_premult_srgb_argb (1-alpha, 0, 1, 0); + } + } + + pixman_image_composite (PIXMAN_OP_ADD, src1_img, NULL, dest_img, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + pixman_image_unref (src1_img); + free (src1); + + show_image (dest_img); + pixman_image_unref (dest_img); + free (dest); + + return 0; +} diff --git a/src/pixman/demos/srgb-trap-test.c b/src/pixman/demos/srgb-trap-test.c new file mode 100644 index 0000000..d5ae16a --- /dev/null +++ b/src/pixman/demos/srgb-trap-test.c @@ -0,0 +1,119 @@ +#include <math.h> +#include "pixman.h" +#include "gtk-utils.h" + +#define F(x) \ + pixman_double_to_fixed (x) + +#define WIDTH 600 +#define HEIGHT 300 + +static uint16_t +convert_to_srgb (uint16_t in) +{ + double d = in * (1/65535.0); + double a = 0.055; + + if (d < 0.0031308) + d = 12.92 * d; + else + d = (1 + a) * pow (d, 1 / 2.4) - a; + + return (d * 65535.0) + 0.5; +} + +static void +convert_color (pixman_color_t *dest_srgb, pixman_color_t *linear) +{ + dest_srgb->alpha = convert_to_srgb (linear->alpha); + dest_srgb->red = convert_to_srgb (linear->red); + dest_srgb->green = convert_to_srgb (linear->green); + dest_srgb->blue = convert_to_srgb (linear->blue); +} + +int +main (int argc, char **argv) +{ + static const pixman_trapezoid_t traps[] = + { + { F(10.10), F(280.0), + { { F(20.0), F(10.10) }, + { F(5.3), F(280.0) } }, + { { F(20.3), F(10.10) }, + { F(5.6), F(280.0) } } + }, + { F(10.10), F(280.0), + { { F(40.0), F(10.10) }, + { F(15.3), F(280.0) } }, + { { F(41.0), F(10.10) }, + { F(16.3), F(280.0) } } + }, + { F(10.10), F(280.0), + { { F(120.0), F(10.10) }, + { F(5.3), F(280.0) } }, + { { F(128.3), F(10.10) }, + { F(6.6), F(280.0) } } + }, + { F(10.10), F(280.0), + { { F(60.0), F(10.10) }, + { F(25.3), F(280.0) } }, + { { F(61.0), F(10.10) }, + { F(26.3), F(280.0) } } + }, + { F(10.10), F(280.0), + { { F(90.0), F(10.10) }, + { F(55.3), F(280.0) } }, + { { F(93.0), F(10.10) }, + { F(58.3), F(280.0) } } + }, + { F(130.10), F(150.0), + { { F(100.0), F(130.10) }, + { F(250.3), F(150.0) } }, + { { F(110.0), F(130.10) }, + { F(260.3), F(150.0) } } + }, + { F(170.10), F(240.0), + { { F(100.0), F(170.10) }, + { F(120.3), F(240.0) } }, + { { F(250.0), F(170.10) }, + { F(250.3), F(240.0) } } + }, + }; + + pixman_image_t *src, *dest_srgb, *dest_linear; + pixman_color_t bg = { 0x0000, 0x0000, 0x0000, 0xffff }; + pixman_color_t fg = { 0xffff, 0xffff, 0xffff, 0xffff }; + pixman_color_t fg_srgb; + uint32_t *d; + + d = malloc (WIDTH * HEIGHT * 4); + + dest_srgb = pixman_image_create_bits ( + PIXMAN_a8r8g8b8_sRGB, WIDTH, HEIGHT, d, WIDTH * 4); + dest_linear = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, WIDTH, HEIGHT, d, WIDTH * 4); + + src = pixman_image_create_solid_fill (&bg); + pixman_image_composite32 (PIXMAN_OP_SRC, + src, NULL, dest_srgb, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + + src = pixman_image_create_solid_fill (&fg); + + pixman_composite_trapezoids (PIXMAN_OP_OVER, + src, dest_srgb, PIXMAN_a8, + 0, 0, 10, 10, G_N_ELEMENTS (traps), traps); + + convert_color (&fg_srgb, &fg); + src = pixman_image_create_solid_fill (&fg_srgb); + + pixman_composite_trapezoids (PIXMAN_OP_OVER, + src, dest_linear, PIXMAN_a8, + 0, 0, 310, 10, G_N_ELEMENTS (traps), traps); + + show_image (dest_linear); + pixman_image_unref(dest_linear); + free(d); + + return 0; +} diff --git a/src/pixman/demos/trap-test.c b/src/pixman/demos/trap-test.c new file mode 100644 index 0000000..19295e7 --- /dev/null +++ b/src/pixman/demos/trap-test.c @@ -0,0 +1,49 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "pixman.h" +#include "gtk-utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 200 +#define HEIGHT 200 + + pixman_image_t *src_img; + pixman_image_t *mask_img; + pixman_image_t *dest_img; + pixman_trap_t trap; + pixman_color_t white = { 0x0000, 0xffff, 0x0000, 0xffff }; + uint32_t *bits = malloc (WIDTH * HEIGHT * 4); + uint32_t *mbits = malloc (WIDTH * HEIGHT); + + memset (mbits, 0, WIDTH * HEIGHT); + memset (bits, 0xff, WIDTH * HEIGHT * 4); + + trap.top.l = pixman_int_to_fixed (50) + 0x8000; + trap.top.r = pixman_int_to_fixed (150) + 0x8000; + trap.top.y = pixman_int_to_fixed (30); + + trap.bot.l = pixman_int_to_fixed (50) + 0x8000; + trap.bot.r = pixman_int_to_fixed (150) + 0x8000; + trap.bot.y = pixman_int_to_fixed (150); + + mask_img = pixman_image_create_bits (PIXMAN_a8, WIDTH, HEIGHT, mbits, WIDTH); + src_img = pixman_image_create_solid_fill (&white); + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, bits, WIDTH * 4); + + pixman_add_traps (mask_img, 0, 0, 1, &trap); + + pixman_image_composite (PIXMAN_OP_OVER, + src_img, mask_img, dest_img, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + + show_image (dest_img); + + pixman_image_unref (src_img); + pixman_image_unref (dest_img); + free (bits); + + return 0; +} diff --git a/src/pixman/demos/tri-test.c b/src/pixman/demos/tri-test.c new file mode 100644 index 0000000..a71869a --- /dev/null +++ b/src/pixman/demos/tri-test.c @@ -0,0 +1,48 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "../test/utils.h" +#include "gtk-utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 200 +#define HEIGHT 200 + +#define POINT(x,y) \ + { pixman_double_to_fixed ((x)), pixman_double_to_fixed ((y)) } + + pixman_image_t *src_img, *dest_img; + pixman_triangle_t tris[4] = + { + { POINT (100, 100), POINT (10, 50), POINT (110, 10) }, + { POINT (100, 100), POINT (150, 10), POINT (200, 50) }, + { POINT (100, 100), POINT (10, 170), POINT (90, 175) }, + { POINT (100, 100), POINT (170, 150), POINT (120, 190) }, + }; + pixman_color_t color = { 0x4444, 0x4444, 0xffff, 0xffff }; + uint32_t *bits = malloc (WIDTH * HEIGHT * 4); + int i; + + for (i = 0; i < WIDTH * HEIGHT; ++i) + bits[i] = (i / HEIGHT) * 0x01010000; + + src_img = pixman_image_create_solid_fill (&color); + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, bits, WIDTH * 4); + + pixman_composite_triangles (PIXMAN_OP_ATOP_REVERSE, + src_img, + dest_img, + PIXMAN_a8, + 200, 200, + -5, 5, + ARRAY_LENGTH (tris), tris); + show_image (dest_img); + + pixman_image_unref (src_img); + pixman_image_unref (dest_img); + free (bits); + + return 0; +} diff --git a/src/pixman/demos/zone_plate.png b/src/pixman/demos/zone_plate.png Binary files differnew file mode 100644 index 0000000..519291d --- /dev/null +++ b/src/pixman/demos/zone_plate.png diff --git a/src/pixman/pixman-1-uninstalled.pc.in b/src/pixman/pixman-1-uninstalled.pc.in new file mode 100644 index 0000000..e0347d0 --- /dev/null +++ b/src/pixman/pixman-1-uninstalled.pc.in @@ -0,0 +1,5 @@ +Name: Pixman +Description: The pixman library (version 1) +Version: @PACKAGE_VERSION@ +Cflags: -I${pc_top_builddir}/${pcfiledir}/pixman +Libs: ${pc_top_builddir}/${pcfiledir}/pixman/libpixman-1.la diff --git a/src/pixman/pixman-1.pc.in b/src/pixman/pixman-1.pc.in new file mode 100644 index 0000000..e3b9711 --- /dev/null +++ b/src/pixman/pixman-1.pc.in @@ -0,0 +1,11 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: Pixman +Description: The pixman library (version 1) +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/pixman-1 +Libs: -L${libdir} -lpixman-1 + diff --git a/src/pixman/pixman/Makefile.am b/src/pixman/pixman/Makefile.am new file mode 100644 index 0000000..b376d9a --- /dev/null +++ b/src/pixman/pixman/Makefile.am @@ -0,0 +1,139 @@ +include $(top_srcdir)/pixman/Makefile.sources + +lib_LTLIBRARIES = libpixman-1.la + +libpixman_1_la_LDFLAGS = -version-info $(LT_VERSION_INFO) -no-undefined @PTHREAD_LDFLAGS@ +libpixman_1_la_LIBADD = @PTHREAD_LIBS@ -lm +libpixman_1_la_SOURCES = $(libpixman_sources) $(libpixman_headers) + +libpixmanincludedir = $(includedir)/pixman-1 +libpixmaninclude_HEADERS = pixman.h pixman-version.h +noinst_LTLIBRARIES = + +EXTRA_DIST = \ + Makefile.win32 \ + pixman-region.c \ + solaris-hwcap.mapfile \ + $(NULL) + +# mmx code +if USE_X86_MMX +noinst_LTLIBRARIES += libpixman-mmx.la +libpixman_mmx_la_SOURCES = \ + pixman-mmx.c +libpixman_mmx_la_CFLAGS = $(MMX_CFLAGS) +libpixman_1_la_LDFLAGS += $(MMX_LDFLAGS) +libpixman_1_la_LIBADD += libpixman-mmx.la + +ASM_CFLAGS_mmx=$(MMX_CFLAGS) +endif + +# vmx code +if USE_VMX +noinst_LTLIBRARIES += libpixman-vmx.la +libpixman_vmx_la_SOURCES = \ + pixman-vmx.c \ + pixman-combine32.h +libpixman_vmx_la_CFLAGS = $(VMX_CFLAGS) +libpixman_1_la_LIBADD += libpixman-vmx.la + +ASM_CFLAGS_vmx=$(VMX_CFLAGS) +endif + +# sse2 code +if USE_SSE2 +noinst_LTLIBRARIES += libpixman-sse2.la +libpixman_sse2_la_SOURCES = \ + pixman-sse2.c +libpixman_sse2_la_CFLAGS = $(SSE2_CFLAGS) +libpixman_1_la_LDFLAGS += $(SSE2_LDFLAGS) +libpixman_1_la_LIBADD += libpixman-sse2.la + +ASM_CFLAGS_sse2=$(SSE2_CFLAGS) +endif + +# ssse3 code +if USE_SSSE3 +noinst_LTLIBRARIES += libpixman-ssse3.la +libpixman_ssse3_la_SOURCES = \ + pixman-ssse3.c +libpixman_ssse3_la_CFLAGS = $(SSSE3_CFLAGS) +libpixman_1_la_LDFLAGS += $(SSSE3_LDFLAGS) +libpixman_1_la_LIBADD += libpixman-ssse3.la + +ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS) +endif + +# arm simd code +if USE_ARM_SIMD +noinst_LTLIBRARIES += libpixman-arm-simd.la +libpixman_arm_simd_la_SOURCES = \ + pixman-arm-simd.c \ + pixman-arm-common.h \ + pixman-arm-simd-asm.S \ + pixman-arm-simd-asm-scaled.S \ + pixman-arm-simd-asm.h +libpixman_1_la_LIBADD += libpixman-arm-simd.la + +ASM_CFLAGS_arm_simd= +endif + +# arm neon code +if USE_ARM_NEON +noinst_LTLIBRARIES += libpixman-arm-neon.la +libpixman_arm_neon_la_SOURCES = \ + pixman-arm-neon.c \ + pixman-arm-common.h \ + pixman-arm-neon-asm.S \ + pixman-arm-neon-asm-bilinear.S \ + pixman-arm-neon-asm.h +libpixman_1_la_LIBADD += libpixman-arm-neon.la + +ASM_CFLAGS_arm_neon= +endif + +# iwmmxt code +if USE_ARM_IWMMXT +libpixman_iwmmxt_la_SOURCES = pixman-mmx.c +noinst_LTLIBRARIES += libpixman-iwmmxt.la +libpixman_1_la_LIBADD += libpixman-iwmmxt.la + +libpixman_iwmmxt_la-pixman-mmx.lo: pixman-mmx.c + $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(AM_CPPFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $(IWMMXT_CFLAGS) -MT libpixman_iwmmxt_la-pixman-mmx.lo -MD -MP -MF $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo -c -o libpixman_iwmmxt_la-pixman-mmx.lo `test -f 'pixman-mmx.c' || echo '$(srcdir)/'`pixman-mmx.c + $(AM_V_at)$(am__mv) $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Plo + +libpixman_iwmmxt_la_DEPENDENCIES = $(am__DEPENDENCIES_1) +libpixman_iwmmxt_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(CFLAGS) $(IWMMXT_CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ + +libpixman-iwmmxt.la: libpixman_iwmmxt_la-pixman-mmx.lo $(libpixman_iwmmxt_la_DEPENDENCIES) + $(AM_V_CCLD)$(libpixman_iwmmxt_la_LINK) libpixman_iwmmxt_la-pixman-mmx.lo $(libpixman_iwmmxt_la_LIBADD) $(LIBS) +endif + +# mips dspr2 code +if USE_MIPS_DSPR2 +noinst_LTLIBRARIES += libpixman-mips-dspr2.la +libpixman_mips_dspr2_la_SOURCES = \ + pixman-mips-dspr2.c \ + pixman-mips-dspr2.h \ + pixman-mips-dspr2-asm.S \ + pixman-mips-dspr2-asm.h \ + pixman-mips-memcpy-asm.S +libpixman_1_la_LIBADD += libpixman-mips-dspr2.la + +ASM_CFLAGS_mips_dspr2= +endif + +# loongson code +if USE_LOONGSON_MMI +noinst_LTLIBRARIES += libpixman-loongson-mmi.la +libpixman_loongson_mmi_la_SOURCES = pixman-mmx.c loongson-mmintrin.h +libpixman_loongson_mmi_la_CFLAGS = $(LS_CFLAGS) +libpixman_1_la_LDFLAGS += $(LS_LDFLAGS) +libpixman_1_la_LIBADD += libpixman-loongson-mmi.la +endif + +.c.s : $(libpixmaninclude_HEADERS) + $(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $< diff --git a/src/pixman/pixman/Makefile.sources b/src/pixman/pixman/Makefile.sources new file mode 100644 index 0000000..c624eb9 --- /dev/null +++ b/src/pixman/pixman/Makefile.sources @@ -0,0 +1,42 @@ +libpixman_sources = \ + pixman.c \ + pixman-access.c \ + pixman-access-accessors.c \ + pixman-bits-image.c \ + pixman-combine32.c \ + pixman-combine-float.c \ + pixman-conical-gradient.c \ + pixman-filter.c \ + pixman-x86.c \ + pixman-mips.c \ + pixman-arm.c \ + pixman-ppc.c \ + pixman-edge.c \ + pixman-edge-accessors.c \ + pixman-fast-path.c \ + pixman-glyph.c \ + pixman-general.c \ + pixman-gradient-walker.c \ + pixman-image.c \ + pixman-implementation.c \ + pixman-linear-gradient.c \ + pixman-matrix.c \ + pixman-noop.c \ + pixman-radial-gradient.c \ + pixman-region16.c \ + pixman-region32.c \ + pixman-solid-fill.c \ + pixman-timer.c \ + pixman-trap.c \ + pixman-utils.c \ + $(NULL) + +libpixman_headers = \ + pixman.h \ + pixman-accessor.h \ + pixman-combine32.h \ + pixman-compiler.h \ + pixman-edge-imp.h \ + pixman-inlines.h \ + pixman-private.h \ + $(NULL) diff --git a/src/pixman/pixman/Makefile.win32 b/src/pixman/pixman/Makefile.win32 new file mode 100644 index 0000000..7b64033 --- /dev/null +++ b/src/pixman/pixman/Makefile.win32 @@ -0,0 +1,93 @@ +default: all + +top_srcdir = .. +include $(top_srcdir)/pixman/Makefile.sources +include $(top_srcdir)/Makefile.win32.common + +MMX_VAR = $(MMX) +ifeq ($(MMX_VAR),) +MMX_VAR=on +endif + +SSE2_VAR = $(SSE2) +ifeq ($(SSE2_VAR),) +SSE2_VAR=on +endif + +SSSE3_VAR = $(SSSE3) +ifeq ($(SSSE3_VAR),) +SSSE3_VAR=on +endif + +MMX_CFLAGS = -DUSE_X86_MMX -w14710 -w14714 +SSE2_CFLAGS = -DUSE_SSE2 +SSSE3_CFLAGS = -DUSE_SSSE3 + +# MMX compilation flags +ifeq ($(MMX_VAR),on) +PIXMAN_CFLAGS += $(MMX_CFLAGS) +libpixman_sources += pixman-mmx.c +endif + +# SSE2 compilation flags +ifeq ($(SSE2_VAR),on) +PIXMAN_CFLAGS += $(SSE2_CFLAGS) +libpixman_sources += pixman-sse2.c +endif + +# SSSE3 compilation flags +ifeq ($(SSSE3_VAR),on) +PIXMAN_CFLAGS += $(SSSE3_CFLAGS) +libpixman_sources += pixman-ssse3.c +endif + +OBJECTS = $(patsubst %.c, $(CFG_VAR)/%.obj, $(libpixman_sources)) + +# targets +all: inform informMMX informSSE2 informSSSE3 $(CFG_VAR)/$(LIBRARY).lib + +informMMX: +ifneq ($(MMX),off) +ifneq ($(MMX),on) +ifneq ($(MMX),) + @echo "Invalid specified MMX option : "$(MMX_VAR)"." + @echo + @echo "Possible choices for MMX are 'on' or 'off'" + @exit 1 +endif + @echo "Setting MMX flag to default value 'on'... (use MMX=on or MMX=off)" +endif +endif + +informSSE2: +ifneq ($(SSE2),off) +ifneq ($(SSE2),on) +ifneq ($(SSE2),) + @echo "Invalid specified SSE option : "$(SSE2)"." + @echo + @echo "Possible choices for SSE2 are 'on' or 'off'" + @exit 1 +endif + @echo "Setting SSE2 flag to default value 'on'... (use SSE2=on or SSE2=off)" +endif +endif + +informSSSE3: +ifneq ($(SSSE3),off) +ifneq ($(SSSE3),on) +ifneq ($(SSSE3),) + @echo "Invalid specified SSE option : "$(SSSE3)"." + @echo + @echo "Possible choices for SSSE3 are 'on' or 'off'" + @exit 1 +endif + @echo "Setting SSSE3 flag to default value 'on'... (use SSSE3=on or SSSE3=off)" +endif +endif + + +# pixman linking +$(CFG_VAR)/$(LIBRARY).lib: $(OBJECTS) + @$(AR) $(PIXMAN_ARFLAGS) -OUT:$@ $^ + +.PHONY: all informMMX informSSE2 informSSSE3 diff --git a/src/pixman/pixman/loongson-mmintrin.h b/src/pixman/pixman/loongson-mmintrin.h new file mode 100644 index 0000000..086c6e0 --- /dev/null +++ b/src/pixman/pixman/loongson-mmintrin.h @@ -0,0 +1,410 @@ +/* The gcc-provided loongson intrinsic functions are way too fucking broken + * to be of any use, otherwise I'd use them. + * + * - The hardware instructions are very similar to MMX or iwMMXt. Certainly + * close enough that they could have implemented the _mm_*-style intrinsic + * interface and had a ton of optimized code available to them. Instead they + * implemented something much, much worse. + * + * - pshuf takes a dead first argument, causing extra instructions to be + * generated. + * + * - There are no 64-bit shift or logical intrinsics, which means you have + * to implement them with inline assembly, but this is a nightmare because + * gcc doesn't understand that the integer vector datatypes are actually in + * floating-point registers, so you end up with braindead code like + * + * punpcklwd $f9,$f9,$f5 + * dmtc1 v0,$f8 + * punpcklwd $f19,$f19,$f5 + * dmfc1 t9,$f9 + * dmtc1 v0,$f9 + * dmtc1 t9,$f20 + * dmfc1 s0,$f19 + * punpcklbh $f20,$f20,$f2 + * + * where crap just gets copied back and forth between integer and floating- + * point registers ad nauseum. + * + * Instead of trying to workaround the problems from these crap intrinsics, I + * just implement the _mm_* intrinsics needed for pixman-mmx.c using inline + * assembly. + */ + +#include <stdint.h> + +/* vectors are stored in 64-bit floating-point registers */ +typedef double __m64; +/* having a 32-bit datatype allows us to use 32-bit loads in places like load8888 */ +typedef float __m32; + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_setzero_si64 (void) +{ + return 0.0; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_pi16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("paddh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_pi32 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("paddw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_adds_pu16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("paddush %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_adds_pu8 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("paddusb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_and_si64 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("and %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("pcmpeqw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_empty (void) +{ + +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_madd_pi16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("pmaddhw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mulhi_pu16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("pmulhuh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mullo_pi16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("pmullh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_or_si64 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("or %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_packs_pu16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("packushb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_packs_pi32 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("packsswh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \ + (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0)) +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_pi16 (uint16_t __w3, uint16_t __w2, uint16_t __w1, uint16_t __w0) +{ + if (__builtin_constant_p (__w3) && + __builtin_constant_p (__w2) && + __builtin_constant_p (__w1) && + __builtin_constant_p (__w0)) + { + uint64_t val = ((uint64_t)__w3 << 48) + | ((uint64_t)__w2 << 32) + | ((uint64_t)__w1 << 16) + | ((uint64_t)__w0 << 0); + return *(__m64 *)&val; + } + else if (__w3 == __w2 && __w2 == __w1 && __w1 == __w0) + { + /* TODO: handle other cases */ + uint64_t val = __w3; + uint64_t imm = _MM_SHUFFLE (0, 0, 0, 0); + __m64 ret; + asm("pshufh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (*(__m64 *)&val), "f" (*(__m64 *)&imm) + ); + return ret; + } + uint64_t val = ((uint64_t)__w3 << 48) + | ((uint64_t)__w2 << 32) + | ((uint64_t)__w1 << 16) + | ((uint64_t)__w0 << 0); + return *(__m64 *)&val; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_pi32 (unsigned __i1, unsigned __i0) +{ + if (__builtin_constant_p (__i1) && + __builtin_constant_p (__i0)) + { + uint64_t val = ((uint64_t)__i1 << 32) + | ((uint64_t)__i0 << 0); + return *(__m64 *)&val; + } + else if (__i1 == __i0) + { + uint64_t imm = _MM_SHUFFLE (1, 0, 1, 0); + __m64 ret; + asm("pshufh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (*(__m32 *)&__i1), "f" (*(__m64 *)&imm) + ); + return ret; + } + uint64_t val = ((uint64_t)__i1 << 32) + | ((uint64_t)__i0 << 0); + return *(__m64 *)&val; +} +#undef _MM_SHUFFLE + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_pi16 (__m64 __m, int64_t __n) +{ + __m64 ret; + asm("pshufh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__n) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_slli_pi16 (__m64 __m, int64_t __count) +{ + __m64 ret; + asm("psllh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + return ret; +} +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_slli_si64 (__m64 __m, int64_t __count) +{ + __m64 ret; + asm("dsll %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srli_pi16 (__m64 __m, int64_t __count) +{ + __m64 ret; + asm("psrlh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srli_pi32 (__m64 __m, int64_t __count) +{ + __m64 ret; + asm("psrlw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srli_si64 (__m64 __m, int64_t __count) +{ + __m64 ret; + asm("dsrl %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_pi16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("psubh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("punpckhbh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("punpckhhw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("punpcklbh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +/* Since punpcklbh doesn't care about the high 32-bits, we use the __m32 datatype which + * allows load8888 to use 32-bit loads */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpacklo_pi8_f (__m32 __m1, __m64 __m2) +{ + __m64 ret; + asm("punpcklbh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("punpcklhw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_xor_si64 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("xor %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +loongson_extract_pi16 (__m64 __m, int64_t __pos) +{ + __m64 ret; + asm("pextrh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__pos) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +loongson_insert_pi16 (__m64 __m1, __m64 __m2, int64_t __pos) +{ + __m64 ret; + asm("pinsrh_%3 %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2), "i" (__pos) + ); + return ret; +} diff --git a/src/pixman/pixman/make-srgb.pl b/src/pixman/pixman/make-srgb.pl new file mode 100644 index 0000000..cdaa80b --- /dev/null +++ b/src/pixman/pixman/make-srgb.pl @@ -0,0 +1,115 @@ +#!/usr/bin/perl -w + +use strict; + +sub linear_to_srgb +{ + my ($c) = @_; + + if ($c < 0.0031308) + { + return $c * 12.92; + } + else + { + return 1.055 * $c ** (1.0/2.4) - 0.055; + } +} + +sub srgb_to_linear +{ + my ($c) = @_; + + if ($c < 0.04045) + { + return $c / 12.92; + } + else + { + return (($c + 0.055) / 1.055) ** 2.4 + } +} + +my @linear_to_srgb; +for my $linear (0 .. 4095) +{ + my $srgb = int(linear_to_srgb($linear / 4095.0) * 255.0 + 0.5); + push @linear_to_srgb, $srgb; +} + +my @srgb_to_linear; +for my $srgb (0 .. 255) +{ + my $linear = int(srgb_to_linear($srgb / 255.0) * 65535.0 + 0.5); + push @srgb_to_linear, $linear; +} + +# Ensure that we have a lossless sRGB and back conversion loop. +# some of the darkest shades need a little bias -- maximum is just +# 5 increments out of 16. This gives us useful property with +# least amount of error in the sRGB-to-linear table, and keeps the actual +# table lookup in the other direction as simple as possible. +for my $srgb (0 .. $#srgb_to_linear) +{ + my $add = 0; + while (1) + { + my $linear = $srgb_to_linear[$srgb]; + my $srgb_lossy = $linear_to_srgb[$linear >> 4]; + last if $srgb == $srgb_lossy; + + # Add slight bias to this component until it rounds correctly + $srgb_to_linear[$srgb] ++; + $add ++; + } + die "Too many adds at $srgb" if $add > 5; +} + +print <<"PROLOG"; +/* WARNING: This file is generated by $0. + * Please edit that file instead of this one. + */ + +#include <stdint.h> + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "pixman-private.h" + +PROLOG + +print "const uint8_t linear_to_srgb[" . @linear_to_srgb . "] =\n"; +print "{\n"; +for my $linear (0 .. $#linear_to_srgb) +{ + if (($linear % 10) == 0) + { + print "\t"; + } + print sprintf("%d, ", $linear_to_srgb[$linear]); + if (($linear % 10) == 9) + { + print "\n"; + } +} +print "\n};\n"; +print "\n"; + +print "const uint16_t srgb_to_linear[" . @srgb_to_linear . "] =\n"; +print "{\n"; +for my $srgb (0 .. $#srgb_to_linear) +{ + if (($srgb % 10) == 0) + { + print "\t"; + } + print sprintf("%d, ", $srgb_to_linear[$srgb]); + if (($srgb % 10) == 9) + { + print "\n"; + } +} +print "\n};\n"; + diff --git a/src/pixman/pixman/pixman-access-accessors.c b/src/pixman/pixman/pixman-access-accessors.c new file mode 100644 index 0000000..3263582 --- /dev/null +++ b/src/pixman/pixman/pixman-access-accessors.c @@ -0,0 +1,3 @@ +#define PIXMAN_FB_ACCESSORS + +#include "pixman-access.c" diff --git a/src/pixman/pixman/pixman-access.c b/src/pixman/pixman/pixman-access.c new file mode 100644 index 0000000..4f0642d --- /dev/null +++ b/src/pixman/pixman/pixman-access.c @@ -0,0 +1,1433 @@ +/* + * + * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. + * 2005 Lars Knoll & Zack Rusin, Trolltech + * 2008 Aaron Plattner, NVIDIA Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <math.h> + +#include "pixman-accessor.h" +#include "pixman-private.h" + +#define CONVERT_RGB24_TO_Y15(s) \ + (((((s) >> 16) & 0xff) * 153 + \ + (((s) >> 8) & 0xff) * 301 + \ + (((s) ) & 0xff) * 58) >> 2) + +#define CONVERT_RGB24_TO_RGB15(s) \ + ((((s) >> 3) & 0x001f) | \ + (((s) >> 6) & 0x03e0) | \ + (((s) >> 9) & 0x7c00)) + +/* Fetch macros */ + +#ifdef WORDS_BIGENDIAN +#define FETCH_1(img,l,o) \ + (((READ ((img), ((uint32_t *)(l)) + ((o) >> 5))) >> (0x1f - ((o) & 0x1f))) & 0x1) +#else +#define FETCH_1(img,l,o) \ + ((((READ ((img), ((uint32_t *)(l)) + ((o) >> 5))) >> ((o) & 0x1f))) & 0x1) +#endif + +#define FETCH_8(img,l,o) (READ (img, (((uint8_t *)(l)) + ((o) >> 3)))) + +#ifdef WORDS_BIGENDIAN +#define FETCH_4(img,l,o) \ + (((4 * (o)) & 4) ? (FETCH_8 (img,l, 4 * (o)) & 0xf) : (FETCH_8 (img,l,(4 * (o))) >> 4)) +#else +#define FETCH_4(img,l,o) \ + (((4 * (o)) & 4) ? (FETCH_8 (img, l, 4 * (o)) >> 4) : (FETCH_8 (img, l, (4 * (o))) & 0xf)) +#endif + +#ifdef WORDS_BIGENDIAN +#define FETCH_24(img,l,o) \ + ((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 16) | \ + (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \ + (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 0)) +#else +#define FETCH_24(img,l,o) \ + ((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 0) | \ + (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \ + (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 16)) +#endif + +/* Store macros */ + +#ifdef WORDS_BIGENDIAN +#define STORE_1(img,l,o,v) \ + do \ + { \ + uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \ + uint32_t __m, __v; \ + \ + __m = 1 << (0x1f - ((o) & 0x1f)); \ + __v = (v)? __m : 0; \ + \ + WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \ + } \ + while (0) +#else +#define STORE_1(img,l,o,v) \ + do \ + { \ + uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \ + uint32_t __m, __v; \ + \ + __m = 1 << ((o) & 0x1f); \ + __v = (v)? __m : 0; \ + \ + WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \ + } \ + while (0) +#endif + +#define STORE_8(img,l,o,v) (WRITE (img, (uint8_t *)(l) + ((o) >> 3), (v))) + +#ifdef WORDS_BIGENDIAN +#define STORE_4(img,l,o,v) \ + do \ + { \ + int bo = 4 * (o); \ + int v4 = (v) & 0x0f; \ + \ + STORE_8 (img, l, bo, ( \ + bo & 4 ? \ + (FETCH_8 (img, l, bo) & 0xf0) | (v4) : \ + (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4))); \ + } while (0) +#else +#define STORE_4(img,l,o,v) \ + do \ + { \ + int bo = 4 * (o); \ + int v4 = (v) & 0x0f; \ + \ + STORE_8 (img, l, bo, ( \ + bo & 4 ? \ + (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4) : \ + (FETCH_8 (img, l, bo) & 0xf0) | (v4))); \ + } while (0) +#endif + +#ifdef WORDS_BIGENDIAN +#define STORE_24(img,l,o,v) \ + do \ + { \ + uint8_t *__tmp = (l) + 3 * (o); \ + \ + WRITE ((img), __tmp++, ((v) & 0x00ff0000) >> 16); \ + WRITE ((img), __tmp++, ((v) & 0x0000ff00) >> 8); \ + WRITE ((img), __tmp++, ((v) & 0x000000ff) >> 0); \ + } \ + while (0) +#else +#define STORE_24(img,l,o,v) \ + do \ + { \ + uint8_t *__tmp = (l) + 3 * (o); \ + \ + WRITE ((img), __tmp++, ((v) & 0x000000ff) >> 0); \ + WRITE ((img), __tmp++, ((v) & 0x0000ff00) >> 8); \ + WRITE ((img), __tmp++, ((v) & 0x00ff0000) >> 16); \ + } \ + while (0) +#endif + +/* + * YV12 setup and access macros + */ + +#define YV12_SETUP(image) \ + bits_image_t *__bits_image = (bits_image_t *)image; \ + uint32_t *bits = __bits_image->bits; \ + int stride = __bits_image->rowstride; \ + int offset0 = stride < 0 ? \ + ((-stride) >> 1) * ((__bits_image->height - 1) >> 1) - stride : \ + stride * __bits_image->height; \ + int offset1 = stride < 0 ? \ + offset0 + ((-stride) >> 1) * ((__bits_image->height) >> 1) : \ + offset0 + (offset0 >> 2) + +/* Note no trailing semicolon on the above macro; if it's there, then + * the typical usage of YV12_SETUP(image); will have an extra trailing ; + * that some compilers will interpret as a statement -- and then any further + * variable declarations will cause an error. + */ + +#define YV12_Y(line) \ + ((uint8_t *) ((bits) + (stride) * (line))) + +#define YV12_U(line) \ + ((uint8_t *) ((bits) + offset1 + \ + ((stride) >> 1) * ((line) >> 1))) + +#define YV12_V(line) \ + ((uint8_t *) ((bits) + offset0 + \ + ((stride) >> 1) * ((line) >> 1))) + +/* Misc. helpers */ + +static force_inline void +get_shifts (pixman_format_code_t format, + int *a, + int *r, + int *g, + int *b) +{ + switch (PIXMAN_FORMAT_TYPE (format)) + { + case PIXMAN_TYPE_A: + *b = 0; + *g = 0; + *r = 0; + *a = 0; + break; + + case PIXMAN_TYPE_ARGB: + case PIXMAN_TYPE_ARGB_SRGB: + *b = 0; + *g = *b + PIXMAN_FORMAT_B (format); + *r = *g + PIXMAN_FORMAT_G (format); + *a = *r + PIXMAN_FORMAT_R (format); + break; + + case PIXMAN_TYPE_ABGR: + *r = 0; + *g = *r + PIXMAN_FORMAT_R (format); + *b = *g + PIXMAN_FORMAT_G (format); + *a = *b + PIXMAN_FORMAT_B (format); + break; + + case PIXMAN_TYPE_BGRA: + /* With BGRA formats we start counting at the high end of the pixel */ + *b = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_B (format); + *g = *b - PIXMAN_FORMAT_B (format); + *r = *g - PIXMAN_FORMAT_G (format); + *a = *r - PIXMAN_FORMAT_R (format); + break; + + case PIXMAN_TYPE_RGBA: + /* With BGRA formats we start counting at the high end of the pixel */ + *r = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_R (format); + *g = *r - PIXMAN_FORMAT_R (format); + *b = *g - PIXMAN_FORMAT_G (format); + *a = *b - PIXMAN_FORMAT_B (format); + break; + + default: + assert (0); + break; + } +} + +static force_inline uint32_t +convert_channel (uint32_t pixel, uint32_t def_value, + int n_from_bits, int from_shift, + int n_to_bits, int to_shift) +{ + uint32_t v; + + if (n_from_bits && n_to_bits) + v = unorm_to_unorm (pixel >> from_shift, n_from_bits, n_to_bits); + else if (n_to_bits) + v = def_value; + else + v = 0; + + return (v & ((1 << n_to_bits) - 1)) << to_shift; +} + +static force_inline uint32_t +convert_pixel (pixman_format_code_t from, pixman_format_code_t to, uint32_t pixel) +{ + int a_from_shift, r_from_shift, g_from_shift, b_from_shift; + int a_to_shift, r_to_shift, g_to_shift, b_to_shift; + uint32_t a, r, g, b; + + get_shifts (from, &a_from_shift, &r_from_shift, &g_from_shift, &b_from_shift); + get_shifts (to, &a_to_shift, &r_to_shift, &g_to_shift, &b_to_shift); + + a = convert_channel (pixel, ~0, + PIXMAN_FORMAT_A (from), a_from_shift, + PIXMAN_FORMAT_A (to), a_to_shift); + + r = convert_channel (pixel, 0, + PIXMAN_FORMAT_R (from), r_from_shift, + PIXMAN_FORMAT_R (to), r_to_shift); + + g = convert_channel (pixel, 0, + PIXMAN_FORMAT_G (from), g_from_shift, + PIXMAN_FORMAT_G (to), g_to_shift); + + b = convert_channel (pixel, 0, + PIXMAN_FORMAT_B (from), b_from_shift, + PIXMAN_FORMAT_B (to), b_to_shift); + + return a | r | g | b; +} + +static force_inline uint32_t +convert_pixel_to_a8r8g8b8 (bits_image_t *image, + pixman_format_code_t format, + uint32_t pixel) +{ + if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY || + PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR) + { + return image->indexed->rgba[pixel]; + } + else + { + return convert_pixel (format, PIXMAN_a8r8g8b8, pixel); + } +} + +static force_inline uint32_t +convert_pixel_from_a8r8g8b8 (pixman_image_t *image, + pixman_format_code_t format, uint32_t pixel) +{ + if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY) + { + pixel = CONVERT_RGB24_TO_Y15 (pixel); + + return image->bits.indexed->ent[pixel & 0x7fff]; + } + else if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR) + { + pixel = convert_pixel (PIXMAN_a8r8g8b8, PIXMAN_x1r5g5b5, pixel); + + return image->bits.indexed->ent[pixel & 0x7fff]; + } + else + { + return convert_pixel (PIXMAN_a8r8g8b8, format, pixel); + } +} + +static force_inline uint32_t +fetch_and_convert_pixel (bits_image_t * image, + const uint8_t * bits, + int offset, + pixman_format_code_t format) +{ + uint32_t pixel; + + switch (PIXMAN_FORMAT_BPP (format)) + { + case 1: + pixel = FETCH_1 (image, bits, offset); + break; + + case 4: + pixel = FETCH_4 (image, bits, offset); + break; + + case 8: + pixel = READ (image, bits + offset); + break; + + case 16: + pixel = READ (image, ((uint16_t *)bits + offset)); + break; + + case 24: + pixel = FETCH_24 (image, bits, offset); + break; + + case 32: + pixel = READ (image, ((uint32_t *)bits + offset)); + break; + + default: + pixel = 0xffff00ff; /* As ugly as possible to detect the bug */ + break; + } + + return convert_pixel_to_a8r8g8b8 (image, format, pixel); +} + +static force_inline void +convert_and_store_pixel (bits_image_t * image, + uint8_t * dest, + int offset, + pixman_format_code_t format, + uint32_t pixel) +{ + uint32_t converted = convert_pixel_from_a8r8g8b8 ( + (pixman_image_t *)image, format, pixel); + + switch (PIXMAN_FORMAT_BPP (format)) + { + case 1: + STORE_1 (image, dest, offset, converted & 0x01); + break; + + case 4: + STORE_4 (image, dest, offset, converted & 0xf); + break; + + case 8: + WRITE (image, (dest + offset), converted & 0xff); + break; + + case 16: + WRITE (image, ((uint16_t *)dest + offset), converted & 0xffff); + break; + + case 24: + STORE_24 (image, dest, offset, converted); + break; + + case 32: + WRITE (image, ((uint32_t *)dest + offset), converted); + break; + + default: + *dest = 0x0; + break; + } +} + +#define MAKE_ACCESSORS(format) \ + static void \ + fetch_scanline_ ## format (bits_image_t *image, \ + int x, \ + int y, \ + int width, \ + uint32_t * buffer, \ + const uint32_t *mask) \ + { \ + uint8_t *bits = \ + (uint8_t *)(image->bits + y * image->rowstride); \ + int i; \ + \ + for (i = 0; i < width; ++i) \ + { \ + *buffer++ = \ + fetch_and_convert_pixel (image, bits, x + i, PIXMAN_ ## format); \ + } \ + } \ + \ + static void \ + store_scanline_ ## format (bits_image_t * image, \ + int x, \ + int y, \ + int width, \ + const uint32_t *values) \ + { \ + uint8_t *dest = \ + (uint8_t *)(image->bits + y * image->rowstride); \ + int i; \ + \ + for (i = 0; i < width; ++i) \ + { \ + convert_and_store_pixel ( \ + image, dest, i + x, PIXMAN_ ## format, values[i]); \ + } \ + } \ + \ + static uint32_t \ + fetch_pixel_ ## format (bits_image_t *image, \ + int offset, \ + int line) \ + { \ + uint8_t *bits = \ + (uint8_t *)(image->bits + line * image->rowstride); \ + \ + return fetch_and_convert_pixel ( \ + image, bits, offset, PIXMAN_ ## format); \ + } \ + \ + static const void *const __dummy__ ## format + +MAKE_ACCESSORS(a8r8g8b8); +MAKE_ACCESSORS(x8r8g8b8); +MAKE_ACCESSORS(a8b8g8r8); +MAKE_ACCESSORS(x8b8g8r8); +MAKE_ACCESSORS(x14r6g6b6); +MAKE_ACCESSORS(b8g8r8a8); +MAKE_ACCESSORS(b8g8r8x8); +MAKE_ACCESSORS(r8g8b8x8); +MAKE_ACCESSORS(r8g8b8a8); +MAKE_ACCESSORS(r8g8b8); +MAKE_ACCESSORS(b8g8r8); +MAKE_ACCESSORS(r5g6b5); +MAKE_ACCESSORS(b5g6r5); +MAKE_ACCESSORS(a1r5g5b5); +MAKE_ACCESSORS(x1r5g5b5); +MAKE_ACCESSORS(a1b5g5r5); +MAKE_ACCESSORS(x1b5g5r5); +MAKE_ACCESSORS(a4r4g4b4); +MAKE_ACCESSORS(x4r4g4b4); +MAKE_ACCESSORS(a4b4g4r4); +MAKE_ACCESSORS(x4b4g4r4); +MAKE_ACCESSORS(a8); +MAKE_ACCESSORS(c8); +MAKE_ACCESSORS(g8); +MAKE_ACCESSORS(r3g3b2); +MAKE_ACCESSORS(b2g3r3); +MAKE_ACCESSORS(a2r2g2b2); +MAKE_ACCESSORS(a2b2g2r2); +MAKE_ACCESSORS(x4a4); +MAKE_ACCESSORS(a4); +MAKE_ACCESSORS(g4); +MAKE_ACCESSORS(c4); +MAKE_ACCESSORS(r1g2b1); +MAKE_ACCESSORS(b1g2r1); +MAKE_ACCESSORS(a1r1g1b1); +MAKE_ACCESSORS(a1b1g1r1); +MAKE_ACCESSORS(a1); +MAKE_ACCESSORS(g1); + +/********************************** Fetch ************************************/ +/* Table mapping sRGB-encoded 8 bit numbers to linearly encoded + * floating point numbers. We assume that single precision + * floating point follows the IEEE 754 format. + */ +static const uint32_t to_linear_u[256] = +{ + 0x00000000, 0x399f22b4, 0x3a1f22b4, 0x3a6eb40e, 0x3a9f22b4, 0x3ac6eb61, + 0x3aeeb40e, 0x3b0b3e5d, 0x3b1f22b4, 0x3b33070b, 0x3b46eb61, 0x3b5b518a, + 0x3b70f18a, 0x3b83e1c5, 0x3b8fe614, 0x3b9c87fb, 0x3ba9c9b5, 0x3bb7ad6d, + 0x3bc63547, 0x3bd5635f, 0x3be539bd, 0x3bf5ba70, 0x3c0373b5, 0x3c0c6152, + 0x3c15a703, 0x3c1f45bc, 0x3c293e68, 0x3c3391f4, 0x3c3e4149, 0x3c494d43, + 0x3c54b6c7, 0x3c607eb1, 0x3c6ca5df, 0x3c792d22, 0x3c830aa8, 0x3c89af9e, + 0x3c9085db, 0x3c978dc5, 0x3c9ec7c0, 0x3ca63432, 0x3cadd37d, 0x3cb5a601, + 0x3cbdac20, 0x3cc5e639, 0x3cce54ab, 0x3cd6f7d2, 0x3cdfd00e, 0x3ce8ddb9, + 0x3cf2212c, 0x3cfb9ac1, 0x3d02a569, 0x3d0798dc, 0x3d0ca7e4, 0x3d11d2ae, + 0x3d171963, 0x3d1c7c2e, 0x3d21fb3a, 0x3d2796af, 0x3d2d4ebb, 0x3d332380, + 0x3d39152b, 0x3d3f23e3, 0x3d454fd0, 0x3d4b991c, 0x3d51ffeb, 0x3d588466, + 0x3d5f26b7, 0x3d65e6fe, 0x3d6cc564, 0x3d73c210, 0x3d7add25, 0x3d810b65, + 0x3d84b793, 0x3d88732e, 0x3d8c3e48, 0x3d9018f4, 0x3d940343, 0x3d97fd48, + 0x3d9c0714, 0x3da020b9, 0x3da44a48, 0x3da883d6, 0x3daccd70, 0x3db12728, + 0x3db59110, 0x3dba0b38, 0x3dbe95b2, 0x3dc3308f, 0x3dc7dbe0, 0x3dcc97b4, + 0x3dd1641c, 0x3dd6412a, 0x3ddb2eec, 0x3de02d75, 0x3de53cd3, 0x3dea5d16, + 0x3def8e52, 0x3df4d091, 0x3dfa23e5, 0x3dff885e, 0x3e027f06, 0x3e05427f, + 0x3e080ea2, 0x3e0ae376, 0x3e0dc104, 0x3e10a752, 0x3e139669, 0x3e168e50, + 0x3e198f0e, 0x3e1c98ab, 0x3e1fab2e, 0x3e22c6a0, 0x3e25eb08, 0x3e29186a, + 0x3e2c4ed0, 0x3e2f8e42, 0x3e32d6c4, 0x3e362861, 0x3e39831e, 0x3e3ce702, + 0x3e405416, 0x3e43ca5e, 0x3e4749e4, 0x3e4ad2ae, 0x3e4e64c2, 0x3e520027, + 0x3e55a4e6, 0x3e595303, 0x3e5d0a8a, 0x3e60cb7c, 0x3e6495e0, 0x3e6869bf, + 0x3e6c4720, 0x3e702e08, 0x3e741e7f, 0x3e78188c, 0x3e7c1c34, 0x3e8014c0, + 0x3e822039, 0x3e84308b, 0x3e8645b8, 0x3e885fc3, 0x3e8a7eb0, 0x3e8ca281, + 0x3e8ecb3a, 0x3e90f8df, 0x3e932b72, 0x3e9562f6, 0x3e979f6f, 0x3e99e0e0, + 0x3e9c274e, 0x3e9e72b8, 0x3ea0c322, 0x3ea31892, 0x3ea57308, 0x3ea7d28a, + 0x3eaa3718, 0x3eaca0b7, 0x3eaf0f69, 0x3eb18332, 0x3eb3fc16, 0x3eb67a15, + 0x3eb8fd34, 0x3ebb8576, 0x3ebe12de, 0x3ec0a56e, 0x3ec33d2a, 0x3ec5da14, + 0x3ec87c30, 0x3ecb2380, 0x3ecdd008, 0x3ed081ca, 0x3ed338c9, 0x3ed5f508, + 0x3ed8b68a, 0x3edb7d52, 0x3ede4962, 0x3ee11abe, 0x3ee3f168, 0x3ee6cd64, + 0x3ee9aeb6, 0x3eec955d, 0x3eef815d, 0x3ef272ba, 0x3ef56976, 0x3ef86594, + 0x3efb6717, 0x3efe6e02, 0x3f00bd2b, 0x3f02460c, 0x3f03d1a5, 0x3f055ff8, + 0x3f06f105, 0x3f0884ce, 0x3f0a1b54, 0x3f0bb499, 0x3f0d509f, 0x3f0eef65, + 0x3f1090ef, 0x3f12353c, 0x3f13dc50, 0x3f15862a, 0x3f1732cc, 0x3f18e237, + 0x3f1a946d, 0x3f1c4970, 0x3f1e013f, 0x3f1fbbde, 0x3f21794c, 0x3f23398c, + 0x3f24fca0, 0x3f26c286, 0x3f288b42, 0x3f2a56d3, 0x3f2c253d, 0x3f2df680, + 0x3f2fca9d, 0x3f31a195, 0x3f337b6a, 0x3f35581e, 0x3f3737b1, 0x3f391a24, + 0x3f3aff7a, 0x3f3ce7b2, 0x3f3ed2d0, 0x3f40c0d2, 0x3f42b1bc, 0x3f44a58e, + 0x3f469c49, 0x3f4895ee, 0x3f4a9280, 0x3f4c91ff, 0x3f4e946c, 0x3f5099c8, + 0x3f52a216, 0x3f54ad55, 0x3f56bb88, 0x3f58ccae, 0x3f5ae0cb, 0x3f5cf7de, + 0x3f5f11ec, 0x3f612ef0, 0x3f634eef, 0x3f6571ea, 0x3f6797e1, 0x3f69c0d6, + 0x3f6beccb, 0x3f6e1bc0, 0x3f704db6, 0x3f7282af, 0x3f74baac, 0x3f76f5ae, + 0x3f7933b6, 0x3f7b74c6, 0x3f7db8de, 0x3f800000 +}; + +static const float * const to_linear = (const float *)to_linear_u; + +static uint8_t +to_srgb (float f) +{ + uint8_t low = 0; + uint8_t high = 255; + + while (high - low > 1) + { + uint8_t mid = (low + high) / 2; + + if (to_linear[mid] > f) + high = mid; + else + low = mid; + } + + if (to_linear[high] - f < f - to_linear[low]) + return high; + else + return low; +} + +static void +fetch_scanline_a8r8g8b8_sRGB_float (bits_image_t * image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask) +{ + const uint32_t *bits = image->bits + y * image->rowstride; + const uint32_t *pixel = bits + x; + const uint32_t *end = pixel + width; + argb_t *buffer = (argb_t *)b; + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + argb_t *argb = buffer; + + argb->a = pixman_unorm_to_float ((p >> 24) & 0xff, 8); + + argb->r = to_linear [(p >> 16) & 0xff]; + argb->g = to_linear [(p >> 8) & 0xff]; + argb->b = to_linear [(p >> 0) & 0xff]; + + buffer++; + } +} + +/* Expects a float buffer */ +static void +fetch_scanline_a2r10g10b10_float (bits_image_t * image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask) +{ + const uint32_t *bits = image->bits + y * image->rowstride; + const uint32_t *pixel = bits + x; + const uint32_t *end = pixel + width; + argb_t *buffer = (argb_t *)b; + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint64_t a = p >> 30; + uint64_t r = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t b = p & 0x3ff; + + buffer->a = pixman_unorm_to_float (a, 2); + buffer->r = pixman_unorm_to_float (r, 10); + buffer->g = pixman_unorm_to_float (g, 10); + buffer->b = pixman_unorm_to_float (b, 10); + + buffer++; + } +} + +/* Expects a float buffer */ +static void +fetch_scanline_x2r10g10b10_float (bits_image_t *image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask) +{ + const uint32_t *bits = image->bits + y * image->rowstride; + const uint32_t *pixel = (uint32_t *)bits + x; + const uint32_t *end = pixel + width; + argb_t *buffer = (argb_t *)b; + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint64_t r = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t b = p & 0x3ff; + + buffer->a = 1.0; + buffer->r = pixman_unorm_to_float (r, 10); + buffer->g = pixman_unorm_to_float (g, 10); + buffer->b = pixman_unorm_to_float (b, 10); + + buffer++; + } +} + +/* Expects a float buffer */ +static void +fetch_scanline_a2b10g10r10_float (bits_image_t *image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask) +{ + const uint32_t *bits = image->bits + y * image->rowstride; + const uint32_t *pixel = bits + x; + const uint32_t *end = pixel + width; + argb_t *buffer = (argb_t *)b; + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint64_t a = p >> 30; + uint64_t b = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t r = p & 0x3ff; + + buffer->a = pixman_unorm_to_float (a, 2); + buffer->r = pixman_unorm_to_float (r, 10); + buffer->g = pixman_unorm_to_float (g, 10); + buffer->b = pixman_unorm_to_float (b, 10); + + buffer++; + } +} + +/* Expects a float buffer */ +static void +fetch_scanline_x2b10g10r10_float (bits_image_t *image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask) +{ + const uint32_t *bits = image->bits + y * image->rowstride; + const uint32_t *pixel = (uint32_t *)bits + x; + const uint32_t *end = pixel + width; + argb_t *buffer = (argb_t *)b; + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint64_t b = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t r = p & 0x3ff; + + buffer->a = 1.0; + buffer->r = pixman_unorm_to_float (r, 10); + buffer->g = pixman_unorm_to_float (g, 10); + buffer->b = pixman_unorm_to_float (b, 10); + + buffer++; + } +} + +static void +fetch_scanline_yuy2 (bits_image_t *image, + int x, + int line, + int width, + uint32_t * buffer, + const uint32_t *mask) +{ + const uint32_t *bits = image->bits + image->rowstride * line; + int i; + + for (i = 0; i < width; i++) + { + int16_t y, u, v; + int32_t r, g, b; + + y = ((uint8_t *) bits)[(x + i) << 1] - 16; + u = ((uint8_t *) bits)[(((x + i) << 1) & - 4) + 1] - 128; + v = ((uint8_t *) bits)[(((x + i) << 1) & - 4) + 3] - 128; + + /* R = 1.164(Y - 16) + 1.596(V - 128) */ + r = 0x012b27 * y + 0x019a2e * v; + /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */ + g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u; + /* B = 1.164(Y - 16) + 2.018(U - 128) */ + b = 0x012b27 * y + 0x0206a2 * u; + + *buffer++ = 0xff000000 | + (r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) | + (g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) | + (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0); + } +} + +static void +fetch_scanline_yv12 (bits_image_t *image, + int x, + int line, + int width, + uint32_t * buffer, + const uint32_t *mask) +{ + YV12_SETUP (image); + uint8_t *y_line = YV12_Y (line); + uint8_t *u_line = YV12_U (line); + uint8_t *v_line = YV12_V (line); + int i; + + for (i = 0; i < width; i++) + { + int16_t y, u, v; + int32_t r, g, b; + + y = y_line[x + i] - 16; + u = u_line[(x + i) >> 1] - 128; + v = v_line[(x + i) >> 1] - 128; + + /* R = 1.164(Y - 16) + 1.596(V - 128) */ + r = 0x012b27 * y + 0x019a2e * v; + /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */ + g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u; + /* B = 1.164(Y - 16) + 2.018(U - 128) */ + b = 0x012b27 * y + 0x0206a2 * u; + + *buffer++ = 0xff000000 | + (r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) | + (g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) | + (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0); + } +} + +/**************************** Pixel wise fetching *****************************/ + +static argb_t +fetch_pixel_x2r10g10b10_float (bits_image_t *image, + int offset, + int line) +{ + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t p = READ (image, bits + offset); + uint64_t r = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t b = p & 0x3ff; + argb_t argb; + + argb.a = 1.0; + argb.r = pixman_unorm_to_float (r, 10); + argb.g = pixman_unorm_to_float (g, 10); + argb.b = pixman_unorm_to_float (b, 10); + + return argb; +} + +static argb_t +fetch_pixel_a2r10g10b10_float (bits_image_t *image, + int offset, + int line) +{ + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t p = READ (image, bits + offset); + uint64_t a = p >> 30; + uint64_t r = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t b = p & 0x3ff; + argb_t argb; + + argb.a = pixman_unorm_to_float (a, 2); + argb.r = pixman_unorm_to_float (r, 10); + argb.g = pixman_unorm_to_float (g, 10); + argb.b = pixman_unorm_to_float (b, 10); + + return argb; +} + +static argb_t +fetch_pixel_a2b10g10r10_float (bits_image_t *image, + int offset, + int line) +{ + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t p = READ (image, bits + offset); + uint64_t a = p >> 30; + uint64_t b = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t r = p & 0x3ff; + argb_t argb; + + argb.a = pixman_unorm_to_float (a, 2); + argb.r = pixman_unorm_to_float (r, 10); + argb.g = pixman_unorm_to_float (g, 10); + argb.b = pixman_unorm_to_float (b, 10); + + return argb; +} + +static argb_t +fetch_pixel_x2b10g10r10_float (bits_image_t *image, + int offset, + int line) +{ + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t p = READ (image, bits + offset); + uint64_t b = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t r = p & 0x3ff; + argb_t argb; + + argb.a = 1.0; + argb.r = pixman_unorm_to_float (r, 10); + argb.g = pixman_unorm_to_float (g, 10); + argb.b = pixman_unorm_to_float (b, 10); + + return argb; +} + +static argb_t +fetch_pixel_a8r8g8b8_sRGB_float (bits_image_t *image, + int offset, + int line) +{ + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t p = READ (image, bits + offset); + argb_t argb; + + argb.a = pixman_unorm_to_float ((p >> 24) & 0xff, 8); + + argb.r = to_linear [(p >> 16) & 0xff]; + argb.g = to_linear [(p >> 8) & 0xff]; + argb.b = to_linear [(p >> 0) & 0xff]; + + return argb; +} + +static uint32_t +fetch_pixel_yuy2 (bits_image_t *image, + int offset, + int line) +{ + const uint32_t *bits = image->bits + image->rowstride * line; + + int16_t y, u, v; + int32_t r, g, b; + + y = ((uint8_t *) bits)[offset << 1] - 16; + u = ((uint8_t *) bits)[((offset << 1) & - 4) + 1] - 128; + v = ((uint8_t *) bits)[((offset << 1) & - 4) + 3] - 128; + + /* R = 1.164(Y - 16) + 1.596(V - 128) */ + r = 0x012b27 * y + 0x019a2e * v; + + /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */ + g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u; + + /* B = 1.164(Y - 16) + 2.018(U - 128) */ + b = 0x012b27 * y + 0x0206a2 * u; + + return 0xff000000 | + (r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) | + (g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) | + (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0); +} + +static uint32_t +fetch_pixel_yv12 (bits_image_t *image, + int offset, + int line) +{ + YV12_SETUP (image); + int16_t y = YV12_Y (line)[offset] - 16; + int16_t u = YV12_U (line)[offset >> 1] - 128; + int16_t v = YV12_V (line)[offset >> 1] - 128; + int32_t r, g, b; + + /* R = 1.164(Y - 16) + 1.596(V - 128) */ + r = 0x012b27 * y + 0x019a2e * v; + + /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */ + g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u; + + /* B = 1.164(Y - 16) + 2.018(U - 128) */ + b = 0x012b27 * y + 0x0206a2 * u; + + return 0xff000000 | + (r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) | + (g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) | + (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0); +} + +/*********************************** Store ************************************/ + +static void +store_scanline_a2r10g10b10_float (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) +{ + uint32_t *bits = image->bits + image->rowstride * y; + uint32_t *pixel = bits + x; + argb_t *values = (argb_t *)v; + int i; + + for (i = 0; i < width; ++i) + { + uint16_t a, r, g, b; + + a = pixman_float_to_unorm (values[i].a, 2); + r = pixman_float_to_unorm (values[i].r, 10); + g = pixman_float_to_unorm (values[i].g, 10); + b = pixman_float_to_unorm (values[i].b, 10); + + WRITE (image, pixel++, + (a << 30) | (r << 20) | (g << 10) | b); + } +} + +static void +store_scanline_x2r10g10b10_float (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) +{ + uint32_t *bits = image->bits + image->rowstride * y; + uint32_t *pixel = bits + x; + argb_t *values = (argb_t *)v; + int i; + + for (i = 0; i < width; ++i) + { + uint16_t r, g, b; + + r = pixman_float_to_unorm (values[i].r, 10); + g = pixman_float_to_unorm (values[i].g, 10); + b = pixman_float_to_unorm (values[i].b, 10); + + WRITE (image, pixel++, + (r << 20) | (g << 10) | b); + } +} + +static void +store_scanline_a2b10g10r10_float (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) +{ + uint32_t *bits = image->bits + image->rowstride * y; + uint32_t *pixel = bits + x; + argb_t *values = (argb_t *)v; + int i; + + for (i = 0; i < width; ++i) + { + uint16_t a, r, g, b; + + a = pixman_float_to_unorm (values[i].a, 2); + r = pixman_float_to_unorm (values[i].r, 10); + g = pixman_float_to_unorm (values[i].g, 10); + b = pixman_float_to_unorm (values[i].b, 10); + + WRITE (image, pixel++, + (a << 30) | (b << 20) | (g << 10) | r); + } +} + +static void +store_scanline_x2b10g10r10_float (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) +{ + uint32_t *bits = image->bits + image->rowstride * y; + uint32_t *pixel = bits + x; + argb_t *values = (argb_t *)v; + int i; + + for (i = 0; i < width; ++i) + { + uint16_t r, g, b; + + r = pixman_float_to_unorm (values[i].r, 10); + g = pixman_float_to_unorm (values[i].g, 10); + b = pixman_float_to_unorm (values[i].b, 10); + + WRITE (image, pixel++, + (b << 20) | (g << 10) | r); + } +} + +static void +store_scanline_a8r8g8b8_sRGB_float (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) +{ + uint32_t *bits = image->bits + image->rowstride * y; + uint32_t *pixel = bits + x; + argb_t *values = (argb_t *)v; + int i; + + for (i = 0; i < width; ++i) + { + uint8_t a, r, g, b; + + a = pixman_float_to_unorm (values[i].a, 8); + r = to_srgb (values[i].r); + g = to_srgb (values[i].g); + b = to_srgb (values[i].b); + + WRITE (image, pixel++, + (a << 24) | (r << 16) | (g << 8) | b); + } +} + +/* + * Contracts a floating point image to 32bpp and then stores it using a + * regular 32-bit store proc. Despite the type, this function expects an + * argb_t buffer. + */ +static void +store_scanline_generic_float (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) +{ + uint32_t *argb8_pixels; + + assert (image->common.type == BITS); + + argb8_pixels = pixman_malloc_ab (width, sizeof(uint32_t)); + if (!argb8_pixels) + return; + + /* Contract the scanline. We could do this in place if values weren't + * const. + */ + pixman_contract_from_float (argb8_pixels, (argb_t *)values, width); + + image->store_scanline_32 (image, x, y, width, argb8_pixels); + + free (argb8_pixels); +} + +static void +fetch_scanline_generic_float (bits_image_t * image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask) +{ + image->fetch_scanline_32 (image, x, y, width, buffer, NULL); + + pixman_expand_to_float ((argb_t *)buffer, buffer, image->format, width); +} + +/* The 32_sRGB paths should be deleted after narrow processing + * is no longer invoked for formats that are considered wide. + * (Also see fetch_pixel_generic_lossy_32) */ +static void +fetch_scanline_a8r8g8b8_32_sRGB (bits_image_t *image, + int x, + int y, + int width, + uint32_t *buffer, + const uint32_t *mask) +{ + const uint32_t *bits = image->bits + y * image->rowstride; + const uint32_t *pixel = (uint32_t *)bits + x; + const uint32_t *end = pixel + width; + uint32_t tmp; + + while (pixel < end) + { + uint8_t a, r, g, b; + + tmp = READ (image, pixel++); + + a = (tmp >> 24) & 0xff; + r = (tmp >> 16) & 0xff; + g = (tmp >> 8) & 0xff; + b = (tmp >> 0) & 0xff; + + r = to_linear[r] * 255.0f + 0.5f; + g = to_linear[g] * 255.0f + 0.5f; + b = to_linear[b] * 255.0f + 0.5f; + + *buffer++ = (a << 24) | (r << 16) | (g << 8) | (b << 0); + } +} + +static uint32_t +fetch_pixel_a8r8g8b8_32_sRGB (bits_image_t *image, + int offset, + int line) +{ + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t tmp = READ (image, bits + offset); + uint8_t a, r, g, b; + + a = (tmp >> 24) & 0xff; + r = (tmp >> 16) & 0xff; + g = (tmp >> 8) & 0xff; + b = (tmp >> 0) & 0xff; + + r = to_linear[r] * 255.0f + 0.5f; + g = to_linear[g] * 255.0f + 0.5f; + b = to_linear[b] * 255.0f + 0.5f; + + return (a << 24) | (r << 16) | (g << 8) | (b << 0); +} + +static void +store_scanline_a8r8g8b8_32_sRGB (bits_image_t *image, + int x, + int y, + int width, + const uint32_t *v) +{ + uint32_t *bits = image->bits + image->rowstride * y; + uint64_t *values = (uint64_t *)v; + uint32_t *pixel = bits + x; + uint64_t tmp; + int i; + + for (i = 0; i < width; ++i) + { + uint8_t a, r, g, b; + + tmp = values[i]; + + a = (tmp >> 24) & 0xff; + r = (tmp >> 16) & 0xff; + g = (tmp >> 8) & 0xff; + b = (tmp >> 0) & 0xff; + + r = to_srgb (r * (1/255.0f)); + g = to_srgb (g * (1/255.0f)); + b = to_srgb (b * (1/255.0f)); + + WRITE (image, pixel++, a | (r << 16) | (g << 8) | (b << 0)); + } +} + +static argb_t +fetch_pixel_generic_float (bits_image_t *image, + int offset, + int line) +{ + uint32_t pixel32 = image->fetch_pixel_32 (image, offset, line); + argb_t f; + + pixman_expand_to_float (&f, &pixel32, image->format, 1); + + return f; +} + +/* + * XXX: The transformed fetch path only works at 32-bpp so far. When all + * paths have wide versions, this can be removed. + * + * WARNING: This function loses precision! + */ +static uint32_t +fetch_pixel_generic_lossy_32 (bits_image_t *image, + int offset, + int line) +{ + argb_t pixel64 = image->fetch_pixel_float (image, offset, line); + uint32_t result; + + pixman_contract_from_float (&result, &pixel64, 1); + + return result; +} + +typedef struct +{ + pixman_format_code_t format; + fetch_scanline_t fetch_scanline_32; + fetch_scanline_t fetch_scanline_float; + fetch_pixel_32_t fetch_pixel_32; + fetch_pixel_float_t fetch_pixel_float; + store_scanline_t store_scanline_32; + store_scanline_t store_scanline_float; +} format_info_t; + +#define FORMAT_INFO(format) \ + { \ + PIXMAN_ ## format, \ + fetch_scanline_ ## format, \ + fetch_scanline_generic_float, \ + fetch_pixel_ ## format, \ + fetch_pixel_generic_float, \ + store_scanline_ ## format, \ + store_scanline_generic_float \ + } + +static const format_info_t accessors[] = +{ +/* 32 bpp formats */ + FORMAT_INFO (a8r8g8b8), + FORMAT_INFO (x8r8g8b8), + FORMAT_INFO (a8b8g8r8), + FORMAT_INFO (x8b8g8r8), + FORMAT_INFO (b8g8r8a8), + FORMAT_INFO (b8g8r8x8), + FORMAT_INFO (r8g8b8a8), + FORMAT_INFO (r8g8b8x8), + FORMAT_INFO (x14r6g6b6), + +/* sRGB formats */ + { PIXMAN_a8r8g8b8_sRGB, + fetch_scanline_a8r8g8b8_32_sRGB, fetch_scanline_a8r8g8b8_sRGB_float, + fetch_pixel_a8r8g8b8_32_sRGB, fetch_pixel_a8r8g8b8_sRGB_float, + store_scanline_a8r8g8b8_32_sRGB, store_scanline_a8r8g8b8_sRGB_float, + }, + +/* 24bpp formats */ + FORMAT_INFO (r8g8b8), + FORMAT_INFO (b8g8r8), + +/* 16bpp formats */ + FORMAT_INFO (r5g6b5), + FORMAT_INFO (b5g6r5), + + FORMAT_INFO (a1r5g5b5), + FORMAT_INFO (x1r5g5b5), + FORMAT_INFO (a1b5g5r5), + FORMAT_INFO (x1b5g5r5), + FORMAT_INFO (a4r4g4b4), + FORMAT_INFO (x4r4g4b4), + FORMAT_INFO (a4b4g4r4), + FORMAT_INFO (x4b4g4r4), + +/* 8bpp formats */ + FORMAT_INFO (a8), + FORMAT_INFO (r3g3b2), + FORMAT_INFO (b2g3r3), + FORMAT_INFO (a2r2g2b2), + FORMAT_INFO (a2b2g2r2), + + FORMAT_INFO (c8), + + FORMAT_INFO (g8), + +#define fetch_scanline_x4c4 fetch_scanline_c8 +#define fetch_pixel_x4c4 fetch_pixel_c8 +#define store_scanline_x4c4 store_scanline_c8 + FORMAT_INFO (x4c4), + +#define fetch_scanline_x4g4 fetch_scanline_g8 +#define fetch_pixel_x4g4 fetch_pixel_g8 +#define store_scanline_x4g4 store_scanline_g8 + FORMAT_INFO (x4g4), + + FORMAT_INFO (x4a4), + +/* 4bpp formats */ + FORMAT_INFO (a4), + FORMAT_INFO (r1g2b1), + FORMAT_INFO (b1g2r1), + FORMAT_INFO (a1r1g1b1), + FORMAT_INFO (a1b1g1r1), + + FORMAT_INFO (c4), + + FORMAT_INFO (g4), + +/* 1bpp formats */ + FORMAT_INFO (a1), + FORMAT_INFO (g1), + +/* Wide formats */ + + { PIXMAN_a2r10g10b10, + NULL, fetch_scanline_a2r10g10b10_float, + fetch_pixel_generic_lossy_32, fetch_pixel_a2r10g10b10_float, + NULL, store_scanline_a2r10g10b10_float }, + + { PIXMAN_x2r10g10b10, + NULL, fetch_scanline_x2r10g10b10_float, + fetch_pixel_generic_lossy_32, fetch_pixel_x2r10g10b10_float, + NULL, store_scanline_x2r10g10b10_float }, + + { PIXMAN_a2b10g10r10, + NULL, fetch_scanline_a2b10g10r10_float, + fetch_pixel_generic_lossy_32, fetch_pixel_a2b10g10r10_float, + NULL, store_scanline_a2b10g10r10_float }, + + { PIXMAN_x2b10g10r10, + NULL, fetch_scanline_x2b10g10r10_float, + fetch_pixel_generic_lossy_32, fetch_pixel_x2b10g10r10_float, + NULL, store_scanline_x2b10g10r10_float }, + +/* YUV formats */ + { PIXMAN_yuy2, + fetch_scanline_yuy2, fetch_scanline_generic_float, + fetch_pixel_yuy2, fetch_pixel_generic_float, + NULL, NULL }, + + { PIXMAN_yv12, + fetch_scanline_yv12, fetch_scanline_generic_float, + fetch_pixel_yv12, fetch_pixel_generic_float, + NULL, NULL }, + + { PIXMAN_null }, +}; + +static void +setup_accessors (bits_image_t *image) +{ + const format_info_t *info = accessors; + + while (info->format != PIXMAN_null) + { + if (info->format == image->format) + { + image->fetch_scanline_32 = info->fetch_scanline_32; + image->fetch_scanline_float = info->fetch_scanline_float; + image->fetch_pixel_32 = info->fetch_pixel_32; + image->fetch_pixel_float = info->fetch_pixel_float; + image->store_scanline_32 = info->store_scanline_32; + image->store_scanline_float = info->store_scanline_float; + + return; + } + + info++; + } +} + +#ifndef PIXMAN_FB_ACCESSORS +void +_pixman_bits_image_setup_accessors_accessors (bits_image_t *image); + +void +_pixman_bits_image_setup_accessors (bits_image_t *image) +{ + if (image->read_func || image->write_func) + _pixman_bits_image_setup_accessors_accessors (image); + else + setup_accessors (image); +} + +#else + +void +_pixman_bits_image_setup_accessors_accessors (bits_image_t *image) +{ + setup_accessors (image); +} + +#endif diff --git a/src/pixman/pixman/pixman-accessor.h b/src/pixman/pixman/pixman-accessor.h new file mode 100644 index 0000000..8e0b036 --- /dev/null +++ b/src/pixman/pixman/pixman-accessor.h @@ -0,0 +1,25 @@ +#ifdef PIXMAN_FB_ACCESSORS + +#define READ(img, ptr) \ + (((bits_image_t *)(img))->read_func ((ptr), sizeof(*(ptr)))) +#define WRITE(img, ptr,val) \ + (((bits_image_t *)(img))->write_func ((ptr), (val), sizeof (*(ptr)))) + +#define MEMSET_WRAPPED(img, dst, val, size) \ + do { \ + size_t _i; \ + uint8_t *_dst = (uint8_t*)(dst); \ + for(_i = 0; _i < (size_t) size; _i++) { \ + WRITE((img), _dst +_i, (val)); \ + } \ + } while (0) + +#else + +#define READ(img, ptr) (*(ptr)) +#define WRITE(img, ptr, val) (*(ptr) = (val)) +#define MEMSET_WRAPPED(img, dst, val, size) \ + memset(dst, val, size) + +#endif + diff --git a/src/pixman/pixman/pixman-arm-common.h b/src/pixman/pixman/pixman-arm-common.h new file mode 100644 index 0000000..3a7cb2b --- /dev/null +++ b/src/pixman/pixman/pixman-arm-common.h @@ -0,0 +1,428 @@ +/* + * Copyright © 2010 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + */ + +#ifndef PIXMAN_ARM_COMMON_H +#define PIXMAN_ARM_COMMON_H + +#include "pixman-inlines.h" + +/* Define some macros which can expand into proxy functions between + * ARM assembly optimized functions and the rest of pixman fast path API. + * + * All the low level ARM assembly functions have to use ARM EABI + * calling convention and take up to 8 arguments: + * width, height, dst, dst_stride, src, src_stride, mask, mask_stride + * + * The arguments are ordered with the most important coming first (the + * first 4 arguments are passed to function in registers, the rest are + * on stack). The last arguments are optional, for example if the + * function is not using mask, then 'mask' and 'mask_stride' can be + * omitted when doing a function call. + * + * Arguments 'src' and 'mask' contain either a pointer to the top left + * pixel of the composited rectangle or a pixel color value depending + * on the function type. In the case of just a color value (solid source + * or mask), the corresponding stride argument is unused. + */ + +#define SKIP_ZERO_SRC 1 +#define SKIP_ZERO_MASK 2 + +#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \ + src_type, src_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + src_type *src, \ + int32_t src_stride); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line; \ + src_type *src_line; \ + int32_t dst_stride, src_stride; \ + \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src_line, src_stride); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + uint32_t src); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line; \ + int32_t dst_stride; \ + uint32_t src; \ + \ + src = _pixman_image_get_solid ( \ + imp, src_image, dest_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_SRC) && src == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name, \ + mask_type, mask_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + uint32_t src, \ + int32_t unused, \ + mask_type *mask, \ + int32_t mask_stride); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line; \ + mask_type *mask_line; \ + int32_t dst_stride, mask_stride; \ + uint32_t src; \ + \ + src = _pixman_image_get_solid ( \ + imp, src_image, dest_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_SRC) && src == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ + mask_stride, mask_line, mask_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src, 0, \ + mask_line, mask_stride); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name, \ + src_type, src_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + src_type *src, \ + int32_t src_stride, \ + uint32_t mask); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line; \ + src_type *src_line; \ + int32_t dst_stride, src_stride; \ + uint32_t mask; \ + \ + mask = _pixman_image_get_solid ( \ + imp, mask_image, dest_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_MASK) && mask == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src_line, src_stride, \ + mask); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name, \ + src_type, src_cnt, \ + mask_type, mask_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + src_type *src, \ + int32_t src_stride, \ + mask_type *mask, \ + int32_t mask_stride); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line; \ + src_type *src_line; \ + mask_type *mask_line; \ + int32_t dst_stride, src_stride, mask_stride; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ + mask_stride, mask_line, mask_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src_line, src_stride, \ + mask_line, mask_stride); \ +} + +#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST(cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ + int32_t w, \ + dst_type * dst, \ + const src_type * src, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx); \ + \ +static force_inline void \ +scaled_nearest_scanline_##cputype##_##name##_##op (dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ + vx, unit_x, \ + max_vx); \ +} \ + \ +FAST_NEAREST_MAINLOOP (cputype##_##name##_cover_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op, \ + src_type, dst_type, COVER) \ +FAST_NEAREST_MAINLOOP (cputype##_##name##_none_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op, \ + src_type, dst_type, NONE) \ +FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op, \ + src_type, dst_type, PAD) \ +FAST_NEAREST_MAINLOOP (cputype##_##name##_normal_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op, \ + src_type, dst_type, NORMAL) + +/* Provide entries for the fast path table */ +#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) + +#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ + int32_t w, \ + dst_type * dst, \ + const src_type * src, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + const uint8_t * mask); \ + \ +static force_inline void \ +scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t * mask, \ + dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ + vx, unit_x, \ + max_vx, \ + mask); \ +} \ + \ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, PAD, TRUE, FALSE) \ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, NORMAL, TRUE, FALSE) + +/* Provide entries for the fast path table */ +#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL (op,s,d,func) + +/*****************************************************************************/ + +#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ + dst_type * dst, \ + const src_type * top, \ + const src_type * bottom, \ + int wt, \ + int wb, \ + pixman_fixed_t x, \ + pixman_fixed_t ux, \ + int width); \ + \ +static force_inline void \ +scaled_bilinear_scanline_##cputype##_##name##_##op ( \ + dst_type * dst, \ + const uint32_t * mask, \ + const src_type * src_top, \ + const src_type * src_bottom, \ + int32_t w, \ + int wt, \ + int wb, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ + dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \ +} \ + \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint32_t, dst_type, COVER, FLAG_NONE) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint32_t, dst_type, NONE, FLAG_NONE) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint32_t, dst_type, PAD, FLAG_NONE) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint32_t, dst_type, NORMAL, \ + FLAG_NONE) + + +#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ + dst_type * dst, \ + const uint8_t * mask, \ + const src_type * top, \ + const src_type * bottom, \ + int wt, \ + int wb, \ + pixman_fixed_t x, \ + pixman_fixed_t ux, \ + int width); \ + \ +static force_inline void \ +scaled_bilinear_scanline_##cputype##_##name##_##op ( \ + dst_type * dst, \ + const uint8_t * mask, \ + const src_type * src_top, \ + const src_type * src_bottom, \ + int32_t w, \ + int wt, \ + int wb, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ + dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \ +} \ + \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint8_t, dst_type, COVER, \ + FLAG_HAVE_NON_SOLID_MASK) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint8_t, dst_type, NONE, \ + FLAG_HAVE_NON_SOLID_MASK) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint8_t, dst_type, PAD, \ + FLAG_HAVE_NON_SOLID_MASK) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint8_t, dst_type, NORMAL, \ + FLAG_HAVE_NON_SOLID_MASK) + + +#endif diff --git a/src/pixman/pixman/pixman-arm-detect-win32.asm b/src/pixman/pixman/pixman-arm-detect-win32.asm new file mode 100644 index 0000000..8f5d5eb --- /dev/null +++ b/src/pixman/pixman/pixman-arm-detect-win32.asm @@ -0,0 +1,21 @@ + area pixman_msvc, code, readonly
+
+ export pixman_msvc_try_arm_simd_op
+
+pixman_msvc_try_arm_simd_op
+ ;; I don't think the msvc arm asm knows how to do SIMD insns
+ ;; uqadd8 r3,r3,r3
+ dcd 0xe6633f93
+ mov pc,lr
+ endp
+
+ export pixman_msvc_try_arm_neon_op
+
+pixman_msvc_try_arm_neon_op
+ ;; I don't think the msvc arm asm knows how to do NEON insns
+ ;; veor d0,d0,d0
+ dcd 0xf3000110
+ mov pc,lr
+ endp
+
+ end
diff --git a/src/pixman/pixman/pixman-arm-neon-asm-bilinear.S b/src/pixman/pixman/pixman-arm-neon-asm-bilinear.S new file mode 100644 index 0000000..e37b5c2 --- /dev/null +++ b/src/pixman/pixman/pixman-arm-neon-asm-bilinear.S @@ -0,0 +1,1368 @@ +/* + * Copyright © 2011 SCore Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + * Author: Taekyun Kim (tkq.kim@samsung.com) + */ + +/* + * This file contains scaled bilinear scanline functions implemented + * using older siarhei's bilinear macro template. + * + * << General scanline function procedures >> + * 1. bilinear interpolate source pixels + * 2. load mask pixels + * 3. load destination pixels + * 4. duplicate mask to fill whole register + * 5. interleave source & destination pixels + * 6. apply mask to source pixels + * 7. combine source & destination pixels + * 8, Deinterleave final result + * 9. store destination pixels + * + * All registers with single number (i.e. src0, tmp0) are 64-bits registers. + * Registers with double numbers(src01, dst01) are 128-bits registers. + * All temp registers can be used freely outside the code block. + * Assume that symbol(register .req) OUT and MASK are defined at caller of these macro blocks. + * + * Remarks + * There can be lots of pipeline stalls inside code block and between code blocks. + * Further optimizations will be done by new macro templates using head/tail_head/tail scheme. + */ + +/* Prevent the stack from becoming executable for no reason... */ +#if defined(__linux__) && defined (__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + +.text +.fpu neon +.arch armv7a +.object_arch armv4 +.eabi_attribute 10, 0 +.eabi_attribute 12, 0 +.arm +.altmacro +.p2align 2 + +#include "pixman-private.h" +#include "pixman-arm-neon-asm.h" + +/* + * Bilinear macros from pixman-arm-neon-asm.S + */ + +/* Supplementary macro for setting function attributes */ +.macro pixman_asm_function fname + .func fname + .global fname +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: +.endm + +/* + * Bilinear scaling support code which tries to provide pixel fetching, color + * format conversion, and interpolation as separate macros which can be used + * as the basic building blocks for constructing bilinear scanline functions. + */ + +.macro bilinear_load_8888 reg1, reg2, tmp + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + vld1.32 {reg1}, [TMP1], STRIDE + vld1.32 {reg2}, [TMP1] +.endm + +.macro bilinear_load_0565 reg1, reg2, tmp + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #1 + vld1.32 {reg2[0]}, [TMP1], STRIDE + vld1.32 {reg2[1]}, [TMP1] + convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp +.endm + +.macro bilinear_load_and_vertical_interpolate_two_8888 \ + acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2 + + bilinear_load_8888 reg1, reg2, tmp1 + vmull.u8 acc1, reg1, d28 + vmlal.u8 acc1, reg2, d29 + bilinear_load_8888 reg3, reg4, tmp2 + vmull.u8 acc2, reg3, d28 + vmlal.u8 acc2, reg4, d29 +.endm + +.macro bilinear_load_and_vertical_interpolate_four_8888 \ + xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ + yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi + + bilinear_load_and_vertical_interpolate_two_8888 \ + xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi + bilinear_load_and_vertical_interpolate_two_8888 \ + yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi +.endm + +.macro bilinear_load_and_vertical_interpolate_two_0565 \ + acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi + + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #1 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #1 + vld1.32 {acc2lo[0]}, [TMP1], STRIDE + vld1.32 {acc2hi[0]}, [TMP2], STRIDE + vld1.32 {acc2lo[1]}, [TMP1] + vld1.32 {acc2hi[1]}, [TMP2] + convert_0565_to_x888 acc2, reg3, reg2, reg1 + vzip.u8 reg1, reg3 + vzip.u8 reg2, reg4 + vzip.u8 reg3, reg4 + vzip.u8 reg1, reg2 + vmull.u8 acc1, reg1, d28 + vmlal.u8 acc1, reg2, d29 + vmull.u8 acc2, reg3, d28 + vmlal.u8 acc2, reg4, d29 +.endm + +.macro bilinear_load_and_vertical_interpolate_four_0565 \ + xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ + yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi + + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #1 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #1 + vld1.32 {xacc2lo[0]}, [TMP1], STRIDE + vld1.32 {xacc2hi[0]}, [TMP2], STRIDE + vld1.32 {xacc2lo[1]}, [TMP1] + vld1.32 {xacc2hi[1]}, [TMP2] + convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1 + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #1 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #1 + vld1.32 {yacc2lo[0]}, [TMP1], STRIDE + vzip.u8 xreg1, xreg3 + vld1.32 {yacc2hi[0]}, [TMP2], STRIDE + vzip.u8 xreg2, xreg4 + vld1.32 {yacc2lo[1]}, [TMP1] + vzip.u8 xreg3, xreg4 + vld1.32 {yacc2hi[1]}, [TMP2] + vzip.u8 xreg1, xreg2 + convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1 + vmull.u8 xacc1, xreg1, d28 + vzip.u8 yreg1, yreg3 + vmlal.u8 xacc1, xreg2, d29 + vzip.u8 yreg2, yreg4 + vmull.u8 xacc2, xreg3, d28 + vzip.u8 yreg3, yreg4 + vmlal.u8 xacc2, xreg4, d29 + vzip.u8 yreg1, yreg2 + vmull.u8 yacc1, yreg1, d28 + vmlal.u8 yacc1, yreg2, d29 + vmull.u8 yacc2, yreg3, d28 + vmlal.u8 yacc2, yreg4, d29 +.endm + +.macro bilinear_store_8888 numpix, tmp1, tmp2 +.if numpix == 4 + vst1.32 {d0, d1}, [OUT]! +.elseif numpix == 2 + vst1.32 {d0}, [OUT]! +.elseif numpix == 1 + vst1.32 {d0[0]}, [OUT, :32]! +.else + .error bilinear_store_8888 numpix is unsupported +.endif +.endm + +.macro bilinear_store_0565 numpix, tmp1, tmp2 + vuzp.u8 d0, d1 + vuzp.u8 d2, d3 + vuzp.u8 d1, d3 + vuzp.u8 d0, d2 + convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2 +.if numpix == 4 + vst1.16 {d2}, [OUT]! +.elseif numpix == 2 + vst1.32 {d2[0]}, [OUT]! +.elseif numpix == 1 + vst1.16 {d2[0]}, [OUT]! +.else + .error bilinear_store_0565 numpix is unsupported +.endif +.endm + + +/* + * Macros for loading mask pixels into register 'mask'. + * vdup must be done in somewhere else. + */ +.macro bilinear_load_mask_x numpix, mask +.endm + +.macro bilinear_load_mask_8 numpix, mask +.if numpix == 4 + vld1.32 {mask[0]}, [MASK]! +.elseif numpix == 2 + vld1.16 {mask[0]}, [MASK]! +.elseif numpix == 1 + vld1.8 {mask[0]}, [MASK]! +.else + .error bilinear_load_mask_8 numpix is unsupported +.endif + pld [MASK, #prefetch_offset] +.endm + +.macro bilinear_load_mask mask_fmt, numpix, mask + bilinear_load_mask_&mask_fmt numpix, mask +.endm + + +/* + * Macros for loading destination pixels into register 'dst0' and 'dst1'. + * Interleave should be done somewhere else. + */ +.macro bilinear_load_dst_0565_src numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_load_dst_8888_src numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_load_dst_8888 numpix, dst0, dst1, dst01 +.if numpix == 4 + vld1.32 {dst0, dst1}, [OUT] +.elseif numpix == 2 + vld1.32 {dst0}, [OUT] +.elseif numpix == 1 + vld1.32 {dst0[0]}, [OUT] +.else + .error bilinear_load_dst_8888 numpix is unsupported +.endif + pld [OUT, #(prefetch_offset * 4)] +.endm + +.macro bilinear_load_dst_8888_over numpix, dst0, dst1, dst01 + bilinear_load_dst_8888 numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_load_dst_8888_add numpix, dst0, dst1, dst01 + bilinear_load_dst_8888 numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_load_dst dst_fmt, op, numpix, dst0, dst1, dst01 + bilinear_load_dst_&dst_fmt&_&op numpix, dst0, dst1, dst01 +.endm + +/* + * Macros for duplicating partially loaded mask to fill entire register. + * We will apply mask to interleaved source pixels, that is + * (r0, r1, r2, r3, g0, g1, g2, g3) x (m0, m1, m2, m3, m0, m1, m2, m3) + * (b0, b1, b2, b3, a0, a1, a2, a3) x (m0, m1, m2, m3, m0, m1, m2, m3) + * So, we need to duplicate loaded mask into whole register. + * + * For two pixel case + * (r0, r1, x, x, g0, g1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1) + * (b0, b1, x, x, a0, a1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1) + * We can do some optimizations for this including last pixel cases. + */ +.macro bilinear_duplicate_mask_x numpix, mask +.endm + +.macro bilinear_duplicate_mask_8 numpix, mask +.if numpix == 4 + vdup.32 mask, mask[0] +.elseif numpix == 2 + vdup.16 mask, mask[0] +.elseif numpix == 1 + vdup.8 mask, mask[0] +.else + .error bilinear_duplicate_mask_8 is unsupported +.endif +.endm + +.macro bilinear_duplicate_mask mask_fmt, numpix, mask + bilinear_duplicate_mask_&mask_fmt numpix, mask +.endm + +/* + * Macros for interleaving src and dst pixels to rrrr gggg bbbb aaaa form. + * Interleave should be done when maks is enabled or operator is 'over'. + */ +.macro bilinear_interleave src0, src1, dst0, dst1 + vuzp.8 src0, src1 + vuzp.8 dst0, dst1 + vuzp.8 src0, src1 + vuzp.8 dst0, dst1 +.endm + +.macro bilinear_interleave_src_dst_x_src \ + numpix, src0, src1, src01, dst0, dst1, dst01 +.endm + +.macro bilinear_interleave_src_dst_x_over \ + numpix, src0, src1, src01, dst0, dst1, dst01 + + bilinear_interleave src0, src1, dst0, dst1 +.endm + +.macro bilinear_interleave_src_dst_x_add \ + numpix, src0, src1, src01, dst0, dst1, dst01 +.endm + +.macro bilinear_interleave_src_dst_8_src \ + numpix, src0, src1, src01, dst0, dst1, dst01 + + bilinear_interleave src0, src1, dst0, dst1 +.endm + +.macro bilinear_interleave_src_dst_8_over \ + numpix, src0, src1, src01, dst0, dst1, dst01 + + bilinear_interleave src0, src1, dst0, dst1 +.endm + +.macro bilinear_interleave_src_dst_8_add \ + numpix, src0, src1, src01, dst0, dst1, dst01 + + bilinear_interleave src0, src1, dst0, dst1 +.endm + +.macro bilinear_interleave_src_dst \ + mask_fmt, op, numpix, src0, src1, src01, dst0, dst1, dst01 + + bilinear_interleave_src_dst_&mask_fmt&_&op \ + numpix, src0, src1, src01, dst0, dst1, dst01 +.endm + + +/* + * Macros for applying masks to src pixels. (see combine_mask_u() function) + * src, dst should be in interleaved form. + * mask register should be in form (m0, m1, m2, m3). + */ +.macro bilinear_apply_mask_to_src_x \ + numpix, src0, src1, src01, mask, \ + tmp01, tmp23, tmp45, tmp67 +.endm + +.macro bilinear_apply_mask_to_src_8 \ + numpix, src0, src1, src01, mask, \ + tmp01, tmp23, tmp45, tmp67 + + vmull.u8 tmp01, src0, mask + vmull.u8 tmp23, src1, mask + /* bubbles */ + vrshr.u16 tmp45, tmp01, #8 + vrshr.u16 tmp67, tmp23, #8 + /* bubbles */ + vraddhn.u16 src0, tmp45, tmp01 + vraddhn.u16 src1, tmp67, tmp23 +.endm + +.macro bilinear_apply_mask_to_src \ + mask_fmt, numpix, src0, src1, src01, mask, \ + tmp01, tmp23, tmp45, tmp67 + + bilinear_apply_mask_to_src_&mask_fmt \ + numpix, src0, src1, src01, mask, \ + tmp01, tmp23, tmp45, tmp67 +.endm + + +/* + * Macros for combining src and destination pixels. + * Interleave or not is depending on operator 'op'. + */ +.macro bilinear_combine_src \ + numpix, src0, src1, src01, dst0, dst1, dst01, \ + tmp01, tmp23, tmp45, tmp67, tmp8 +.endm + +.macro bilinear_combine_over \ + numpix, src0, src1, src01, dst0, dst1, dst01, \ + tmp01, tmp23, tmp45, tmp67, tmp8 + + vdup.32 tmp8, src1[1] + /* bubbles */ + vmvn.8 tmp8, tmp8 + /* bubbles */ + vmull.u8 tmp01, dst0, tmp8 + /* bubbles */ + vmull.u8 tmp23, dst1, tmp8 + /* bubbles */ + vrshr.u16 tmp45, tmp01, #8 + vrshr.u16 tmp67, tmp23, #8 + /* bubbles */ + vraddhn.u16 dst0, tmp45, tmp01 + vraddhn.u16 dst1, tmp67, tmp23 + /* bubbles */ + vqadd.u8 src01, dst01, src01 +.endm + +.macro bilinear_combine_add \ + numpix, src0, src1, src01, dst0, dst1, dst01, \ + tmp01, tmp23, tmp45, tmp67, tmp8 + + vqadd.u8 src01, dst01, src01 +.endm + +.macro bilinear_combine \ + op, numpix, src0, src1, src01, dst0, dst1, dst01, \ + tmp01, tmp23, tmp45, tmp67, tmp8 + + bilinear_combine_&op \ + numpix, src0, src1, src01, dst0, dst1, dst01, \ + tmp01, tmp23, tmp45, tmp67, tmp8 +.endm + +/* + * Macros for final deinterleaving of destination pixels if needed. + */ +.macro bilinear_deinterleave numpix, dst0, dst1, dst01 + vuzp.8 dst0, dst1 + /* bubbles */ + vuzp.8 dst0, dst1 +.endm + +.macro bilinear_deinterleave_dst_x_src numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst_x_over numpix, dst0, dst1, dst01 + bilinear_deinterleave numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst_x_add numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst_8_src numpix, dst0, dst1, dst01 + bilinear_deinterleave numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst_8_over numpix, dst0, dst1, dst01 + bilinear_deinterleave numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst_8_add numpix, dst0, dst1, dst01 + bilinear_deinterleave numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst mask_fmt, op, numpix, dst0, dst1, dst01 + bilinear_deinterleave_dst_&mask_fmt&_&op numpix, dst0, dst1, dst01 +.endm + + +.macro bilinear_interpolate_last_pixel src_fmt, mask_fmt, dst_fmt, op + bilinear_load_&src_fmt d0, d1, d2 + bilinear_load_mask mask_fmt, 1, d4 + bilinear_load_dst dst_fmt, op, 1, d18, d19, q9 + vmull.u8 q1, d0, d28 + vmlal.u8 q1, d1, d29 + /* 5 cycles bubble */ + vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q0, d2, d30 + vmlal.u16 q0, d3, d30 + /* 5 cycles bubble */ + bilinear_duplicate_mask mask_fmt, 1, d4 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + /* 3 cycles bubble */ + vmovn.u16 d0, q0 + /* 1 cycle bubble */ + bilinear_interleave_src_dst \ + mask_fmt, op, 1, d0, d1, q0, d18, d19, q9 + bilinear_apply_mask_to_src \ + mask_fmt, 1, d0, d1, q0, d4, \ + q3, q8, q10, q11 + bilinear_combine \ + op, 1, d0, d1, q0, d18, d19, q9, \ + q3, q8, q10, q11, d5 + bilinear_deinterleave_dst mask_fmt, op, 1, d0, d1, q0 + bilinear_store_&dst_fmt 1, q2, q3 +.endm + +.macro bilinear_interpolate_two_pixels src_fmt, mask_fmt, dst_fmt, op + bilinear_load_and_vertical_interpolate_two_&src_fmt \ + q1, q11, d0, d1, d20, d21, d22, d23 + bilinear_load_mask mask_fmt, 2, d4 + bilinear_load_dst dst_fmt, op, 2, d18, d19, q9 + vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q0, d2, d30 + vmlal.u16 q0, d3, d30 + vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q10, d22, d31 + vmlal.u16 q10, d23, d31 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS) + bilinear_duplicate_mask mask_fmt, 2, d4 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vadd.u16 q12, q12, q13 + vmovn.u16 d0, q0 + bilinear_interleave_src_dst \ + mask_fmt, op, 2, d0, d1, q0, d18, d19, q9 + bilinear_apply_mask_to_src \ + mask_fmt, 2, d0, d1, q0, d4, \ + q3, q8, q10, q11 + bilinear_combine \ + op, 2, d0, d1, q0, d18, d19, q9, \ + q3, q8, q10, q11, d5 + bilinear_deinterleave_dst mask_fmt, op, 2, d0, d1, q0 + bilinear_store_&dst_fmt 2, q2, q3 +.endm + +.macro bilinear_interpolate_four_pixels src_fmt, mask_fmt, dst_fmt, op + bilinear_load_and_vertical_interpolate_four_&src_fmt \ + q1, q11, d0, d1, d20, d21, d22, d23 \ + q3, q9, d4, d5, d16, d17, d18, d19 + pld [TMP1, PF_OFFS] + sub TMP1, TMP1, STRIDE + vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q0, d2, d30 + vmlal.u16 q0, d3, d30 + vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q10, d22, d31 + vmlal.u16 q10, d23, d31 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshll.u16 q2, d6, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q2, d6, d30 + vmlal.u16 q2, d7, d30 + vshll.u16 q8, d18, #BILINEAR_INTERPOLATION_BITS + bilinear_load_mask mask_fmt, 4, d22 + bilinear_load_dst dst_fmt, op, 4, d2, d3, q1 + pld [TMP1, PF_OFFS] + vmlsl.u16 q8, d18, d31 + vmlal.u16 q8, d19, d31 + vadd.u16 q12, q12, q13 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d5, q8, #(2 * BILINEAR_INTERPOLATION_BITS) + bilinear_duplicate_mask mask_fmt, 4, d22 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vmovn.u16 d0, q0 + vmovn.u16 d1, q2 + vadd.u16 q12, q12, q13 + bilinear_interleave_src_dst \ + mask_fmt, op, 4, d0, d1, q0, d2, d3, q1 + bilinear_apply_mask_to_src \ + mask_fmt, 4, d0, d1, q0, d22, \ + q3, q8, q9, q10 + bilinear_combine \ + op, 4, d0, d1, q0, d2, d3, q1, \ + q3, q8, q9, q10, d23 + bilinear_deinterleave_dst mask_fmt, op, 4, d0, d1, q0 + bilinear_store_&dst_fmt 4, q2, q3 +.endm + +.set BILINEAR_FLAG_USE_MASK, 1 +.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2 + +/* + * Main template macro for generating NEON optimized bilinear scanline functions. + * + * Bilinear scanline generator macro take folling arguments: + * fname - name of the function to generate + * src_fmt - source color format (8888 or 0565) + * dst_fmt - destination color format (8888 or 0565) + * src/dst_bpp_shift - (1 << bpp_shift) is the size of src/dst pixel in bytes + * process_last_pixel - code block that interpolate one pixel and does not + * update horizontal weight + * process_two_pixels - code block that interpolate two pixels and update + * horizontal weight + * process_four_pixels - code block that interpolate four pixels and update + * horizontal weight + * process_pixblock_head - head part of middle loop + * process_pixblock_tail - tail part of middle loop + * process_pixblock_tail_head - tail_head of middle loop + * pixblock_size - number of pixels processed in a single middle loop + * prefetch_distance - prefetch in the source image by that many pixels ahead + */ + +.macro generate_bilinear_scanline_func \ + fname, \ + src_fmt, dst_fmt, src_bpp_shift, dst_bpp_shift, \ + bilinear_process_last_pixel, \ + bilinear_process_two_pixels, \ + bilinear_process_four_pixels, \ + bilinear_process_pixblock_head, \ + bilinear_process_pixblock_tail, \ + bilinear_process_pixblock_tail_head, \ + pixblock_size, \ + prefetch_distance, \ + flags + +pixman_asm_function fname +.if pixblock_size == 8 +.elseif pixblock_size == 4 +.else + .error unsupported pixblock size +.endif + +.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0 + OUT .req r0 + TOP .req r1 + BOTTOM .req r2 + WT .req r3 + WB .req r4 + X .req r5 + UX .req r6 + WIDTH .req ip + TMP1 .req r3 + TMP2 .req r4 + PF_OFFS .req r7 + TMP3 .req r8 + TMP4 .req r9 + STRIDE .req r2 + + mov ip, sp + push {r4, r5, r6, r7, r8, r9} + mov PF_OFFS, #prefetch_distance + ldmia ip, {WB, X, UX, WIDTH} +.else + OUT .req r0 + MASK .req r1 + TOP .req r2 + BOTTOM .req r3 + WT .req r4 + WB .req r5 + X .req r6 + UX .req r7 + WIDTH .req ip + TMP1 .req r4 + TMP2 .req r5 + PF_OFFS .req r8 + TMP3 .req r9 + TMP4 .req r10 + STRIDE .req r3 + + .set prefetch_offset, prefetch_distance + + mov ip, sp + push {r4, r5, r6, r7, r8, r9, r10, ip} + mov PF_OFFS, #prefetch_distance + ldmia ip, {WT, WB, X, UX, WIDTH} +.endif + + mul PF_OFFS, PF_OFFS, UX + +.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0 + vpush {d8-d15} +.endif + + sub STRIDE, BOTTOM, TOP + .unreq BOTTOM + + cmp WIDTH, #0 + ble 3f + + vdup.u16 q12, X + vdup.u16 q13, UX + vdup.u8 d28, WT + vdup.u8 d29, WB + vadd.u16 d25, d25, d26 + + /* ensure good destination alignment */ + cmp WIDTH, #1 + blt 0f + tst OUT, #(1 << dst_bpp_shift) + beq 0f + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vadd.u16 q12, q12, q13 + bilinear_process_last_pixel + sub WIDTH, WIDTH, #1 +0: + vadd.u16 q13, q13, q13 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vadd.u16 q12, q12, q13 + + cmp WIDTH, #2 + blt 0f + tst OUT, #(1 << (dst_bpp_shift + 1)) + beq 0f + bilinear_process_two_pixels + sub WIDTH, WIDTH, #2 +0: +.if pixblock_size == 8 + cmp WIDTH, #4 + blt 0f + tst OUT, #(1 << (dst_bpp_shift + 2)) + beq 0f + bilinear_process_four_pixels + sub WIDTH, WIDTH, #4 +0: +.endif + subs WIDTH, WIDTH, #pixblock_size + blt 1f + mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift) + bilinear_process_pixblock_head + subs WIDTH, WIDTH, #pixblock_size + blt 5f +0: + bilinear_process_pixblock_tail_head + subs WIDTH, WIDTH, #pixblock_size + bge 0b +5: + bilinear_process_pixblock_tail +1: +.if pixblock_size == 8 + tst WIDTH, #4 + beq 2f + bilinear_process_four_pixels +2: +.endif + /* handle the remaining trailing pixels */ + tst WIDTH, #2 + beq 2f + bilinear_process_two_pixels +2: + tst WIDTH, #1 + beq 3f + bilinear_process_last_pixel +3: +.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0 + vpop {d8-d15} +.endif + +.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0 + pop {r4, r5, r6, r7, r8, r9} +.else + pop {r4, r5, r6, r7, r8, r9, r10, ip} +.endif + bx lr + + .unreq OUT + .unreq TOP + .unreq WT + .unreq WB + .unreq X + .unreq UX + .unreq WIDTH + .unreq TMP1 + .unreq TMP2 + .unreq PF_OFFS + .unreq TMP3 + .unreq TMP4 + .unreq STRIDE +.if ((flags) & BILINEAR_FLAG_USE_MASK) != 0 + .unreq MASK +.endif + +.endfunc + +.endm + +/* src_8888_8_8888 */ +.macro bilinear_src_8888_8_8888_process_last_pixel + bilinear_interpolate_last_pixel 8888, 8, 8888, src +.endm + +.macro bilinear_src_8888_8_8888_process_two_pixels + bilinear_interpolate_two_pixels 8888, 8, 8888, src +.endm + +.macro bilinear_src_8888_8_8888_process_four_pixels + bilinear_interpolate_four_pixels 8888, 8, 8888, src +.endm + +.macro bilinear_src_8888_8_8888_process_pixblock_head + bilinear_src_8888_8_8888_process_four_pixels +.endm + +.macro bilinear_src_8888_8_8888_process_pixblock_tail +.endm + +.macro bilinear_src_8888_8_8888_process_pixblock_tail_head + bilinear_src_8888_8_8888_process_pixblock_tail + bilinear_src_8888_8_8888_process_pixblock_head +.endm + +/* src_8888_8_0565 */ +.macro bilinear_src_8888_8_0565_process_last_pixel + bilinear_interpolate_last_pixel 8888, 8, 0565, src +.endm + +.macro bilinear_src_8888_8_0565_process_two_pixels + bilinear_interpolate_two_pixels 8888, 8, 0565, src +.endm + +.macro bilinear_src_8888_8_0565_process_four_pixels + bilinear_interpolate_four_pixels 8888, 8, 0565, src +.endm + +.macro bilinear_src_8888_8_0565_process_pixblock_head + bilinear_src_8888_8_0565_process_four_pixels +.endm + +.macro bilinear_src_8888_8_0565_process_pixblock_tail +.endm + +.macro bilinear_src_8888_8_0565_process_pixblock_tail_head + bilinear_src_8888_8_0565_process_pixblock_tail + bilinear_src_8888_8_0565_process_pixblock_head +.endm + +/* src_0565_8_x888 */ +.macro bilinear_src_0565_8_x888_process_last_pixel + bilinear_interpolate_last_pixel 0565, 8, 8888, src +.endm + +.macro bilinear_src_0565_8_x888_process_two_pixels + bilinear_interpolate_two_pixels 0565, 8, 8888, src +.endm + +.macro bilinear_src_0565_8_x888_process_four_pixels + bilinear_interpolate_four_pixels 0565, 8, 8888, src +.endm + +.macro bilinear_src_0565_8_x888_process_pixblock_head + bilinear_src_0565_8_x888_process_four_pixels +.endm + +.macro bilinear_src_0565_8_x888_process_pixblock_tail +.endm + +.macro bilinear_src_0565_8_x888_process_pixblock_tail_head + bilinear_src_0565_8_x888_process_pixblock_tail + bilinear_src_0565_8_x888_process_pixblock_head +.endm + +/* src_0565_8_0565 */ +.macro bilinear_src_0565_8_0565_process_last_pixel + bilinear_interpolate_last_pixel 0565, 8, 0565, src +.endm + +.macro bilinear_src_0565_8_0565_process_two_pixels + bilinear_interpolate_two_pixels 0565, 8, 0565, src +.endm + +.macro bilinear_src_0565_8_0565_process_four_pixels + bilinear_interpolate_four_pixels 0565, 8, 0565, src +.endm + +.macro bilinear_src_0565_8_0565_process_pixblock_head + bilinear_src_0565_8_0565_process_four_pixels +.endm + +.macro bilinear_src_0565_8_0565_process_pixblock_tail +.endm + +.macro bilinear_src_0565_8_0565_process_pixblock_tail_head + bilinear_src_0565_8_0565_process_pixblock_tail + bilinear_src_0565_8_0565_process_pixblock_head +.endm + +/* over_8888_8888 */ +.macro bilinear_over_8888_8888_process_last_pixel + bilinear_interpolate_last_pixel 8888, x, 8888, over +.endm + +.macro bilinear_over_8888_8888_process_two_pixels + bilinear_interpolate_two_pixels 8888, x, 8888, over +.endm + +.macro bilinear_over_8888_8888_process_four_pixels + bilinear_interpolate_four_pixels 8888, x, 8888, over +.endm + +.macro bilinear_over_8888_8888_process_pixblock_head + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #2 + + vld1.32 {d22}, [TMP1], STRIDE + vld1.32 {d23}, [TMP1] + mov TMP3, X, asr #16 + add X, X, UX + add TMP3, TOP, TMP3, asl #2 + vmull.u8 q8, d22, d28 + vmlal.u8 q8, d23, d29 + + vld1.32 {d22}, [TMP2], STRIDE + vld1.32 {d23}, [TMP2] + mov TMP4, X, asr #16 + add X, X, UX + add TMP4, TOP, TMP4, asl #2 + vmull.u8 q9, d22, d28 + vmlal.u8 q9, d23, d29 + + vld1.32 {d22}, [TMP3], STRIDE + vld1.32 {d23}, [TMP3] + vmull.u8 q10, d22, d28 + vmlal.u8 q10, d23, d29 + + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q0, d16, d30 + vmlal.u16 q0, d17, d30 + + pld [TMP4, PF_OFFS] + vld1.32 {d16}, [TMP4], STRIDE + vld1.32 {d17}, [TMP4] + pld [TMP4, PF_OFFS] + vmull.u8 q11, d16, d28 + vmlal.u8 q11, d17, d29 + + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q1, d18, d31 + vmlal.u16 q1, d19, d31 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vadd.u16 q12, q12, q13 +.endm + +.macro bilinear_over_8888_8888_process_pixblock_tail + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q2, d20, d30 + vmlal.u16 q2, d21, d30 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q3, d22, d31 + vmlal.u16 q3, d23, d31 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) + vld1.32 {d2, d3}, [OUT, :128] + pld [OUT, #(prefetch_offset * 4)] + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) + vmovn.u16 d6, q0 + vmovn.u16 d7, q2 + vuzp.8 d6, d7 + vuzp.8 d2, d3 + vuzp.8 d6, d7 + vuzp.8 d2, d3 + vdup.32 d4, d7[1] + vmvn.8 d4, d4 + vmull.u8 q11, d2, d4 + vmull.u8 q2, d3, d4 + vrshr.u16 q1, q11, #8 + vrshr.u16 q10, q2, #8 + vraddhn.u16 d2, q1, q11 + vraddhn.u16 d3, q10, q2 + vqadd.u8 q3, q1, q3 + vuzp.8 d6, d7 + vuzp.8 d6, d7 + vadd.u16 q12, q12, q13 + vst1.32 {d6, d7}, [OUT, :128]! +.endm + +.macro bilinear_over_8888_8888_process_pixblock_tail_head + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + vmlsl.u16 q2, d20, d30 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #2 + vmlal.u16 q2, d21, d30 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS + vld1.32 {d20}, [TMP1], STRIDE + vmlsl.u16 q3, d22, d31 + vmlal.u16 q3, d23, d31 + vld1.32 {d21}, [TMP1] + vmull.u8 q8, d20, d28 + vmlal.u8 q8, d21, d29 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) + vld1.32 {d2, d3}, [OUT, :128] + pld [OUT, PF_OFFS] + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vld1.32 {d22}, [TMP2], STRIDE + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) + vmovn.u16 d6, q0 + vld1.32 {d23}, [TMP2] + vmull.u8 q9, d22, d28 + mov TMP3, X, asr #16 + add X, X, UX + add TMP3, TOP, TMP3, asl #2 + mov TMP4, X, asr #16 + add X, X, UX + add TMP4, TOP, TMP4, asl #2 + vmlal.u8 q9, d23, d29 + vmovn.u16 d7, q2 + vld1.32 {d22}, [TMP3], STRIDE + vuzp.8 d6, d7 + vuzp.8 d2, d3 + vuzp.8 d6, d7 + vuzp.8 d2, d3 + vdup.32 d4, d7[1] + vld1.32 {d23}, [TMP3] + vmvn.8 d4, d4 + vmull.u8 q10, d22, d28 + vmlal.u8 q10, d23, d29 + vmull.u8 q11, d2, d4 + vmull.u8 q2, d3, d4 + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q0, d16, d30 + vrshr.u16 q1, q11, #8 + vmlal.u16 q0, d17, d30 + vrshr.u16 q8, q2, #8 + vraddhn.u16 d2, q1, q11 + vraddhn.u16 d3, q8, q2 + pld [TMP4, PF_OFFS] + vld1.32 {d16}, [TMP4], STRIDE + vqadd.u8 q3, q1, q3 + vld1.32 {d17}, [TMP4] + pld [TMP4, PF_OFFS] + vmull.u8 q11, d16, d28 + vmlal.u8 q11, d17, d29 + vuzp.8 d6, d7 + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS + vuzp.8 d6, d7 + vmlsl.u16 q1, d18, d31 + vadd.u16 q12, q12, q13 + vmlal.u16 q1, d19, d31 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vadd.u16 q12, q12, q13 + vst1.32 {d6, d7}, [OUT, :128]! +.endm + +/* over_8888_8_8888 */ +.macro bilinear_over_8888_8_8888_process_last_pixel + bilinear_interpolate_last_pixel 8888, 8, 8888, over +.endm + +.macro bilinear_over_8888_8_8888_process_two_pixels + bilinear_interpolate_two_pixels 8888, 8, 8888, over +.endm + +.macro bilinear_over_8888_8_8888_process_four_pixels + bilinear_interpolate_four_pixels 8888, 8, 8888, over +.endm + +.macro bilinear_over_8888_8_8888_process_pixblock_head + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + vld1.32 {d0}, [TMP1], STRIDE + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #2 + vld1.32 {d1}, [TMP1] + mov TMP3, X, asr #16 + add X, X, UX + add TMP3, TOP, TMP3, asl #2 + vld1.32 {d2}, [TMP2], STRIDE + mov TMP4, X, asr #16 + add X, X, UX + add TMP4, TOP, TMP4, asl #2 + vld1.32 {d3}, [TMP2] + vmull.u8 q2, d0, d28 + vmull.u8 q3, d2, d28 + vmlal.u8 q2, d1, d29 + vmlal.u8 q3, d3, d29 + vshll.u16 q0, d4, #BILINEAR_INTERPOLATION_BITS + vshll.u16 q1, d6, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q0, d4, d30 + vmlsl.u16 q1, d6, d31 + vmlal.u16 q0, d5, d30 + vmlal.u16 q1, d7, d31 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) + vld1.32 {d2}, [TMP3], STRIDE + vld1.32 {d3}, [TMP3] + pld [TMP4, PF_OFFS] + vld1.32 {d4}, [TMP4], STRIDE + vld1.32 {d5}, [TMP4] + pld [TMP4, PF_OFFS] + vmull.u8 q3, d2, d28 + vmlal.u8 q3, d3, d29 + vmull.u8 q1, d4, d28 + vmlal.u8 q1, d5, d29 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vld1.32 {d22[0]}, [MASK]! + pld [MASK, #prefetch_offset] + vadd.u16 q12, q12, q13 + vmovn.u16 d16, q0 +.endm + +.macro bilinear_over_8888_8_8888_process_pixblock_tail + vshll.u16 q9, d6, #BILINEAR_INTERPOLATION_BITS + vshll.u16 q10, d2, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q9, d6, d30 + vmlsl.u16 q10, d2, d31 + vmlal.u16 q9, d7, d30 + vmlal.u16 q10, d3, d31 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vadd.u16 q12, q12, q13 + vdup.32 d22, d22[0] + vshrn.u32 d18, q9, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d19, q10, #(2 * BILINEAR_INTERPOLATION_BITS) + vmovn.u16 d17, q9 + vld1.32 {d18, d19}, [OUT, :128] + pld [OUT, PF_OFFS] + vuzp.8 d16, d17 + vuzp.8 d18, d19 + vuzp.8 d16, d17 + vuzp.8 d18, d19 + vmull.u8 q10, d16, d22 + vmull.u8 q11, d17, d22 + vrsra.u16 q10, q10, #8 + vrsra.u16 q11, q11, #8 + vrshrn.u16 d16, q10, #8 + vrshrn.u16 d17, q11, #8 + vdup.32 d22, d17[1] + vmvn.8 d22, d22 + vmull.u8 q10, d18, d22 + vmull.u8 q11, d19, d22 + vrshr.u16 q9, q10, #8 + vrshr.u16 q0, q11, #8 + vraddhn.u16 d18, q9, q10 + vraddhn.u16 d19, q0, q11 + vqadd.u8 q9, q8, q9 + vuzp.8 d18, d19 + vuzp.8 d18, d19 + vst1.32 {d18, d19}, [OUT, :128]! +.endm + +.macro bilinear_over_8888_8_8888_process_pixblock_tail_head + vshll.u16 q9, d6, #BILINEAR_INTERPOLATION_BITS + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + vshll.u16 q10, d2, #BILINEAR_INTERPOLATION_BITS + vld1.32 {d0}, [TMP1], STRIDE + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #2 + vmlsl.u16 q9, d6, d30 + vmlsl.u16 q10, d2, d31 + vld1.32 {d1}, [TMP1] + mov TMP3, X, asr #16 + add X, X, UX + add TMP3, TOP, TMP3, asl #2 + vmlal.u16 q9, d7, d30 + vmlal.u16 q10, d3, d31 + vld1.32 {d2}, [TMP2], STRIDE + mov TMP4, X, asr #16 + add X, X, UX + add TMP4, TOP, TMP4, asl #2 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vadd.u16 q12, q12, q13 + vld1.32 {d3}, [TMP2] + vdup.32 d22, d22[0] + vshrn.u32 d18, q9, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d19, q10, #(2 * BILINEAR_INTERPOLATION_BITS) + vmull.u8 q2, d0, d28 + vmull.u8 q3, d2, d28 + vmovn.u16 d17, q9 + vld1.32 {d18, d19}, [OUT, :128] + pld [OUT, #(prefetch_offset * 4)] + vmlal.u8 q2, d1, d29 + vmlal.u8 q3, d3, d29 + vuzp.8 d16, d17 + vuzp.8 d18, d19 + vshll.u16 q0, d4, #BILINEAR_INTERPOLATION_BITS + vshll.u16 q1, d6, #BILINEAR_INTERPOLATION_BITS + vuzp.8 d16, d17 + vuzp.8 d18, d19 + vmlsl.u16 q0, d4, d30 + vmlsl.u16 q1, d6, d31 + vmull.u8 q10, d16, d22 + vmull.u8 q11, d17, d22 + vmlal.u16 q0, d5, d30 + vmlal.u16 q1, d7, d31 + vrsra.u16 q10, q10, #8 + vrsra.u16 q11, q11, #8 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) + vrshrn.u16 d16, q10, #8 + vrshrn.u16 d17, q11, #8 + vld1.32 {d2}, [TMP3], STRIDE + vdup.32 d22, d17[1] + vld1.32 {d3}, [TMP3] + vmvn.8 d22, d22 + pld [TMP4, PF_OFFS] + vld1.32 {d4}, [TMP4], STRIDE + vmull.u8 q10, d18, d22 + vmull.u8 q11, d19, d22 + vld1.32 {d5}, [TMP4] + pld [TMP4, PF_OFFS] + vmull.u8 q3, d2, d28 + vrshr.u16 q9, q10, #8 + vrshr.u16 q15, q11, #8 + vmlal.u8 q3, d3, d29 + vmull.u8 q1, d4, d28 + vraddhn.u16 d18, q9, q10 + vraddhn.u16 d19, q15, q11 + vmlal.u8 q1, d5, d29 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vqadd.u8 q9, q8, q9 + vld1.32 {d22[0]}, [MASK]! + vuzp.8 d18, d19 + vadd.u16 q12, q12, q13 + vuzp.8 d18, d19 + vmovn.u16 d16, q0 + vst1.32 {d18, d19}, [OUT, :128]! +.endm + +/* add_8888_8888 */ +.macro bilinear_add_8888_8888_process_last_pixel + bilinear_interpolate_last_pixel 8888, x, 8888, add +.endm + +.macro bilinear_add_8888_8888_process_two_pixels + bilinear_interpolate_two_pixels 8888, x, 8888, add +.endm + +.macro bilinear_add_8888_8888_process_four_pixels + bilinear_interpolate_four_pixels 8888, x, 8888, add +.endm + +.macro bilinear_add_8888_8888_process_pixblock_head + bilinear_add_8888_8888_process_four_pixels +.endm + +.macro bilinear_add_8888_8888_process_pixblock_tail +.endm + +.macro bilinear_add_8888_8888_process_pixblock_tail_head + bilinear_add_8888_8888_process_pixblock_tail + bilinear_add_8888_8888_process_pixblock_head +.endm + +/* add_8888_8_8888 */ +.macro bilinear_add_8888_8_8888_process_last_pixel + bilinear_interpolate_last_pixel 8888, 8, 8888, add +.endm + +.macro bilinear_add_8888_8_8888_process_two_pixels + bilinear_interpolate_two_pixels 8888, 8, 8888, add +.endm + +.macro bilinear_add_8888_8_8888_process_four_pixels + bilinear_interpolate_four_pixels 8888, 8, 8888, add +.endm + +.macro bilinear_add_8888_8_8888_process_pixblock_head + bilinear_add_8888_8_8888_process_four_pixels +.endm + +.macro bilinear_add_8888_8_8888_process_pixblock_tail +.endm + +.macro bilinear_add_8888_8_8888_process_pixblock_tail_head + bilinear_add_8888_8_8888_process_pixblock_tail + bilinear_add_8888_8_8888_process_pixblock_head +.endm + + +/* Bilinear scanline functions */ +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \ + 8888, 8888, 2, 2, \ + bilinear_src_8888_8_8888_process_last_pixel, \ + bilinear_src_8888_8_8888_process_two_pixels, \ + bilinear_src_8888_8_8888_process_four_pixels, \ + bilinear_src_8888_8_8888_process_pixblock_head, \ + bilinear_src_8888_8_8888_process_pixblock_tail, \ + bilinear_src_8888_8_8888_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \ + 8888, 0565, 2, 1, \ + bilinear_src_8888_8_0565_process_last_pixel, \ + bilinear_src_8888_8_0565_process_two_pixels, \ + bilinear_src_8888_8_0565_process_four_pixels, \ + bilinear_src_8888_8_0565_process_pixblock_head, \ + bilinear_src_8888_8_0565_process_pixblock_tail, \ + bilinear_src_8888_8_0565_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \ + 0565, 8888, 1, 2, \ + bilinear_src_0565_8_x888_process_last_pixel, \ + bilinear_src_0565_8_x888_process_two_pixels, \ + bilinear_src_0565_8_x888_process_four_pixels, \ + bilinear_src_0565_8_x888_process_pixblock_head, \ + bilinear_src_0565_8_x888_process_pixblock_tail, \ + bilinear_src_0565_8_x888_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \ + 0565, 0565, 1, 1, \ + bilinear_src_0565_8_0565_process_last_pixel, \ + bilinear_src_0565_8_0565_process_two_pixels, \ + bilinear_src_0565_8_0565_process_four_pixels, \ + bilinear_src_0565_8_0565_process_pixblock_head, \ + bilinear_src_0565_8_0565_process_pixblock_tail, \ + bilinear_src_0565_8_0565_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \ + 8888, 8888, 2, 2, \ + bilinear_over_8888_8888_process_last_pixel, \ + bilinear_over_8888_8888_process_two_pixels, \ + bilinear_over_8888_8888_process_four_pixels, \ + bilinear_over_8888_8888_process_pixblock_head, \ + bilinear_over_8888_8888_process_pixblock_tail, \ + bilinear_over_8888_8888_process_pixblock_tail_head, \ + 4, 28, 0 + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \ + 8888, 8888, 2, 2, \ + bilinear_over_8888_8_8888_process_last_pixel, \ + bilinear_over_8888_8_8888_process_two_pixels, \ + bilinear_over_8888_8_8888_process_four_pixels, \ + bilinear_over_8888_8_8888_process_pixblock_head, \ + bilinear_over_8888_8_8888_process_pixblock_tail, \ + bilinear_over_8888_8_8888_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \ + 8888, 8888, 2, 2, \ + bilinear_add_8888_8888_process_last_pixel, \ + bilinear_add_8888_8888_process_two_pixels, \ + bilinear_add_8888_8888_process_four_pixels, \ + bilinear_add_8888_8888_process_pixblock_head, \ + bilinear_add_8888_8888_process_pixblock_tail, \ + bilinear_add_8888_8888_process_pixblock_tail_head, \ + 4, 28, 0 + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \ + 8888, 8888, 2, 2, \ + bilinear_add_8888_8_8888_process_last_pixel, \ + bilinear_add_8888_8_8888_process_two_pixels, \ + bilinear_add_8888_8_8888_process_four_pixels, \ + bilinear_add_8888_8_8888_process_pixblock_head, \ + bilinear_add_8888_8_8888_process_pixblock_tail, \ + bilinear_add_8888_8_8888_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK diff --git a/src/pixman/pixman/pixman-arm-neon-asm.S b/src/pixman/pixman/pixman-arm-neon-asm.S new file mode 100644 index 0000000..187197d --- /dev/null +++ b/src/pixman/pixman/pixman-arm-neon-asm.S @@ -0,0 +1,3637 @@ +/* + * Copyright © 2009 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + */ + +/* + * This file contains implementations of NEON optimized pixel processing + * functions. There is no full and detailed tutorial, but some functions + * (those which are exposing some new or interesting features) are + * extensively commented and can be used as examples. + * + * You may want to have a look at the comments for following functions: + * - pixman_composite_over_8888_0565_asm_neon + * - pixman_composite_over_n_8_0565_asm_neon + */ + +/* Prevent the stack from becoming executable for no reason... */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + + .text + .fpu neon + .arch armv7a + .object_arch armv4 + .eabi_attribute 10, 0 /* suppress Tag_FP_arch */ + .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */ + .arm + .altmacro + .p2align 2 + +#include "pixman-private.h" +#include "pixman-arm-neon-asm.h" + +/* Global configuration options and preferences */ + +/* + * The code can optionally make use of unaligned memory accesses to improve + * performance of handling leading/trailing pixels for each scanline. + * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for + * example in linux if unaligned memory accesses are not configured to + * generate.exceptions. + */ +.set RESPECT_STRICT_ALIGNMENT, 1 + +/* + * Set default prefetch type. There is a choice between the following options: + * + * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work + * as NOP to workaround some HW bugs or for whatever other reason) + * + * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where + * advanced prefetch intruduces heavy overhead) + * + * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8 + * which can run ARM and NEON instructions simultaneously so that extra ARM + * instructions do not add (many) extra cycles, but improve prefetch efficiency) + * + * Note: some types of function can't support advanced prefetch and fallback + * to simple one (those which handle 24bpp pixels) + */ +.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED + +/* Prefetch distance in pixels for simple prefetch */ +.set PREFETCH_DISTANCE_SIMPLE, 64 + +/* + * Implementation of pixman_composite_over_8888_0565_asm_neon + * + * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and + * performs OVER compositing operation. Function fast_composite_over_8888_0565 + * from pixman-fast-path.c does the same in C and can be used as a reference. + * + * First we need to have some NEON assembly code which can do the actual + * operation on the pixels and provide it to the template macro. + * + * Template macro quite conveniently takes care of emitting all the necessary + * code for memory reading and writing (including quite tricky cases of + * handling unaligned leading/trailing pixels), so we only need to deal with + * the data in NEON registers. + * + * NEON registers allocation in general is recommented to be the following: + * d0, d1, d2, d3 - contain loaded source pixel data + * d4, d5, d6, d7 - contain loaded destination pixels (if they are needed) + * d24, d25, d26, d27 - contain loading mask pixel data (if mask is used) + * d28, d29, d30, d31 - place for storing the result (destination pixels) + * + * As can be seen above, four 64-bit NEON registers are used for keeping + * intermediate pixel data and up to 8 pixels can be processed in one step + * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp). + * + * This particular function uses the following registers allocation: + * d0, d1, d2, d3 - contain loaded source pixel data + * d4, d5 - contain loaded destination pixels (they are needed) + * d28, d29 - place for storing the result (destination pixels) + */ + +/* + * Step one. We need to have some code to do some arithmetics on pixel data. + * This is implemented as a pair of macros: '*_head' and '*_tail'. When used + * back-to-back, they take pixel data from {d0, d1, d2, d3} and {d4, d5}, + * perform all the needed calculations and write the result to {d28, d29}. + * The rationale for having two macros and not just one will be explained + * later. In practice, any single monolitic function which does the work can + * be split into two parts in any arbitrary way without affecting correctness. + * + * There is one special trick here too. Common template macro can optionally + * make our life a bit easier by doing R, G, B, A color components + * deinterleaving for 32bpp pixel formats (and this feature is used in + * 'pixman_composite_over_8888_0565_asm_neon' function). So it means that + * instead of having 8 packed pixels in {d0, d1, d2, d3} registers, we + * actually use d0 register for blue channel (a vector of eight 8-bit + * values), d1 register for green, d2 for red and d3 for alpha. This + * simple conversion can be also done with a few NEON instructions: + * + * Packed to planar conversion: + * vuzp.8 d0, d1 + * vuzp.8 d2, d3 + * vuzp.8 d1, d3 + * vuzp.8 d0, d2 + * + * Planar to packed conversion: + * vzip.8 d0, d2 + * vzip.8 d1, d3 + * vzip.8 d2, d3 + * vzip.8 d0, d1 + * + * But pixel can be loaded directly in planar format using VLD4.8 NEON + * instruction. It is 1 cycle slower than VLD1.32, so this is not always + * desirable, that's why deinterleaving is optional. + * + * But anyway, here is the code: + */ +.macro pixman_composite_over_8888_0565_process_pixblock_head + /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format + and put data into d6 - red, d7 - green, d30 - blue */ + vshrn.u16 d6, q2, #8 + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vsri.u8 d6, d6, #5 + vmvn.8 d3, d3 /* invert source alpha */ + vsri.u8 d7, d7, #6 + vshrn.u16 d30, q2, #2 + /* now do alpha blending, storing results in 8-bit planar format + into d16 - red, d19 - green, d18 - blue */ + vmull.u8 q10, d3, d6 + vmull.u8 q11, d3, d7 + vmull.u8 q12, d3, d30 + vrshr.u16 q13, q10, #8 + vrshr.u16 q3, q11, #8 + vrshr.u16 q15, q12, #8 + vraddhn.u16 d20, q10, q13 + vraddhn.u16 d23, q11, q3 + vraddhn.u16 d22, q12, q15 +.endm + +.macro pixman_composite_over_8888_0565_process_pixblock_tail + /* ... continue alpha blending */ + vqadd.u8 d16, d2, d20 + vqadd.u8 q9, q0, q11 + /* convert the result to r5g6b5 and store it into {d28, d29} */ + vshll.u8 q14, d16, #8 + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +/* + * OK, now we got almost everything that we need. Using the above two + * macros, the work can be done right. But now we want to optimize + * it a bit. ARM Cortex-A8 is an in-order core, and benefits really + * a lot from good code scheduling and software pipelining. + * + * Let's construct some code, which will run in the core main loop. + * Some pseudo-code of the main loop will look like this: + * head + * while (...) { + * tail + * head + * } + * tail + * + * It may look a bit weird, but this setup allows to hide instruction + * latencies better and also utilize dual-issue capability more + * efficiently (make pairs of load-store and ALU instructions). + * + * So what we need now is a '*_tail_head' macro, which will be used + * in the core main loop. A trivial straightforward implementation + * of this macro would look like this: + * + * pixman_composite_over_8888_0565_process_pixblock_tail + * vst1.16 {d28, d29}, [DST_W, :128]! + * vld1.16 {d4, d5}, [DST_R, :128]! + * vld4.32 {d0, d1, d2, d3}, [SRC]! + * pixman_composite_over_8888_0565_process_pixblock_head + * cache_preload 8, 8 + * + * Now it also got some VLD/VST instructions. We simply can't move from + * processing one block of pixels to the other one with just arithmetics. + * The previously processed data needs to be written to memory and new + * data needs to be fetched. Fortunately, this main loop does not deal + * with partial leading/trailing pixels and can load/store a full block + * of pixels in a bulk. Additionally, destination buffer is already + * 16 bytes aligned here (which is good for performance). + * + * New things here are DST_R, DST_W, SRC and MASK identifiers. These + * are the aliases for ARM registers which are used as pointers for + * accessing data. We maintain separate pointers for reading and writing + * destination buffer (DST_R and DST_W). + * + * Another new thing is 'cache_preload' macro. It is used for prefetching + * data into CPU L2 cache and improve performance when dealing with large + * images which are far larger than cache size. It uses one argument + * (actually two, but they need to be the same here) - number of pixels + * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some + * details about this macro. Moreover, if good performance is needed + * the code from this macro needs to be copied into '*_tail_head' macro + * and mixed with the rest of code for optimal instructions scheduling. + * We are actually doing it below. + * + * Now after all the explanations, here is the optimized code. + * Different instruction streams (originaling from '*_head', '*_tail' + * and 'cache_preload' macro) use different indentation levels for + * better readability. Actually taking the code from one of these + * indentation levels and ignoring a few VLD/VST instructions would + * result in exactly the code from '*_head', '*_tail' or 'cache_preload' + * macro! + */ + +#if 1 + +.macro pixman_composite_over_8888_0565_process_pixblock_tail_head + vqadd.u8 d16, d2, d20 + vld1.16 {d4, d5}, [DST_R, :128]! + vqadd.u8 q9, q0, q11 + vshrn.u16 d6, q2, #8 + fetch_src_pixblock + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vshll.u8 q14, d16, #8 + PF add PF_X, PF_X, #8 + vshll.u8 q8, d19, #8 + PF tst PF_CTL, #0xF + vsri.u8 d6, d6, #5 + PF addne PF_X, PF_X, #8 + vmvn.8 d3, d3 + PF subne PF_CTL, PF_CTL, #1 + vsri.u8 d7, d7, #6 + vshrn.u16 d30, q2, #2 + vmull.u8 q10, d3, d6 + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vmull.u8 q11, d3, d7 + vmull.u8 q12, d3, d30 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vsri.u16 q14, q8, #5 + PF cmp PF_X, ORIG_W + vshll.u8 q9, d18, #8 + vrshr.u16 q13, q10, #8 + PF subge PF_X, PF_X, ORIG_W + vrshr.u16 q3, q11, #8 + vrshr.u16 q15, q12, #8 + PF subges PF_CTL, PF_CTL, #0x10 + vsri.u16 q14, q9, #11 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vraddhn.u16 d20, q10, q13 + vraddhn.u16 d23, q11, q3 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vraddhn.u16 d22, q12, q15 + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +#else + +/* If we did not care much about the performance, we would just use this... */ +.macro pixman_composite_over_8888_0565_process_pixblock_tail_head + pixman_composite_over_8888_0565_process_pixblock_tail + vst1.16 {d28, d29}, [DST_W, :128]! + vld1.16 {d4, d5}, [DST_R, :128]! + fetch_src_pixblock + pixman_composite_over_8888_0565_process_pixblock_head + cache_preload 8, 8 +.endm + +#endif + +/* + * And now the final part. We are using 'generate_composite_function' macro + * to put all the stuff together. We are specifying the name of the function + * which we want to get, number of bits per pixel for the source, mask and + * destination (0 if unused, like mask in this case). Next come some bit + * flags: + * FLAG_DST_READWRITE - tells that the destination buffer is both read + * and written, for write-only buffer we would use + * FLAG_DST_WRITEONLY flag instead + * FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data + * and separate color channels for 32bpp format. + * The next things are: + * - the number of pixels processed per iteration (8 in this case, because + * that's the maximum what can fit into four 64-bit NEON registers). + * - prefetch distance, measured in pixel blocks. In this case it is 5 times + * by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal + * prefetch distance can be selected by running some benchmarks. + * + * After that we specify some macros, these are 'default_init', + * 'default_cleanup' here which are empty (but it is possible to have custom + * init/cleanup macros to be able to save/restore some extra NEON registers + * like d8-d15 or do anything else) followed by + * 'pixman_composite_over_8888_0565_process_pixblock_head', + * 'pixman_composite_over_8888_0565_process_pixblock_tail' and + * 'pixman_composite_over_8888_0565_process_pixblock_tail_head' + * which we got implemented above. + * + * The last part is the NEON registers allocation scheme. + */ +generate_composite_function \ + pixman_composite_over_8888_0565_asm_neon, 32, 0, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_0565_process_pixblock_head, \ + pixman_composite_over_8888_0565_process_pixblock_tail, \ + pixman_composite_over_8888_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_n_0565_process_pixblock_head + /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format + and put data into d6 - red, d7 - green, d30 - blue */ + vshrn.u16 d6, q2, #8 + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vsri.u8 d6, d6, #5 + vsri.u8 d7, d7, #6 + vshrn.u16 d30, q2, #2 + /* now do alpha blending, storing results in 8-bit planar format + into d16 - red, d19 - green, d18 - blue */ + vmull.u8 q10, d3, d6 + vmull.u8 q11, d3, d7 + vmull.u8 q12, d3, d30 + vrshr.u16 q13, q10, #8 + vrshr.u16 q3, q11, #8 + vrshr.u16 q15, q12, #8 + vraddhn.u16 d20, q10, q13 + vraddhn.u16 d23, q11, q3 + vraddhn.u16 d22, q12, q15 +.endm + +.macro pixman_composite_over_n_0565_process_pixblock_tail + /* ... continue alpha blending */ + vqadd.u8 d16, d2, d20 + vqadd.u8 q9, q0, q11 + /* convert the result to r5g6b5 and store it into {d28, d29} */ + vshll.u8 q14, d16, #8 + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_0565_process_pixblock_tail_head + pixman_composite_over_n_0565_process_pixblock_tail + vld1.16 {d4, d5}, [DST_R, :128]! + vst1.16 {d28, d29}, [DST_W, :128]! + pixman_composite_over_n_0565_process_pixblock_head + cache_preload 8, 8 +.endm + +.macro pixman_composite_over_n_0565_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] + vmvn.8 d3, d3 /* invert source alpha */ +.endm + +generate_composite_function \ + pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_0565_init, \ + default_cleanup, \ + pixman_composite_over_n_0565_process_pixblock_head, \ + pixman_composite_over_n_0565_process_pixblock_tail, \ + pixman_composite_over_n_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_8888_0565_process_pixblock_head + vshll.u8 q8, d1, #8 + vshll.u8 q14, d2, #8 + vshll.u8 q9, d0, #8 +.endm + +.macro pixman_composite_src_8888_0565_process_pixblock_tail + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +.macro pixman_composite_src_8888_0565_process_pixblock_tail_head + vsri.u16 q14, q8, #5 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + fetch_src_pixblock + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vsri.u16 q14, q9, #11 + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vshll.u8 q8, d1, #8 + vst1.16 {d28, d29}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + vshll.u8 q14, d2, #8 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vshll.u8 q9, d0, #8 +.endm + +generate_composite_function \ + pixman_composite_src_8888_0565_asm_neon, 32, 0, 16, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_8888_0565_process_pixblock_head, \ + pixman_composite_src_8888_0565_process_pixblock_tail, \ + pixman_composite_src_8888_0565_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_src_0565_8888_process_pixblock_head + vshrn.u16 d30, q0, #8 + vshrn.u16 d29, q0, #3 + vsli.u16 q0, q0, #5 + vmov.u8 d31, #255 + vsri.u8 d30, d30, #5 + vsri.u8 d29, d29, #6 + vshrn.u16 d28, q0, #2 +.endm + +.macro pixman_composite_src_0565_8888_process_pixblock_tail +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_src_0565_8888_process_pixblock_tail_head + pixman_composite_src_0565_8888_process_pixblock_tail + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + fetch_src_pixblock + pixman_composite_src_0565_8888_process_pixblock_head + cache_preload 8, 8 +.endm + +generate_composite_function \ + pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0565_8888_process_pixblock_head, \ + pixman_composite_src_0565_8888_process_pixblock_tail, \ + pixman_composite_src_0565_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8_8_process_pixblock_head + vqadd.u8 q14, q0, q2 + vqadd.u8 q15, q1, q3 +.endm + +.macro pixman_composite_add_8_8_process_pixblock_tail +.endm + +.macro pixman_composite_add_8_8_process_pixblock_tail_head + fetch_src_pixblock + PF add PF_X, PF_X, #32 + PF tst PF_CTL, #0xF + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + PF addne PF_X, PF_X, #32 + PF subne PF_CTL, PF_CTL, #1 + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + vqadd.u8 q14, q0, q2 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vqadd.u8 q15, q1, q3 +.endm + +generate_composite_function \ + pixman_composite_add_8_8_asm_neon, 8, 0, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_process_pixblock_tail, \ + pixman_composite_add_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8888_8888_process_pixblock_tail_head + fetch_src_pixblock + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vld1.32 {d4, d5, d6, d7}, [DST_R, :128]! + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vst1.32 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + vqadd.u8 q14, q0, q2 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vqadd.u8 q15, q1, q3 +.endm + +generate_composite_function \ + pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_process_pixblock_tail, \ + pixman_composite_add_8888_8888_process_pixblock_tail_head + +generate_composite_function_single_scanline \ + pixman_composite_scanline_add_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_process_pixblock_tail, \ + pixman_composite_add_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_out_reverse_8888_8888_process_pixblock_head + vmvn.8 d24, d3 /* get inverted alpha */ + /* do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 + vmull.u8 q11, d24, d7 +.endm + +.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 +.endm + +.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrshr.u16 q14, q8, #8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + PF cmp PF_X, ORIG_W + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + fetch_src_pixblock + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vmvn.8 d22, d3 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q8, d22, d4 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q9, d22, d5 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vmull.u8 q10, d22, d6 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vmull.u8 q11, d22, d7 +.endm + +generate_composite_function_single_scanline \ + pixman_composite_scanline_out_reverse_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_out_reverse_8888_8888_process_pixblock_head, \ + pixman_composite_out_reverse_8888_8888_process_pixblock_tail, \ + pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_8888_8888_process_pixblock_head + pixman_composite_out_reverse_8888_8888_process_pixblock_head +.endm + +.macro pixman_composite_over_8888_8888_process_pixblock_tail + pixman_composite_out_reverse_8888_8888_process_pixblock_tail + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +.macro pixman_composite_over_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrshr.u16 q14, q8, #8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + PF cmp PF_X, ORIG_W + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 + fetch_src_pixblock + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vmvn.8 d22, d3 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q8, d22, d4 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q9, d22, d5 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vmull.u8 q10, d22, d6 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vmull.u8 q11, d22, d7 +.endm + +generate_composite_function \ + pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_process_pixblock_tail_head + +generate_composite_function_single_scanline \ + pixman_composite_scanline_over_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_n_8888_process_pixblock_head + /* deinterleaved source pixels in {d0, d1, d2, d3} */ + /* inverted alpha in {d24} */ + /* destination pixels in {d4, d5, d6, d7} */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 + vmull.u8 q11, d24, d7 +.endm + +.macro pixman_composite_over_n_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q2, q10, #8 + vrshr.u16 q3, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q2, q10 + vraddhn.u16 d31, q3, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +.macro pixman_composite_over_n_8888_process_pixblock_tail_head + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q2, q10, #8 + vrshr.u16 q3, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q2, q10 + vraddhn.u16 d31, q3, q11 + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vqadd.u8 q14, q0, q14 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0x0F + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vqadd.u8 q15, q1, q15 + PF cmp PF_X, ORIG_W + vmull.u8 q8, d24, d4 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vmull.u8 q9, d24, d5 + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q10, d24, d6 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q11, d24, d7 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +.macro pixman_composite_over_n_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] + vmvn.8 d24, d3 /* get inverted alpha */ +.endm + +generate_composite_function \ + pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8888_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_n_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head + vrshr.u16 q14, q8, #8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + PF cmp PF_X, ORIG_W + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 + vld4.8 {d0, d1, d2, d3}, [DST_R, :128]! + vmvn.8 d22, d3 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q8, d22, d4 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q9, d22, d5 + vmull.u8 q10, d22, d6 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vmull.u8 q11, d22, d7 +.endm + +.macro pixman_composite_over_reverse_n_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d7[0]}, [DUMMY] + vdup.8 d4, d7[0] + vdup.8 d5, d7[1] + vdup.8 d6, d7[2] + vdup.8 d7, d7[3] +.endm + +generate_composite_function \ + pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_reverse_n_8888_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 4, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_8888_8_0565_process_pixblock_head + vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */ + vmull.u8 q1, d24, d9 + vmull.u8 q6, d24, d10 + vmull.u8 q7, d24, d11 + vshrn.u16 d6, q2, #8 /* convert DST_R data to 32-bpp (part1) */ + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vrshr.u16 q8, q0, #8 /* IN for SRC pixels (part2) */ + vrshr.u16 q9, q1, #8 + vrshr.u16 q10, q6, #8 + vrshr.u16 q11, q7, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q9 + vraddhn.u16 d2, q6, q10 + vraddhn.u16 d3, q7, q11 + vsri.u8 d6, d6, #5 /* convert DST_R data to 32-bpp (part2) */ + vsri.u8 d7, d7, #6 + vmvn.8 d3, d3 + vshrn.u16 d30, q2, #2 + vmull.u8 q8, d3, d6 /* now do alpha blending */ + vmull.u8 q9, d3, d7 + vmull.u8 q10, d3, d30 +.endm + +.macro pixman_composite_over_8888_8_0565_process_pixblock_tail + /* 3 cycle bubble (after vmull.u8) */ + vrshr.u16 q13, q8, #8 + vrshr.u16 q11, q9, #8 + vrshr.u16 q15, q10, #8 + vraddhn.u16 d16, q8, q13 + vraddhn.u16 d27, q9, q11 + vraddhn.u16 d26, q10, q15 + vqadd.u8 d16, d2, d16 + /* 1 cycle bubble */ + vqadd.u8 q9, q0, q13 + vshll.u8 q14, d16, #8 /* convert to 16bpp */ + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + /* 1 cycle bubble */ + vsri.u16 q14, q9, #11 +.endm + +.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head + vld1.16 {d4, d5}, [DST_R, :128]! + vshrn.u16 d6, q2, #8 + fetch_mask_pixblock + vshrn.u16 d7, q2, #3 + fetch_src_pixblock + vmull.u8 q6, d24, d10 + vrshr.u16 q13, q8, #8 + vrshr.u16 q11, q9, #8 + vrshr.u16 q15, q10, #8 + vraddhn.u16 d16, q8, q13 + vraddhn.u16 d27, q9, q11 + vraddhn.u16 d26, q10, q15 + vqadd.u8 d16, d2, d16 + vmull.u8 q1, d24, d9 + vqadd.u8 q9, q0, q13 + vshll.u8 q14, d16, #8 + vmull.u8 q0, d24, d8 + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + vmull.u8 q7, d24, d11 + vsri.u16 q14, q9, #11 + + cache_preload 8, 8 + + vsli.u16 q2, q2, #5 + vrshr.u16 q8, q0, #8 + vrshr.u16 q9, q1, #8 + vrshr.u16 q10, q6, #8 + vrshr.u16 q11, q7, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q9 + vraddhn.u16 d2, q6, q10 + vraddhn.u16 d3, q7, q11 + vsri.u8 d6, d6, #5 + vsri.u8 d7, d7, #6 + vmvn.8 d3, d3 + vshrn.u16 d30, q2, #2 + vst1.16 {d28, d29}, [DST_W, :128]! + vmull.u8 q8, d3, d6 + vmull.u8 q9, d3, d7 + vmull.u8 q10, d3, d30 +.endm + +generate_composite_function \ + pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +/* + * This function needs a special initialization of solid mask. + * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET + * offset, split into color components and replicated in d8-d11 + * registers. Additionally, this function needs all the NEON registers, + * so it has to save d8-d15 registers which are callee saved according + * to ABI. These registers are restored from 'cleanup' macro. All the + * other NEON registers are caller saved, so can be clobbered freely + * without introducing any problems. + */ +.macro pixman_composite_over_n_8_0565_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d8, d11[0] + vdup.8 d9, d11[1] + vdup.8 d10, d11[2] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_over_n_8_0565_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8_0565_init, \ + pixman_composite_over_n_8_0565_cleanup, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_8888_n_0565_init + add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) + vpush {d8-d15} + vld1.32 {d24[0]}, [DUMMY] + vdup.8 d24, d24[3] +.endm + +.macro pixman_composite_over_8888_n_0565_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_8888_n_0565_init, \ + pixman_composite_over_8888_n_0565_cleanup, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0565_0565_process_pixblock_head +.endm + +.macro pixman_composite_src_0565_0565_process_pixblock_tail +.endm + +.macro pixman_composite_src_0565_0565_process_pixblock_tail_head + vst1.16 {d0, d1, d2, d3}, [DST_W, :128]! + fetch_src_pixblock + cache_preload 16, 16 +.endm + +generate_composite_function \ + pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \ + FLAG_DST_WRITEONLY, \ + 16, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0565_0565_process_pixblock_head, \ + pixman_composite_src_0565_0565_process_pixblock_tail, \ + pixman_composite_src_0565_0565_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_n_8_process_pixblock_head +.endm + +.macro pixman_composite_src_n_8_process_pixblock_tail +.endm + +.macro pixman_composite_src_n_8_process_pixblock_tail_head + vst1.8 {d0, d1, d2, d3}, [DST_W, :128]! +.endm + +.macro pixman_composite_src_n_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d0[0]}, [DUMMY] + vsli.u64 d0, d0, #8 + vsli.u64 d0, d0, #16 + vsli.u64 d0, d0, #32 + vorr d1, d0, d0 + vorr q1, q0, q0 +.endm + +.macro pixman_composite_src_n_8_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_8_asm_neon, 0, 0, 8, \ + FLAG_DST_WRITEONLY, \ + 32, /* number of pixels, processed in a single block */ \ + 0, /* prefetch distance */ \ + pixman_composite_src_n_8_init, \ + pixman_composite_src_n_8_cleanup, \ + pixman_composite_src_n_8_process_pixblock_head, \ + pixman_composite_src_n_8_process_pixblock_tail, \ + pixman_composite_src_n_8_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_n_0565_process_pixblock_head +.endm + +.macro pixman_composite_src_n_0565_process_pixblock_tail +.endm + +.macro pixman_composite_src_n_0565_process_pixblock_tail_head + vst1.16 {d0, d1, d2, d3}, [DST_W, :128]! +.endm + +.macro pixman_composite_src_n_0565_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d0[0]}, [DUMMY] + vsli.u64 d0, d0, #16 + vsli.u64 d0, d0, #32 + vorr d1, d0, d0 + vorr q1, q0, q0 +.endm + +.macro pixman_composite_src_n_0565_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_0565_asm_neon, 0, 0, 16, \ + FLAG_DST_WRITEONLY, \ + 16, /* number of pixels, processed in a single block */ \ + 0, /* prefetch distance */ \ + pixman_composite_src_n_0565_init, \ + pixman_composite_src_n_0565_cleanup, \ + pixman_composite_src_n_0565_process_pixblock_head, \ + pixman_composite_src_n_0565_process_pixblock_tail, \ + pixman_composite_src_n_0565_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_n_8888_process_pixblock_head +.endm + +.macro pixman_composite_src_n_8888_process_pixblock_tail +.endm + +.macro pixman_composite_src_n_8888_process_pixblock_tail_head + vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! +.endm + +.macro pixman_composite_src_n_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d0[0]}, [DUMMY] + vsli.u64 d0, d0, #32 + vorr d1, d0, d0 + vorr q1, q0, q0 +.endm + +.macro pixman_composite_src_n_8888_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 0, /* prefetch distance */ \ + pixman_composite_src_n_8888_init, \ + pixman_composite_src_n_8888_cleanup, \ + pixman_composite_src_n_8888_process_pixblock_head, \ + pixman_composite_src_n_8888_process_pixblock_tail, \ + pixman_composite_src_n_8888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_8888_8888_process_pixblock_head +.endm + +.macro pixman_composite_src_8888_8888_process_pixblock_tail +.endm + +.macro pixman_composite_src_8888_8888_process_pixblock_tail_head + vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! + fetch_src_pixblock + cache_preload 8, 8 +.endm + +generate_composite_function \ + pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_8888_8888_process_pixblock_head, \ + pixman_composite_src_8888_8888_process_pixblock_tail, \ + pixman_composite_src_8888_8888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_x888_8888_process_pixblock_head + vorr q0, q0, q2 + vorr q1, q1, q2 +.endm + +.macro pixman_composite_src_x888_8888_process_pixblock_tail +.endm + +.macro pixman_composite_src_x888_8888_process_pixblock_tail_head + vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! + fetch_src_pixblock + vorr q0, q0, q2 + vorr q1, q1, q2 + cache_preload 8, 8 +.endm + +.macro pixman_composite_src_x888_8888_init + vmov.u8 q2, #0xFF + vshl.u32 q2, q2, #24 +.endm + +generate_composite_function \ + pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + pixman_composite_src_x888_8888_init, \ + default_cleanup, \ + pixman_composite_src_x888_8888_process_pixblock_head, \ + pixman_composite_src_x888_8888_process_pixblock_tail, \ + pixman_composite_src_x888_8888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_n_8_8888_process_pixblock_head + /* expecting solid source in {d0, d1, d2, d3} */ + /* mask is in d24 (d25, d26, d27 are unused) */ + + /* in */ + vmull.u8 q8, d24, d0 + vmull.u8 q9, d24, d1 + vmull.u8 q10, d24, d2 + vmull.u8 q11, d24, d3 + vrsra.u16 q8, q8, #8 + vrsra.u16 q9, q9, #8 + vrsra.u16 q10, q10, #8 + vrsra.u16 q11, q11, #8 +.endm + +.macro pixman_composite_src_n_8_8888_process_pixblock_tail + vrshrn.u16 d28, q8, #8 + vrshrn.u16 d29, q9, #8 + vrshrn.u16 d30, q10, #8 + vrshrn.u16 d31, q11, #8 +.endm + +.macro pixman_composite_src_n_8_8888_process_pixblock_tail_head + fetch_mask_pixblock + PF add PF_X, PF_X, #8 + vrshrn.u16 d28, q8, #8 + PF tst PF_CTL, #0x0F + vrshrn.u16 d29, q9, #8 + PF addne PF_X, PF_X, #8 + vrshrn.u16 d30, q10, #8 + PF subne PF_CTL, PF_CTL, #1 + vrshrn.u16 d31, q11, #8 + PF cmp PF_X, ORIG_W + vmull.u8 q8, d24, d0 + PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] + vmull.u8 q9, d24, d1 + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q10, d24, d2 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q11, d24, d3 + PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + vrsra.u16 q8, q8, #8 + vrsra.u16 q9, q9, #8 + vrsra.u16 q10, q10, #8 + vrsra.u16 q11, q11, #8 +.endm + +.macro pixman_composite_src_n_8_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] +.endm + +.macro pixman_composite_src_n_8_8888_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_8_8888_asm_neon, 0, 8, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_src_n_8_8888_init, \ + pixman_composite_src_n_8_8888_cleanup, \ + pixman_composite_src_n_8_8888_process_pixblock_head, \ + pixman_composite_src_n_8_8888_process_pixblock_tail, \ + pixman_composite_src_n_8_8888_process_pixblock_tail_head, \ + +/******************************************************************************/ + +.macro pixman_composite_src_n_8_8_process_pixblock_head + vmull.u8 q0, d24, d16 + vmull.u8 q1, d25, d16 + vmull.u8 q2, d26, d16 + vmull.u8 q3, d27, d16 + vrsra.u16 q0, q0, #8 + vrsra.u16 q1, q1, #8 + vrsra.u16 q2, q2, #8 + vrsra.u16 q3, q3, #8 +.endm + +.macro pixman_composite_src_n_8_8_process_pixblock_tail + vrshrn.u16 d28, q0, #8 + vrshrn.u16 d29, q1, #8 + vrshrn.u16 d30, q2, #8 + vrshrn.u16 d31, q3, #8 +.endm + +.macro pixman_composite_src_n_8_8_process_pixblock_tail_head + fetch_mask_pixblock + PF add PF_X, PF_X, #8 + vrshrn.u16 d28, q0, #8 + PF tst PF_CTL, #0x0F + vrshrn.u16 d29, q1, #8 + PF addne PF_X, PF_X, #8 + vrshrn.u16 d30, q2, #8 + PF subne PF_CTL, PF_CTL, #1 + vrshrn.u16 d31, q3, #8 + PF cmp PF_X, ORIG_W + vmull.u8 q0, d24, d16 + PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] + vmull.u8 q1, d25, d16 + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q2, d26, d16 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q3, d27, d16 + PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + vrsra.u16 q0, q0, #8 + vrsra.u16 q1, q1, #8 + vrsra.u16 q2, q2, #8 + vrsra.u16 q3, q3, #8 +.endm + +.macro pixman_composite_src_n_8_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d16[0]}, [DUMMY] + vdup.8 d16, d16[3] +.endm + +.macro pixman_composite_src_n_8_8_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_8_8_asm_neon, 0, 8, 8, \ + FLAG_DST_WRITEONLY, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_src_n_8_8_init, \ + pixman_composite_src_n_8_8_cleanup, \ + pixman_composite_src_n_8_8_process_pixblock_head, \ + pixman_composite_src_n_8_8_process_pixblock_tail, \ + pixman_composite_src_n_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_n_8_8888_process_pixblock_head + /* expecting deinterleaved source data in {d8, d9, d10, d11} */ + /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ + /* and destination data in {d4, d5, d6, d7} */ + /* mask is in d24 (d25, d26, d27 are unused) */ + + /* in */ + vmull.u8 q6, d24, d8 + vmull.u8 q7, d24, d9 + vmull.u8 q8, d24, d10 + vmull.u8 q9, d24, d11 + vrshr.u16 q10, q6, #8 + vrshr.u16 q11, q7, #8 + vrshr.u16 q12, q8, #8 + vrshr.u16 q13, q9, #8 + vraddhn.u16 d0, q6, q10 + vraddhn.u16 d1, q7, q11 + vraddhn.u16 d2, q8, q12 + vraddhn.u16 d3, q9, q13 + vmvn.8 d25, d3 /* get inverted alpha */ + /* source: d0 - blue, d1 - green, d2 - red, d3 - alpha */ + /* destination: d4 - blue, d5 - green, d6 - red, d7 - alpha */ + /* now do alpha blending */ + vmull.u8 q8, d25, d4 + vmull.u8 q9, d25, d5 + vmull.u8 q10, d25, d6 + vmull.u8 q11, d25, d7 +.endm + +.macro pixman_composite_over_n_8_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q6, q10, #8 + vrshr.u16 q7, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q6, q10 + vraddhn.u16 d31, q7, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +.macro pixman_composite_over_n_8_8888_process_pixblock_tail_head + vrshr.u16 q14, q8, #8 + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrshr.u16 q15, q9, #8 + fetch_mask_pixblock + vrshr.u16 q6, q10, #8 + PF add PF_X, PF_X, #8 + vrshr.u16 q7, q11, #8 + PF tst PF_CTL, #0x0F + vraddhn.u16 d28, q14, q8 + PF addne PF_X, PF_X, #8 + vraddhn.u16 d29, q15, q9 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d30, q6, q10 + PF cmp PF_X, ORIG_W + vraddhn.u16 d31, q7, q11 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vmull.u8 q6, d24, d8 + PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] + vmull.u8 q7, d24, d9 + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q8, d24, d10 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q9, d24, d11 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vqadd.u8 q14, q0, q14 + PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! + vqadd.u8 q15, q1, q15 + vrshr.u16 q10, q6, #8 + vrshr.u16 q11, q7, #8 + vrshr.u16 q12, q8, #8 + vrshr.u16 q13, q9, #8 + vraddhn.u16 d0, q6, q10 + vraddhn.u16 d1, q7, q11 + vraddhn.u16 d2, q8, q12 + vraddhn.u16 d3, q9, q13 + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + vmvn.8 d25, d3 + vmull.u8 q8, d25, d4 + vmull.u8 q9, d25, d5 + vmull.u8 q10, d25, d6 + vmull.u8 q11, d25, d7 +.endm + +.macro pixman_composite_over_n_8_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d8, d11[0] + vdup.8 d9, d11[1] + vdup.8 d10, d11[2] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_over_n_8_8888_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8_8888_asm_neon, 0, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8_8888_init, \ + pixman_composite_over_n_8_8888_cleanup, \ + pixman_composite_over_n_8_8888_process_pixblock_head, \ + pixman_composite_over_n_8_8888_process_pixblock_tail, \ + pixman_composite_over_n_8_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_n_8_8_process_pixblock_head + vmull.u8 q0, d24, d8 + vmull.u8 q1, d25, d8 + vmull.u8 q6, d26, d8 + vmull.u8 q7, d27, d8 + vrshr.u16 q10, q0, #8 + vrshr.u16 q11, q1, #8 + vrshr.u16 q12, q6, #8 + vrshr.u16 q13, q7, #8 + vraddhn.u16 d0, q0, q10 + vraddhn.u16 d1, q1, q11 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d3, q7, q13 + vmvn.8 q12, q0 + vmvn.8 q13, q1 + vmull.u8 q8, d24, d4 + vmull.u8 q9, d25, d5 + vmull.u8 q10, d26, d6 + vmull.u8 q11, d27, d7 +.endm + +.macro pixman_composite_over_n_8_8_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_8_8_process_pixblock_tail_head + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_n_8_8_process_pixblock_tail + fetch_mask_pixblock + cache_preload 32, 32 + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + pixman_composite_over_n_8_8_process_pixblock_head +.endm + +.macro pixman_composite_over_n_8_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d8[0]}, [DUMMY] + vdup.8 d8, d8[3] +.endm + +.macro pixman_composite_over_n_8_8_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8_8_init, \ + pixman_composite_over_n_8_8_cleanup, \ + pixman_composite_over_n_8_8_process_pixblock_head, \ + pixman_composite_over_n_8_8_process_pixblock_tail, \ + pixman_composite_over_n_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head + /* + * 'combine_mask_ca' replacement + * + * input: solid src (n) in {d8, d9, d10, d11} + * dest in {d4, d5, d6, d7 } + * mask in {d24, d25, d26, d27} + * output: updated src in {d0, d1, d2, d3 } + * updated mask in {d24, d25, d26, d3 } + */ + vmull.u8 q0, d24, d8 + vmull.u8 q1, d25, d9 + vmull.u8 q6, d26, d10 + vmull.u8 q7, d27, d11 + vmull.u8 q9, d11, d25 + vmull.u8 q12, d11, d24 + vmull.u8 q13, d11, d26 + vrshr.u16 q8, q0, #8 + vrshr.u16 q10, q1, #8 + vrshr.u16 q11, q6, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q10 + vraddhn.u16 d2, q6, q11 + vrshr.u16 q11, q12, #8 + vrshr.u16 q8, q9, #8 + vrshr.u16 q6, q13, #8 + vrshr.u16 q10, q7, #8 + vraddhn.u16 d24, q12, q11 + vraddhn.u16 d25, q9, q8 + vraddhn.u16 d26, q13, q6 + vraddhn.u16 d3, q7, q10 + /* + * 'combine_over_ca' replacement + * + * output: updated dest in {d28, d29, d30, d31} + */ + vmvn.8 q12, q12 + vmvn.8 d26, d26 + vmull.u8 q8, d24, d4 + vmull.u8 q9, d25, d5 + vmvn.8 d27, d3 + vmull.u8 q10, d26, d6 + vmull.u8 q11, d27, d7 +.endm + +.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail + /* ... continue 'combine_over_ca' replacement */ + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q6, q10, #8 + vrshr.u16 q7, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q6, q10 + vraddhn.u16 d31, q7, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrshr.u16 q6, q10, #8 + vrshr.u16 q7, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q6, q10 + vraddhn.u16 d31, q7, q11 + fetch_mask_pixblock + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 + cache_preload 8, 8 + pixman_composite_over_n_8888_8888_ca_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +.macro pixman_composite_over_n_8888_8888_ca_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d8, d11[0] + vdup.8 d9, d11[1] + vdup.8 d10, d11[2] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_over_n_8888_8888_ca_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8888_8888_ca_init, \ + pixman_composite_over_n_8888_8888_ca_cleanup, \ + pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \ + pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \ + pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_head + /* + * 'combine_mask_ca' replacement + * + * input: solid src (n) in {d8, d9, d10, d11} [B, G, R, A] + * mask in {d24, d25, d26} [B, G, R] + * output: updated src in {d0, d1, d2 } [B, G, R] + * updated mask in {d24, d25, d26} [B, G, R] + */ + vmull.u8 q0, d24, d8 + vmull.u8 q1, d25, d9 + vmull.u8 q6, d26, d10 + vmull.u8 q9, d11, d25 + vmull.u8 q12, d11, d24 + vmull.u8 q13, d11, d26 + vrshr.u16 q8, q0, #8 + vrshr.u16 q10, q1, #8 + vrshr.u16 q11, q6, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q10 + vraddhn.u16 d2, q6, q11 + vrshr.u16 q11, q12, #8 + vrshr.u16 q8, q9, #8 + vrshr.u16 q6, q13, #8 + vraddhn.u16 d24, q12, q11 + vraddhn.u16 d25, q9, q8 + /* + * convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format + * and put data into d16 - blue, d17 - green, d18 - red + */ + vshrn.u16 d17, q2, #3 + vshrn.u16 d18, q2, #8 + vraddhn.u16 d26, q13, q6 + vsli.u16 q2, q2, #5 + vsri.u8 d18, d18, #5 + vsri.u8 d17, d17, #6 + /* + * 'combine_over_ca' replacement + * + * output: updated dest in d16 - blue, d17 - green, d18 - red + */ + vmvn.8 q12, q12 + vshrn.u16 d16, q2, #2 + vmvn.8 d26, d26 + vmull.u8 q6, d16, d24 + vmull.u8 q7, d17, d25 + vmull.u8 q11, d18, d26 +.endm + +.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail + /* ... continue 'combine_over_ca' replacement */ + vrshr.u16 q10, q6, #8 + vrshr.u16 q14, q7, #8 + vrshr.u16 q15, q11, #8 + vraddhn.u16 d16, q10, q6 + vraddhn.u16 d17, q14, q7 + vraddhn.u16 d18, q15, q11 + vqadd.u8 q8, q0, q8 + vqadd.u8 d18, d2, d18 + /* + * convert the results in d16, d17, d18 to r5g6b5 and store + * them into {d28, d29} + */ + vshll.u8 q14, d18, #8 + vshll.u8 q10, d17, #8 + vshll.u8 q15, d16, #8 + vsri.u16 q14, q10, #5 + vsri.u16 q14, q15, #11 +.endm + +.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head + fetch_mask_pixblock + vrshr.u16 q10, q6, #8 + vrshr.u16 q14, q7, #8 + vld1.16 {d4, d5}, [DST_R, :128]! + vrshr.u16 q15, q11, #8 + vraddhn.u16 d16, q10, q6 + vraddhn.u16 d17, q14, q7 + vraddhn.u16 d22, q15, q11 + /* process_pixblock_head */ + /* + * 'combine_mask_ca' replacement + * + * input: solid src (n) in {d8, d9, d10, d11} [B, G, R, A] + * mask in {d24, d25, d26} [B, G, R] + * output: updated src in {d0, d1, d2 } [B, G, R] + * updated mask in {d24, d25, d26} [B, G, R] + */ + vmull.u8 q6, d26, d10 + vqadd.u8 q8, q0, q8 + vmull.u8 q0, d24, d8 + vqadd.u8 d22, d2, d22 + vmull.u8 q1, d25, d9 + /* + * convert the result in d16, d17, d22 to r5g6b5 and store + * it into {d28, d29} + */ + vshll.u8 q14, d22, #8 + vshll.u8 q10, d17, #8 + vshll.u8 q15, d16, #8 + vmull.u8 q9, d11, d25 + vsri.u16 q14, q10, #5 + vmull.u8 q12, d11, d24 + vmull.u8 q13, d11, d26 + vsri.u16 q14, q15, #11 + cache_preload 8, 8 + vrshr.u16 q8, q0, #8 + vrshr.u16 q10, q1, #8 + vrshr.u16 q11, q6, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q10 + vraddhn.u16 d2, q6, q11 + vrshr.u16 q11, q12, #8 + vrshr.u16 q8, q9, #8 + vrshr.u16 q6, q13, #8 + vraddhn.u16 d24, q12, q11 + vraddhn.u16 d25, q9, q8 + /* + * convert 8 r5g6b5 pixel data from {d4, d5} to planar + * 8-bit format and put data into d16 - blue, d17 - green, + * d18 - red + */ + vshrn.u16 d17, q2, #3 + vshrn.u16 d18, q2, #8 + vraddhn.u16 d26, q13, q6 + vsli.u16 q2, q2, #5 + vsri.u8 d17, d17, #6 + vsri.u8 d18, d18, #5 + /* + * 'combine_over_ca' replacement + * + * output: updated dest in d16 - blue, d17 - green, d18 - red + */ + vmvn.8 q12, q12 + vshrn.u16 d16, q2, #2 + vmvn.8 d26, d26 + vmull.u8 q7, d17, d25 + vmull.u8 q6, d16, d24 + vmull.u8 q11, d18, d26 + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +.macro pixman_composite_over_n_8888_0565_ca_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d8, d11[0] + vdup.8 d9, d11[1] + vdup.8 d10, d11[2] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_over_n_8888_0565_ca_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8888_0565_ca_asm_neon, 0, 32, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8888_0565_ca_init, \ + pixman_composite_over_n_8888_0565_ca_cleanup, \ + pixman_composite_over_n_8888_0565_ca_process_pixblock_head, \ + pixman_composite_over_n_8888_0565_ca_process_pixblock_tail, \ + pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_in_n_8_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* and destination data in {d4, d5, d6, d7} */ + vmull.u8 q8, d4, d3 + vmull.u8 q9, d5, d3 + vmull.u8 q10, d6, d3 + vmull.u8 q11, d7, d3 +.endm + +.macro pixman_composite_in_n_8_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q8, q14 + vraddhn.u16 d29, q9, q15 + vraddhn.u16 d30, q10, q12 + vraddhn.u16 d31, q11, q13 +.endm + +.macro pixman_composite_in_n_8_process_pixblock_tail_head + pixman_composite_in_n_8_process_pixblock_tail + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + cache_preload 32, 32 + pixman_composite_in_n_8_process_pixblock_head + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +.macro pixman_composite_in_n_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d3, d3[3] +.endm + +.macro pixman_composite_in_n_8_cleanup +.endm + +generate_composite_function \ + pixman_composite_in_n_8_asm_neon, 0, 0, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_in_n_8_init, \ + pixman_composite_in_n_8_cleanup, \ + pixman_composite_in_n_8_process_pixblock_head, \ + pixman_composite_in_n_8_process_pixblock_tail, \ + pixman_composite_in_n_8_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +.macro pixman_composite_add_n_8_8_process_pixblock_head + /* expecting source data in {d8, d9, d10, d11} */ + /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ + /* and destination data in {d4, d5, d6, d7} */ + /* mask is in d24, d25, d26, d27 */ + vmull.u8 q0, d24, d11 + vmull.u8 q1, d25, d11 + vmull.u8 q6, d26, d11 + vmull.u8 q7, d27, d11 + vrshr.u16 q10, q0, #8 + vrshr.u16 q11, q1, #8 + vrshr.u16 q12, q6, #8 + vrshr.u16 q13, q7, #8 + vraddhn.u16 d0, q0, q10 + vraddhn.u16 d1, q1, q11 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d3, q7, q13 + vqadd.u8 q14, q0, q2 + vqadd.u8 q15, q1, q3 +.endm + +.macro pixman_composite_add_n_8_8_process_pixblock_tail +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_add_n_8_8_process_pixblock_tail_head + pixman_composite_add_n_8_8_process_pixblock_tail + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + fetch_mask_pixblock + cache_preload 32, 32 + pixman_composite_add_n_8_8_process_pixblock_head +.endm + +.macro pixman_composite_add_n_8_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_add_n_8_8_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_add_n_8_8_asm_neon, 0, 8, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_n_8_8_init, \ + pixman_composite_add_n_8_8_cleanup, \ + pixman_composite_add_n_8_8_process_pixblock_head, \ + pixman_composite_add_n_8_8_process_pixblock_tail, \ + pixman_composite_add_n_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8_8_8_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ + /* mask in {d24, d25, d26, d27} */ + vmull.u8 q8, d24, d0 + vmull.u8 q9, d25, d1 + vmull.u8 q10, d26, d2 + vmull.u8 q11, d27, d3 + vrshr.u16 q0, q8, #8 + vrshr.u16 q1, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q9 + vraddhn.u16 d2, q12, q10 + vraddhn.u16 d3, q13, q11 + vqadd.u8 q14, q0, q2 + vqadd.u8 q15, q1, q3 +.endm + +.macro pixman_composite_add_8_8_8_process_pixblock_tail +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_add_8_8_8_process_pixblock_tail_head + pixman_composite_add_8_8_8_process_pixblock_tail + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + fetch_mask_pixblock + fetch_src_pixblock + cache_preload 32, 32 + pixman_composite_add_8_8_8_process_pixblock_head +.endm + +.macro pixman_composite_add_8_8_8_init +.endm + +.macro pixman_composite_add_8_8_8_cleanup +.endm + +generate_composite_function \ + pixman_composite_add_8_8_8_asm_neon, 8, 8, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_8_8_8_init, \ + pixman_composite_add_8_8_8_cleanup, \ + pixman_composite_add_8_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_8_process_pixblock_tail, \ + pixman_composite_add_8_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8888_8888_8888_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ + /* mask in {d24, d25, d26, d27} */ + vmull.u8 q8, d27, d0 + vmull.u8 q9, d27, d1 + vmull.u8 q10, d27, d2 + vmull.u8 q11, d27, d3 + /* 1 cycle bubble */ + vrsra.u16 q8, q8, #8 + vrsra.u16 q9, q9, #8 + vrsra.u16 q10, q10, #8 + vrsra.u16 q11, q11, #8 +.endm + +.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail + /* 2 cycle bubble */ + vrshrn.u16 d28, q8, #8 + vrshrn.u16 d29, q9, #8 + vrshrn.u16 d30, q10, #8 + vrshrn.u16 d31, q11, #8 + vqadd.u8 q14, q2, q14 + /* 1 cycle bubble */ + vqadd.u8 q15, q3, q15 +.endm + +.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head + fetch_src_pixblock + vrshrn.u16 d28, q8, #8 + fetch_mask_pixblock + vrshrn.u16 d29, q9, #8 + vmull.u8 q8, d27, d0 + vrshrn.u16 d30, q10, #8 + vmull.u8 q9, d27, d1 + vrshrn.u16 d31, q11, #8 + vmull.u8 q10, d27, d2 + vqadd.u8 q14, q2, q14 + vmull.u8 q11, d27, d3 + vqadd.u8 q15, q3, q15 + vrsra.u16 q8, q8, #8 + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrsra.u16 q9, q9, #8 + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + vrsra.u16 q10, q10, #8 + + cache_preload 8, 8 + + vrsra.u16 q11, q11, #8 +.endm + +generate_composite_function \ + pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head + +generate_composite_function_single_scanline \ + pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +generate_composite_function \ + pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 27 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_add_n_8_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] +.endm + +.macro pixman_composite_add_n_8_8888_cleanup +.endm + +generate_composite_function \ + pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_n_8_8888_init, \ + pixman_composite_add_n_8_8888_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 27 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_add_8888_n_8888_init + add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) + vld1.32 {d27[0]}, [DUMMY] + vdup.8 d27, d27[3] +.endm + +.macro pixman_composite_add_8888_n_8888_cleanup +.endm + +generate_composite_function \ + pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_8888_n_8888_init, \ + pixman_composite_add_8888_n_8888_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 27 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ + /* solid mask is in d15 */ + + /* 'in' */ + vmull.u8 q8, d15, d3 + vmull.u8 q6, d15, d2 + vmull.u8 q5, d15, d1 + vmull.u8 q4, d15, d0 + vrshr.u16 q13, q8, #8 + vrshr.u16 q12, q6, #8 + vrshr.u16 q11, q5, #8 + vrshr.u16 q10, q4, #8 + vraddhn.u16 d3, q8, q13 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d1, q5, q11 + vraddhn.u16 d0, q4, q10 + vmvn.8 d24, d3 /* get inverted alpha */ + /* now do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 + vmull.u8 q11, d24, d7 +.endm + +.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + fetch_mask_pixblock + pixman_composite_out_reverse_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +generate_composite_function_single_scanline \ + pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \ + pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \ + pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 12 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_8888_n_8888_process_pixblock_head + pixman_composite_out_reverse_8888_n_8888_process_pixblock_head +.endm + +.macro pixman_composite_over_8888_n_8888_process_pixblock_tail + pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + pixman_composite_over_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +.macro pixman_composite_over_8888_n_8888_init + add DUMMY, sp, #48 + vpush {d8-d15} + vld1.32 {d15[0]}, [DUMMY] + vdup.8 d15, d15[3] +.endm + +.macro pixman_composite_over_8888_n_8888_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_8888_n_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_8888_n_8888_init, \ + pixman_composite_over_8888_n_8888_cleanup, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail_head + +/******************************************************************************/ + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + fetch_mask_pixblock + pixman_composite_over_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 12 /* mask_basereg */ + +generate_composite_function_single_scanline \ + pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 12 /* mask_basereg */ + +/******************************************************************************/ + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + fetch_mask_pixblock + pixman_composite_over_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0888_0888_process_pixblock_head +.endm + +.macro pixman_composite_src_0888_0888_process_pixblock_tail +.endm + +.macro pixman_composite_src_0888_0888_process_pixblock_tail_head + vst3.8 {d0, d1, d2}, [DST_W]! + fetch_src_pixblock + cache_preload 8, 8 +.endm + +generate_composite_function \ + pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0888_0888_process_pixblock_head, \ + pixman_composite_src_0888_0888_process_pixblock_tail, \ + pixman_composite_src_0888_0888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0888_8888_rev_process_pixblock_head + vswp d0, d2 +.endm + +.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail +.endm + +.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head + vst4.8 {d0, d1, d2, d3}, [DST_W]! + fetch_src_pixblock + vswp d0, d2 + cache_preload 8, 8 +.endm + +.macro pixman_composite_src_0888_8888_rev_init + veor d3, d3, d3 +.endm + +generate_composite_function \ + pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + pixman_composite_src_0888_8888_rev_init, \ + default_cleanup, \ + pixman_composite_src_0888_8888_rev_process_pixblock_head, \ + pixman_composite_src_0888_8888_rev_process_pixblock_tail, \ + pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0888_0565_rev_process_pixblock_head + vshll.u8 q8, d1, #8 + vshll.u8 q9, d2, #8 +.endm + +.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail + vshll.u8 q14, d0, #8 + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head + vshll.u8 q14, d0, #8 + fetch_src_pixblock + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 + vshll.u8 q8, d1, #8 + vst1.16 {d28, d29}, [DST_W, :128]! + vshll.u8 q9, d2, #8 +.endm + +generate_composite_function \ + pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0888_0565_rev_process_pixblock_head, \ + pixman_composite_src_0888_0565_rev_process_pixblock_tail, \ + pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_pixbuf_8888_process_pixblock_head + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 +.endm + +.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + vraddhn.u16 d30, q11, q8 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d28, q13, q10 +.endm + +.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + fetch_src_pixblock + vraddhn.u16 d30, q11, q8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d28, q13, q10 + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! +.endm + +generate_composite_function \ + pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_pixbuf_8888_process_pixblock_head, \ + pixman_composite_src_pixbuf_8888_process_pixblock_tail, \ + pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_rpixbuf_8888_process_pixblock_head + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 +.endm + +.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + vraddhn.u16 d28, q11, q8 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d30, q13, q10 +.endm + +.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + fetch_src_pixblock + vraddhn.u16 d28, q11, q8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d30, q13, q10 + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! +.endm + +generate_composite_function \ + pixman_composite_src_rpixbuf_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_rpixbuf_8888_process_pixblock_head, \ + pixman_composite_src_rpixbuf_8888_process_pixblock_tail, \ + pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_0565_8_0565_process_pixblock_head + /* mask is in d15 */ + convert_0565_to_x888 q4, d2, d1, d0 + convert_0565_to_x888 q5, d6, d5, d4 + /* source pixel data is in {d0, d1, d2, XX} */ + /* destination pixel data is in {d4, d5, d6, XX} */ + vmvn.8 d7, d15 + vmull.u8 q6, d15, d2 + vmull.u8 q5, d15, d1 + vmull.u8 q4, d15, d0 + vmull.u8 q8, d7, d4 + vmull.u8 q9, d7, d5 + vmull.u8 q13, d7, d6 + vrshr.u16 q12, q6, #8 + vrshr.u16 q11, q5, #8 + vrshr.u16 q10, q4, #8 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d1, q5, q11 + vraddhn.u16 d0, q4, q10 +.endm + +.macro pixman_composite_over_0565_8_0565_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q13, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q13 + vqadd.u8 q0, q0, q14 + vqadd.u8 q1, q1, q15 + /* 32bpp result is in {d0, d1, d2, XX} */ + convert_8888_to_0565 d2, d1, d0, q14, q15, q3 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head + fetch_mask_pixblock + pixman_composite_over_0565_8_0565_process_pixblock_tail + fetch_src_pixblock + vld1.16 {d10, d11}, [DST_R, :128]! + cache_preload 8, 8 + pixman_composite_over_0565_8_0565_process_pixblock_head + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_0565_8_0565_process_pixblock_head, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_0565_n_0565_init + add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) + vpush {d8-d15} + vld1.32 {d15[0]}, [DUMMY] + vdup.8 d15, d15[3] +.endm + +.macro pixman_composite_over_0565_n_0565_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_0565_n_0565_init, \ + pixman_composite_over_0565_n_0565_cleanup, \ + pixman_composite_over_0565_8_0565_process_pixblock_head, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_add_0565_8_0565_process_pixblock_head + /* mask is in d15 */ + convert_0565_to_x888 q4, d2, d1, d0 + convert_0565_to_x888 q5, d6, d5, d4 + /* source pixel data is in {d0, d1, d2, XX} */ + /* destination pixel data is in {d4, d5, d6, XX} */ + vmull.u8 q6, d15, d2 + vmull.u8 q5, d15, d1 + vmull.u8 q4, d15, d0 + vrshr.u16 q12, q6, #8 + vrshr.u16 q11, q5, #8 + vrshr.u16 q10, q4, #8 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d1, q5, q11 + vraddhn.u16 d0, q4, q10 +.endm + +.macro pixman_composite_add_0565_8_0565_process_pixblock_tail + vqadd.u8 q0, q0, q2 + vqadd.u8 q1, q1, q3 + /* 32bpp result is in {d0, d1, d2, XX} */ + convert_8888_to_0565 d2, d1, d0, q14, q15, q3 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head + fetch_mask_pixblock + pixman_composite_add_0565_8_0565_process_pixblock_tail + fetch_src_pixblock + vld1.16 {d10, d11}, [DST_R, :128]! + cache_preload 8, 8 + pixman_composite_add_0565_8_0565_process_pixblock_head + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_add_0565_8_0565_process_pixblock_head, \ + pixman_composite_add_0565_8_0565_process_pixblock_tail, \ + pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_out_reverse_8_0565_process_pixblock_head + /* mask is in d15 */ + convert_0565_to_x888 q5, d6, d5, d4 + /* destination pixel data is in {d4, d5, d6, xx} */ + vmvn.8 d24, d15 /* get inverted alpha */ + /* now do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 +.endm + +.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vraddhn.u16 d0, q14, q8 + vraddhn.u16 d1, q15, q9 + vraddhn.u16 d2, q12, q10 + /* 32bpp result is in {d0, d1, d2, XX} */ + convert_8888_to_0565 d2, d1, d0, q14, q15, q3 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head + fetch_src_pixblock + pixman_composite_out_reverse_8_0565_process_pixblock_tail + vld1.16 {d10, d11}, [DST_R, :128]! + cache_preload 8, 8 + pixman_composite_out_reverse_8_0565_process_pixblock_head + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_out_reverse_8_0565_process_pixblock_head, \ + pixman_composite_out_reverse_8_0565_process_pixblock_tail, \ + pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 15, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_out_reverse_8_8888_process_pixblock_head + /* src is in d0 */ + /* destination pixel data is in {d4, d5, d6, d7} */ + vmvn.8 d1, d0 /* get inverted alpha */ + /* now do alpha blending */ + vmull.u8 q8, d1, d4 + vmull.u8 q9, d1, d5 + vmull.u8 q10, d1, d6 + vmull.u8 q11, d1, d7 +.endm + +.macro pixman_composite_out_reverse_8_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + /* 32bpp result is in {d28, d29, d30, d31} */ +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_out_reverse_8_8888_process_pixblock_tail_head + fetch_src_pixblock + pixman_composite_out_reverse_8_8888_process_pixblock_tail + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + cache_preload 8, 8 + pixman_composite_out_reverse_8_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_out_reverse_8_8888_asm_neon, 8, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_out_reverse_8_8888_process_pixblock_head, \ + pixman_composite_out_reverse_8_8888_process_pixblock_tail, \ + pixman_composite_out_reverse_8_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_process_pixblock_tail_head + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_0565_OVER_asm_neon, 32, 0, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_0565_process_pixblock_head, \ + pixman_composite_over_8888_0565_process_pixblock_tail, \ + pixman_composite_over_8888_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_0565_SRC_asm_neon, 32, 0, 16, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_8888_0565_process_pixblock_head, \ + pixman_composite_src_8888_0565_process_pixblock_tail, \ + pixman_composite_src_8888_0565_process_pixblock_tail_head + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_0565_8888_SRC_asm_neon, 16, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0565_8888_process_pixblock_head, \ + pixman_composite_src_0565_8888_process_pixblock_tail, \ + pixman_composite_src_0565_8888_process_pixblock_tail_head + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_neon, 32, 8, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 24 /* mask_basereg */ + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_neon, 16, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_0565_8_0565_process_pixblock_head, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +/* Supplementary macro for setting function attributes */ +.macro pixman_asm_function fname + .func fname + .global fname +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: +.endm + +/* + * Bilinear scaling support code which tries to provide pixel fetching, color + * format conversion, and interpolation as separate macros which can be used + * as the basic building blocks for constructing bilinear scanline functions. + */ + +.macro bilinear_load_8888 reg1, reg2, tmp + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + vld1.32 {reg1}, [TMP1], STRIDE + vld1.32 {reg2}, [TMP1] +.endm + +.macro bilinear_load_0565 reg1, reg2, tmp + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #1 + vld1.32 {reg2[0]}, [TMP1], STRIDE + vld1.32 {reg2[1]}, [TMP1] + convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp +.endm + +.macro bilinear_load_and_vertical_interpolate_two_8888 \ + acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2 + + bilinear_load_8888 reg1, reg2, tmp1 + vmull.u8 acc1, reg1, d28 + vmlal.u8 acc1, reg2, d29 + bilinear_load_8888 reg3, reg4, tmp2 + vmull.u8 acc2, reg3, d28 + vmlal.u8 acc2, reg4, d29 +.endm + +.macro bilinear_load_and_vertical_interpolate_four_8888 \ + xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ + yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi + + bilinear_load_and_vertical_interpolate_two_8888 \ + xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi + bilinear_load_and_vertical_interpolate_two_8888 \ + yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi +.endm + +.macro bilinear_load_and_vertical_interpolate_two_0565 \ + acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi + + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #1 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #1 + vld1.32 {acc2lo[0]}, [TMP1], STRIDE + vld1.32 {acc2hi[0]}, [TMP2], STRIDE + vld1.32 {acc2lo[1]}, [TMP1] + vld1.32 {acc2hi[1]}, [TMP2] + convert_0565_to_x888 acc2, reg3, reg2, reg1 + vzip.u8 reg1, reg3 + vzip.u8 reg2, reg4 + vzip.u8 reg3, reg4 + vzip.u8 reg1, reg2 + vmull.u8 acc1, reg1, d28 + vmlal.u8 acc1, reg2, d29 + vmull.u8 acc2, reg3, d28 + vmlal.u8 acc2, reg4, d29 +.endm + +.macro bilinear_load_and_vertical_interpolate_four_0565 \ + xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ + yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi + + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #1 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #1 + vld1.32 {xacc2lo[0]}, [TMP1], STRIDE + vld1.32 {xacc2hi[0]}, [TMP2], STRIDE + vld1.32 {xacc2lo[1]}, [TMP1] + vld1.32 {xacc2hi[1]}, [TMP2] + convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1 + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #1 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #1 + vld1.32 {yacc2lo[0]}, [TMP1], STRIDE + vzip.u8 xreg1, xreg3 + vld1.32 {yacc2hi[0]}, [TMP2], STRIDE + vzip.u8 xreg2, xreg4 + vld1.32 {yacc2lo[1]}, [TMP1] + vzip.u8 xreg3, xreg4 + vld1.32 {yacc2hi[1]}, [TMP2] + vzip.u8 xreg1, xreg2 + convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1 + vmull.u8 xacc1, xreg1, d28 + vzip.u8 yreg1, yreg3 + vmlal.u8 xacc1, xreg2, d29 + vzip.u8 yreg2, yreg4 + vmull.u8 xacc2, xreg3, d28 + vzip.u8 yreg3, yreg4 + vmlal.u8 xacc2, xreg4, d29 + vzip.u8 yreg1, yreg2 + vmull.u8 yacc1, yreg1, d28 + vmlal.u8 yacc1, yreg2, d29 + vmull.u8 yacc2, yreg3, d28 + vmlal.u8 yacc2, yreg4, d29 +.endm + +.macro bilinear_store_8888 numpix, tmp1, tmp2 +.if numpix == 4 + vst1.32 {d0, d1}, [OUT, :128]! +.elseif numpix == 2 + vst1.32 {d0}, [OUT, :64]! +.elseif numpix == 1 + vst1.32 {d0[0]}, [OUT, :32]! +.else + .error bilinear_store_8888 numpix is unsupported +.endif +.endm + +.macro bilinear_store_0565 numpix, tmp1, tmp2 + vuzp.u8 d0, d1 + vuzp.u8 d2, d3 + vuzp.u8 d1, d3 + vuzp.u8 d0, d2 + convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2 +.if numpix == 4 + vst1.16 {d2}, [OUT, :64]! +.elseif numpix == 2 + vst1.32 {d2[0]}, [OUT, :32]! +.elseif numpix == 1 + vst1.16 {d2[0]}, [OUT, :16]! +.else + .error bilinear_store_0565 numpix is unsupported +.endif +.endm + +.macro bilinear_interpolate_last_pixel src_fmt, dst_fmt + bilinear_load_&src_fmt d0, d1, d2 + vmull.u8 q1, d0, d28 + vmlal.u8 q1, d1, d29 + /* 5 cycles bubble */ + vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q0, d2, d30 + vmlal.u16 q0, d3, d30 + /* 5 cycles bubble */ + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + /* 3 cycles bubble */ + vmovn.u16 d0, q0 + /* 1 cycle bubble */ + bilinear_store_&dst_fmt 1, q2, q3 +.endm + +.macro bilinear_interpolate_two_pixels src_fmt, dst_fmt + bilinear_load_and_vertical_interpolate_two_&src_fmt \ + q1, q11, d0, d1, d20, d21, d22, d23 + vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q0, d2, d30 + vmlal.u16 q0, d3, d30 + vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q10, d22, d31 + vmlal.u16 q10, d23, d31 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS) + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vadd.u16 q12, q12, q13 + vmovn.u16 d0, q0 + bilinear_store_&dst_fmt 2, q2, q3 +.endm + +.macro bilinear_interpolate_four_pixels src_fmt, dst_fmt + bilinear_load_and_vertical_interpolate_four_&src_fmt \ + q1, q11, d0, d1, d20, d21, d22, d23 \ + q3, q9, d4, d5, d16, d17, d18, d19 + pld [TMP1, PF_OFFS] + sub TMP1, TMP1, STRIDE + vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q0, d2, d30 + vmlal.u16 q0, d3, d30 + vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q10, d22, d31 + vmlal.u16 q10, d23, d31 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshll.u16 q2, d6, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q2, d6, d30 + vmlal.u16 q2, d7, d30 + vshll.u16 q8, d18, #BILINEAR_INTERPOLATION_BITS + pld [TMP2, PF_OFFS] + vmlsl.u16 q8, d18, d31 + vmlal.u16 q8, d19, d31 + vadd.u16 q12, q12, q13 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d5, q8, #(2 * BILINEAR_INTERPOLATION_BITS) + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vmovn.u16 d0, q0 + vmovn.u16 d1, q2 + vadd.u16 q12, q12, q13 + bilinear_store_&dst_fmt 4, q2, q3 +.endm + +.macro bilinear_interpolate_four_pixels_head src_fmt, dst_fmt +.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt + bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_head +.else + bilinear_interpolate_four_pixels src_fmt, dst_fmt +.endif +.endm + +.macro bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt +.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt + bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail +.endif +.endm + +.macro bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt +.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt + bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail_head +.else + bilinear_interpolate_four_pixels src_fmt, dst_fmt +.endif +.endm + +.macro bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt +.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt + bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_head +.else + bilinear_interpolate_four_pixels_head src_fmt, dst_fmt + bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt +.endif +.endm + +.macro bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt +.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt + bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail +.else + bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt +.endif +.endm + +.macro bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt +.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt + bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail_head +.else + bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt + bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt +.endif +.endm + +.set BILINEAR_FLAG_UNROLL_4, 0 +.set BILINEAR_FLAG_UNROLL_8, 1 +.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2 + +/* + * Main template macro for generating NEON optimized bilinear scanline + * functions. + * + * Bilinear scanline scaler macro template uses the following arguments: + * fname - name of the function to generate + * src_fmt - source color format (8888 or 0565) + * dst_fmt - destination color format (8888 or 0565) + * bpp_shift - (1 << bpp_shift) is the size of source pixel in bytes + * prefetch_distance - prefetch in the source image by that many + * pixels ahead + */ + +.macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \ + src_bpp_shift, dst_bpp_shift, \ + prefetch_distance, flags + +pixman_asm_function fname + OUT .req r0 + TOP .req r1 + BOTTOM .req r2 + WT .req r3 + WB .req r4 + X .req r5 + UX .req r6 + WIDTH .req ip + TMP1 .req r3 + TMP2 .req r4 + PF_OFFS .req r7 + TMP3 .req r8 + TMP4 .req r9 + STRIDE .req r2 + + mov ip, sp + push {r4, r5, r6, r7, r8, r9} + mov PF_OFFS, #prefetch_distance + ldmia ip, {WB, X, UX, WIDTH} + mul PF_OFFS, PF_OFFS, UX + +.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0 + vpush {d8-d15} +.endif + + sub STRIDE, BOTTOM, TOP + .unreq BOTTOM + + cmp WIDTH, #0 + ble 3f + + vdup.u16 q12, X + vdup.u16 q13, UX + vdup.u8 d28, WT + vdup.u8 d29, WB + vadd.u16 d25, d25, d26 + + /* ensure good destination alignment */ + cmp WIDTH, #1 + blt 0f + tst OUT, #(1 << dst_bpp_shift) + beq 0f + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vadd.u16 q12, q12, q13 + bilinear_interpolate_last_pixel src_fmt, dst_fmt + sub WIDTH, WIDTH, #1 +0: + vadd.u16 q13, q13, q13 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vadd.u16 q12, q12, q13 + + cmp WIDTH, #2 + blt 0f + tst OUT, #(1 << (dst_bpp_shift + 1)) + beq 0f + bilinear_interpolate_two_pixels src_fmt, dst_fmt + sub WIDTH, WIDTH, #2 +0: +.if ((flags) & BILINEAR_FLAG_UNROLL_8) != 0 +/*********** 8 pixels per iteration *****************/ + cmp WIDTH, #4 + blt 0f + tst OUT, #(1 << (dst_bpp_shift + 2)) + beq 0f + bilinear_interpolate_four_pixels src_fmt, dst_fmt + sub WIDTH, WIDTH, #4 +0: + subs WIDTH, WIDTH, #8 + blt 1f + mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift) + bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt + subs WIDTH, WIDTH, #8 + blt 5f +0: + bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt + subs WIDTH, WIDTH, #8 + bge 0b +5: + bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt +1: + tst WIDTH, #4 + beq 2f + bilinear_interpolate_four_pixels src_fmt, dst_fmt +2: +.else +/*********** 4 pixels per iteration *****************/ + subs WIDTH, WIDTH, #4 + blt 1f + mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift) + bilinear_interpolate_four_pixels_head src_fmt, dst_fmt + subs WIDTH, WIDTH, #4 + blt 5f +0: + bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt + subs WIDTH, WIDTH, #4 + bge 0b +5: + bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt +1: +/****************************************************/ +.endif + /* handle the remaining trailing pixels */ + tst WIDTH, #2 + beq 2f + bilinear_interpolate_two_pixels src_fmt, dst_fmt +2: + tst WIDTH, #1 + beq 3f + bilinear_interpolate_last_pixel src_fmt, dst_fmt +3: +.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0 + vpop {d8-d15} +.endif + pop {r4, r5, r6, r7, r8, r9} + bx lr + + .unreq OUT + .unreq TOP + .unreq WT + .unreq WB + .unreq X + .unreq UX + .unreq WIDTH + .unreq TMP1 + .unreq TMP2 + .unreq PF_OFFS + .unreq TMP3 + .unreq TMP4 + .unreq STRIDE +.endfunc + +.endm + +/*****************************************************************************/ + +.set have_bilinear_interpolate_four_pixels_8888_8888, 1 + +.macro bilinear_interpolate_four_pixels_8888_8888_head + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #2 + + vld1.32 {d22}, [TMP1], STRIDE + vld1.32 {d23}, [TMP1] + mov TMP3, X, asr #16 + add X, X, UX + add TMP3, TOP, TMP3, asl #2 + vmull.u8 q8, d22, d28 + vmlal.u8 q8, d23, d29 + + vld1.32 {d22}, [TMP2], STRIDE + vld1.32 {d23}, [TMP2] + mov TMP4, X, asr #16 + add X, X, UX + add TMP4, TOP, TMP4, asl #2 + vmull.u8 q9, d22, d28 + vmlal.u8 q9, d23, d29 + + vld1.32 {d22}, [TMP3], STRIDE + vld1.32 {d23}, [TMP3] + vmull.u8 q10, d22, d28 + vmlal.u8 q10, d23, d29 + + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q0, d16, d30 + vmlal.u16 q0, d17, d30 + + pld [TMP4, PF_OFFS] + vld1.32 {d16}, [TMP4], STRIDE + vld1.32 {d17}, [TMP4] + pld [TMP4, PF_OFFS] + vmull.u8 q11, d16, d28 + vmlal.u8 q11, d17, d29 + + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q1, d18, d31 +.endm + +.macro bilinear_interpolate_four_pixels_8888_8888_tail + vmlal.u16 q1, d19, d31 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q2, d20, d30 + vmlal.u16 q2, d21, d30 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q3, d22, d31 + vmlal.u16 q3, d23, d31 + vadd.u16 q12, q12, q13 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) + vmovn.u16 d6, q0 + vmovn.u16 d7, q2 + vadd.u16 q12, q12, q13 + vst1.32 {d6, d7}, [OUT, :128]! +.endm + +.macro bilinear_interpolate_four_pixels_8888_8888_tail_head + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #2 + vmlal.u16 q1, d19, d31 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q2, d20, d30 + vmlal.u16 q2, d21, d30 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS + vld1.32 {d20}, [TMP1], STRIDE + vmlsl.u16 q3, d22, d31 + vmlal.u16 q3, d23, d31 + vld1.32 {d21}, [TMP1] + vmull.u8 q8, d20, d28 + vmlal.u8 q8, d21, d29 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vld1.32 {d22}, [TMP2], STRIDE + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) + vadd.u16 q12, q12, q13 + vld1.32 {d23}, [TMP2] + vmull.u8 q9, d22, d28 + mov TMP3, X, asr #16 + add X, X, UX + add TMP3, TOP, TMP3, asl #2 + mov TMP4, X, asr #16 + add X, X, UX + add TMP4, TOP, TMP4, asl #2 + vmlal.u8 q9, d23, d29 + vld1.32 {d22}, [TMP3], STRIDE + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vld1.32 {d23}, [TMP3] + vmull.u8 q10, d22, d28 + vmlal.u8 q10, d23, d29 + vmovn.u16 d6, q0 + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS + vmovn.u16 d7, q2 + vmlsl.u16 q0, d16, d30 + vmlal.u16 q0, d17, d30 + pld [TMP4, PF_OFFS] + vld1.32 {d16}, [TMP4], STRIDE + vadd.u16 q12, q12, q13 + vld1.32 {d17}, [TMP4] + pld [TMP4, PF_OFFS] + vmull.u8 q11, d16, d28 + vmlal.u8 q11, d17, d29 + vst1.32 {d6, d7}, [OUT, :128]! + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q1, d18, d31 +.endm + +/*****************************************************************************/ + +.set have_bilinear_interpolate_eight_pixels_8888_0565, 1 + +.macro bilinear_interpolate_eight_pixels_8888_0565_head + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #2 + vld1.32 {d20}, [TMP1], STRIDE + vld1.32 {d21}, [TMP1] + vmull.u8 q8, d20, d28 + vmlal.u8 q8, d21, d29 + vld1.32 {d22}, [TMP2], STRIDE + vld1.32 {d23}, [TMP2] + vmull.u8 q9, d22, d28 + mov TMP3, X, asr #16 + add X, X, UX + add TMP3, TOP, TMP3, asl #2 + mov TMP4, X, asr #16 + add X, X, UX + add TMP4, TOP, TMP4, asl #2 + vmlal.u8 q9, d23, d29 + vld1.32 {d22}, [TMP3], STRIDE + vld1.32 {d23}, [TMP3] + vmull.u8 q10, d22, d28 + vmlal.u8 q10, d23, d29 + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q0, d16, d30 + vmlal.u16 q0, d17, d30 + pld [TMP4, PF_OFFS] + vld1.32 {d16}, [TMP4], STRIDE + vld1.32 {d17}, [TMP4] + pld [TMP4, PF_OFFS] + vmull.u8 q11, d16, d28 + vmlal.u8 q11, d17, d29 + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q1, d18, d31 + + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #2 + vmlal.u16 q1, d19, d31 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q2, d20, d30 + vmlal.u16 q2, d21, d30 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS + vld1.32 {d20}, [TMP1], STRIDE + vmlsl.u16 q3, d22, d31 + vmlal.u16 q3, d23, d31 + vld1.32 {d21}, [TMP1] + vmull.u8 q8, d20, d28 + vmlal.u8 q8, d21, d29 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vld1.32 {d22}, [TMP2], STRIDE + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) + vadd.u16 q12, q12, q13 + vld1.32 {d23}, [TMP2] + vmull.u8 q9, d22, d28 + mov TMP3, X, asr #16 + add X, X, UX + add TMP3, TOP, TMP3, asl #2 + mov TMP4, X, asr #16 + add X, X, UX + add TMP4, TOP, TMP4, asl #2 + vmlal.u8 q9, d23, d29 + vld1.32 {d22}, [TMP3], STRIDE + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vld1.32 {d23}, [TMP3] + vmull.u8 q10, d22, d28 + vmlal.u8 q10, d23, d29 + vmovn.u16 d8, q0 + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS + vmovn.u16 d9, q2 + vmlsl.u16 q0, d16, d30 + vmlal.u16 q0, d17, d30 + pld [TMP4, PF_OFFS] + vld1.32 {d16}, [TMP4], STRIDE + vadd.u16 q12, q12, q13 + vld1.32 {d17}, [TMP4] + pld [TMP4, PF_OFFS] + vmull.u8 q11, d16, d28 + vmlal.u8 q11, d17, d29 + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q1, d18, d31 +.endm + +.macro bilinear_interpolate_eight_pixels_8888_0565_tail + vmlal.u16 q1, d19, d31 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q2, d20, d30 + vmlal.u16 q2, d21, d30 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q3, d22, d31 + vmlal.u16 q3, d23, d31 + vadd.u16 q12, q12, q13 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) + vmovn.u16 d10, q0 + vmovn.u16 d11, q2 + vadd.u16 q12, q12, q13 + + vuzp.u8 d8, d9 + vuzp.u8 d10, d11 + vuzp.u8 d9, d11 + vuzp.u8 d8, d10 + vshll.u8 q6, d9, #8 + vshll.u8 q5, d10, #8 + vshll.u8 q7, d8, #8 + vsri.u16 q5, q6, #5 + vsri.u16 q5, q7, #11 + vst1.32 {d10, d11}, [OUT, :128]! +.endm + +.macro bilinear_interpolate_eight_pixels_8888_0565_tail_head + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #2 + vmlal.u16 q1, d19, d31 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vuzp.u8 d8, d9 + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q2, d20, d30 + vmlal.u16 q2, d21, d30 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS + vld1.32 {d20}, [TMP1], STRIDE + vmlsl.u16 q3, d22, d31 + vmlal.u16 q3, d23, d31 + vld1.32 {d21}, [TMP1] + vmull.u8 q8, d20, d28 + vmlal.u8 q8, d21, d29 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vld1.32 {d22}, [TMP2], STRIDE + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) + vadd.u16 q12, q12, q13 + vld1.32 {d23}, [TMP2] + vmull.u8 q9, d22, d28 + mov TMP3, X, asr #16 + add X, X, UX + add TMP3, TOP, TMP3, asl #2 + mov TMP4, X, asr #16 + add X, X, UX + add TMP4, TOP, TMP4, asl #2 + vmlal.u8 q9, d23, d29 + vld1.32 {d22}, [TMP3], STRIDE + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vld1.32 {d23}, [TMP3] + vmull.u8 q10, d22, d28 + vmlal.u8 q10, d23, d29 + vmovn.u16 d10, q0 + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS + vmovn.u16 d11, q2 + vmlsl.u16 q0, d16, d30 + vmlal.u16 q0, d17, d30 + pld [TMP4, PF_OFFS] + vld1.32 {d16}, [TMP4], STRIDE + vadd.u16 q12, q12, q13 + vld1.32 {d17}, [TMP4] + pld [TMP4, PF_OFFS] + vmull.u8 q11, d16, d28 + vmlal.u8 q11, d17, d29 + vuzp.u8 d10, d11 + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS + vmlsl.u16 q1, d18, d31 + + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #2 + vmlal.u16 q1, d19, d31 + vuzp.u8 d9, d11 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS + vuzp.u8 d8, d10 + vmlsl.u16 q2, d20, d30 + vmlal.u16 q2, d21, d30 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS + vld1.32 {d20}, [TMP1], STRIDE + vmlsl.u16 q3, d22, d31 + vmlal.u16 q3, d23, d31 + vld1.32 {d21}, [TMP1] + vmull.u8 q8, d20, d28 + vmlal.u8 q8, d21, d29 + vshll.u8 q6, d9, #8 + vshll.u8 q5, d10, #8 + vshll.u8 q7, d8, #8 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vsri.u16 q5, q6, #5 + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) + vsri.u16 q5, q7, #11 + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vld1.32 {d22}, [TMP2], STRIDE + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) + vadd.u16 q12, q12, q13 + vld1.32 {d23}, [TMP2] + vmull.u8 q9, d22, d28 + mov TMP3, X, asr #16 + add X, X, UX + add TMP3, TOP, TMP3, asl #2 + mov TMP4, X, asr #16 + add X, X, UX + add TMP4, TOP, TMP4, asl #2 + vmlal.u8 q9, d23, d29 + vld1.32 {d22}, [TMP3], STRIDE + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vld1.32 {d23}, [TMP3] + vmull.u8 q10, d22, d28 + vmlal.u8 q10, d23, d29 + vmovn.u16 d8, q0 + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS + vmovn.u16 d9, q2 + vmlsl.u16 q0, d16, d30 + vmlal.u16 q0, d17, d30 + pld [TMP4, PF_OFFS] + vld1.32 {d16}, [TMP4], STRIDE + vadd.u16 q12, q12, q13 + vld1.32 {d17}, [TMP4] + pld [TMP4, PF_OFFS] + vmull.u8 q11, d16, d28 + vmlal.u8 q11, d17, d29 + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS + vst1.32 {d10, d11}, [OUT, :128]! + vmlsl.u16 q1, d18, d31 +.endm +/*****************************************************************************/ + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, \ + 2, 2, 28, BILINEAR_FLAG_UNROLL_4 + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, \ + 2, 1, 28, BILINEAR_FLAG_UNROLL_8 | BILINEAR_FLAG_USE_ALL_NEON_REGS + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, \ + 1, 2, 28, BILINEAR_FLAG_UNROLL_4 + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, \ + 1, 1, 28, BILINEAR_FLAG_UNROLL_4 diff --git a/src/pixman/pixman/pixman-arm-neon-asm.h b/src/pixman/pixman/pixman-arm-neon-asm.h new file mode 100644 index 0000000..d0d92d7 --- /dev/null +++ b/src/pixman/pixman/pixman-arm-neon-asm.h @@ -0,0 +1,1196 @@ +/* + * Copyright © 2009 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + */ + +/* + * This file contains a macro ('generate_composite_function') which can + * construct 2D image processing functions, based on a common template. + * Any combinations of source, destination and mask images with 8bpp, + * 16bpp, 24bpp, 32bpp color formats are supported. + * + * This macro takes care of: + * - handling of leading and trailing unaligned pixels + * - doing most of the work related to L2 cache preload + * - encourages the use of software pipelining for better instructions + * scheduling + * + * The user of this macro has to provide some configuration parameters + * (bit depths for the images, prefetch distance, etc.) and a set of + * macros, which should implement basic code chunks responsible for + * pixels processing. See 'pixman-arm-neon-asm.S' file for the usage + * examples. + * + * TODO: + * - try overlapped pixel method (from Ian Rickards) when processing + * exactly two blocks of pixels + * - maybe add an option to do reverse scanline processing + */ + +/* + * Bit flags for 'generate_composite_function' macro which are used + * to tune generated functions behavior. + */ +.set FLAG_DST_WRITEONLY, 0 +.set FLAG_DST_READWRITE, 1 +.set FLAG_DEINTERLEAVE_32BPP, 2 + +/* + * Offset in stack where mask and source pointer/stride can be accessed + * from 'init' macro. This is useful for doing special handling for solid mask. + */ +.set ARGS_STACK_OFFSET, 40 + +/* + * Constants for selecting preferable prefetch type. + */ +.set PREFETCH_TYPE_NONE, 0 /* No prefetch at all */ +.set PREFETCH_TYPE_SIMPLE, 1 /* A simple, fixed-distance-ahead prefetch */ +.set PREFETCH_TYPE_ADVANCED, 2 /* Advanced fine-grained prefetch */ + +/* + * Definitions of supplementary pixld/pixst macros (for partial load/store of + * pixel data). + */ + +.macro pixldst1 op, elem_size, reg1, mem_operand, abits +.if abits > 0 + op&.&elem_size {d®1}, [&mem_operand&, :&abits&]! +.else + op&.&elem_size {d®1}, [&mem_operand&]! +.endif +.endm + +.macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits +.if abits > 0 + op&.&elem_size {d®1, d®2}, [&mem_operand&, :&abits&]! +.else + op&.&elem_size {d®1, d®2}, [&mem_operand&]! +.endif +.endm + +.macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits +.if abits > 0 + op&.&elem_size {d®1, d®2, d®3, d®4}, [&mem_operand&, :&abits&]! +.else + op&.&elem_size {d®1, d®2, d®3, d®4}, [&mem_operand&]! +.endif +.endm + +.macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits + op&.&elem_size {d®1[idx]}, [&mem_operand&]! +.endm + +.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand + op&.&elem_size {d®1, d®2, d®3}, [&mem_operand&]! +.endm + +.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand + op&.&elem_size {d®1[idx], d®2[idx], d®3[idx]}, [&mem_operand&]! +.endm + +.macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits +.if numbytes == 32 + pixldst4 op, elem_size, %(basereg+4), %(basereg+5), \ + %(basereg+6), %(basereg+7), mem_operand, abits +.elseif numbytes == 16 + pixldst2 op, elem_size, %(basereg+2), %(basereg+3), mem_operand, abits +.elseif numbytes == 8 + pixldst1 op, elem_size, %(basereg+1), mem_operand, abits +.elseif numbytes == 4 + .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32) + pixldst0 op, 32, %(basereg+0), 1, mem_operand, abits + .elseif elem_size == 16 + pixldst0 op, 16, %(basereg+0), 2, mem_operand, abits + pixldst0 op, 16, %(basereg+0), 3, mem_operand, abits + .else + pixldst0 op, 8, %(basereg+0), 4, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 5, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 6, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 7, mem_operand, abits + .endif +.elseif numbytes == 2 + .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16) + pixldst0 op, 16, %(basereg+0), 1, mem_operand, abits + .else + pixldst0 op, 8, %(basereg+0), 2, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 3, mem_operand, abits + .endif +.elseif numbytes == 1 + pixldst0 op, 8, %(basereg+0), 1, mem_operand, abits +.else + .error "unsupported size: numbytes" +.endif +.endm + +.macro pixld numpix, bpp, basereg, mem_operand, abits=0 +.if bpp > 0 +.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) + pixldst4 vld4, 8, %(basereg+4), %(basereg+5), \ + %(basereg+6), %(basereg+7), mem_operand, abits +.elseif (bpp == 24) && (numpix == 8) + pixldst3 vld3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand +.elseif (bpp == 24) && (numpix == 4) + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand +.elseif (bpp == 24) && (numpix == 2) + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand +.elseif (bpp == 24) && (numpix == 1) + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand +.else + pixldst %(numpix * bpp / 8), vld1, %(bpp), basereg, mem_operand, abits +.endif +.endif +.endm + +.macro pixst numpix, bpp, basereg, mem_operand, abits=0 +.if bpp > 0 +.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) + pixldst4 vst4, 8, %(basereg+4), %(basereg+5), \ + %(basereg+6), %(basereg+7), mem_operand, abits +.elseif (bpp == 24) && (numpix == 8) + pixldst3 vst3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand +.elseif (bpp == 24) && (numpix == 4) + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand +.elseif (bpp == 24) && (numpix == 2) + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand +.elseif (bpp == 24) && (numpix == 1) + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand +.else + pixldst %(numpix * bpp / 8), vst1, %(bpp), basereg, mem_operand, abits +.endif +.endif +.endm + +.macro pixld_a numpix, bpp, basereg, mem_operand +.if (bpp * numpix) <= 128 + pixld numpix, bpp, basereg, mem_operand, %(bpp * numpix) +.else + pixld numpix, bpp, basereg, mem_operand, 128 +.endif +.endm + +.macro pixst_a numpix, bpp, basereg, mem_operand +.if (bpp * numpix) <= 128 + pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix) +.else + pixst numpix, bpp, basereg, mem_operand, 128 +.endif +.endm + +/* + * Pixel fetcher for nearest scaling (needs TMP1, TMP2, VX, UNIT_X register + * aliases to be defined) + */ +.macro pixld1_s elem_size, reg1, mem_operand +.if elem_size == 16 + mov TMP1, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP1, mem_operand, TMP1, asl #1 + mov TMP2, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP2, mem_operand, TMP2, asl #1 + vld1.16 {d®1&[0]}, [TMP1, :16] + mov TMP1, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP1, mem_operand, TMP1, asl #1 + vld1.16 {d®1&[1]}, [TMP2, :16] + mov TMP2, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP2, mem_operand, TMP2, asl #1 + vld1.16 {d®1&[2]}, [TMP1, :16] + vld1.16 {d®1&[3]}, [TMP2, :16] +.elseif elem_size == 32 + mov TMP1, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP1, mem_operand, TMP1, asl #2 + mov TMP2, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP2, mem_operand, TMP2, asl #2 + vld1.32 {d®1&[0]}, [TMP1, :32] + vld1.32 {d®1&[1]}, [TMP2, :32] +.else + .error "unsupported" +.endif +.endm + +.macro pixld2_s elem_size, reg1, reg2, mem_operand +.if 0 /* elem_size == 32 */ + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X, asl #1 + add TMP1, mem_operand, TMP1, asl #2 + mov TMP2, VX, asr #16 + sub VX, VX, UNIT_X + add TMP2, mem_operand, TMP2, asl #2 + vld1.32 {d®1&[0]}, [TMP1, :32] + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X, asl #1 + add TMP1, mem_operand, TMP1, asl #2 + vld1.32 {d®2&[0]}, [TMP2, :32] + mov TMP2, VX, asr #16 + add VX, VX, UNIT_X + add TMP2, mem_operand, TMP2, asl #2 + vld1.32 {d®1&[1]}, [TMP1, :32] + vld1.32 {d®2&[1]}, [TMP2, :32] +.else + pixld1_s elem_size, reg1, mem_operand + pixld1_s elem_size, reg2, mem_operand +.endif +.endm + +.macro pixld0_s elem_size, reg1, idx, mem_operand +.if elem_size == 16 + mov TMP1, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP1, mem_operand, TMP1, asl #1 + vld1.16 {d®1&[idx]}, [TMP1, :16] +.elseif elem_size == 32 + mov TMP1, VX, asr #16 + adds VX, VX, UNIT_X +5: subpls VX, VX, SRC_WIDTH_FIXED + bpl 5b + add TMP1, mem_operand, TMP1, asl #2 + vld1.32 {d®1&[idx]}, [TMP1, :32] +.endif +.endm + +.macro pixld_s_internal numbytes, elem_size, basereg, mem_operand +.if numbytes == 32 + pixld2_s elem_size, %(basereg+4), %(basereg+5), mem_operand + pixld2_s elem_size, %(basereg+6), %(basereg+7), mem_operand + pixdeinterleave elem_size, %(basereg+4) +.elseif numbytes == 16 + pixld2_s elem_size, %(basereg+2), %(basereg+3), mem_operand +.elseif numbytes == 8 + pixld1_s elem_size, %(basereg+1), mem_operand +.elseif numbytes == 4 + .if elem_size == 32 + pixld0_s elem_size, %(basereg+0), 1, mem_operand + .elseif elem_size == 16 + pixld0_s elem_size, %(basereg+0), 2, mem_operand + pixld0_s elem_size, %(basereg+0), 3, mem_operand + .else + pixld0_s elem_size, %(basereg+0), 4, mem_operand + pixld0_s elem_size, %(basereg+0), 5, mem_operand + pixld0_s elem_size, %(basereg+0), 6, mem_operand + pixld0_s elem_size, %(basereg+0), 7, mem_operand + .endif +.elseif numbytes == 2 + .if elem_size == 16 + pixld0_s elem_size, %(basereg+0), 1, mem_operand + .else + pixld0_s elem_size, %(basereg+0), 2, mem_operand + pixld0_s elem_size, %(basereg+0), 3, mem_operand + .endif +.elseif numbytes == 1 + pixld0_s elem_size, %(basereg+0), 1, mem_operand +.else + .error "unsupported size: numbytes" +.endif +.endm + +.macro pixld_s numpix, bpp, basereg, mem_operand +.if bpp > 0 + pixld_s_internal %(numpix * bpp / 8), %(bpp), basereg, mem_operand +.endif +.endm + +.macro vuzp8 reg1, reg2 + vuzp.8 d®1, d®2 +.endm + +.macro vzip8 reg1, reg2 + vzip.8 d®1, d®2 +.endm + +/* deinterleave B, G, R, A channels for eight 32bpp pixels in 4 registers */ +.macro pixdeinterleave bpp, basereg +.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) + vuzp8 %(basereg+0), %(basereg+1) + vuzp8 %(basereg+2), %(basereg+3) + vuzp8 %(basereg+1), %(basereg+3) + vuzp8 %(basereg+0), %(basereg+2) +.endif +.endm + +/* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */ +.macro pixinterleave bpp, basereg +.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) + vzip8 %(basereg+0), %(basereg+2) + vzip8 %(basereg+1), %(basereg+3) + vzip8 %(basereg+2), %(basereg+3) + vzip8 %(basereg+0), %(basereg+1) +.endif +.endm + +/* + * This is a macro for implementing cache preload. The main idea is that + * cache preload logic is mostly independent from the rest of pixels + * processing code. It starts at the top left pixel and moves forward + * across pixels and can jump across scanlines. Prefetch distance is + * handled in an 'incremental' way: it starts from 0 and advances to the + * optimal distance over time. After reaching optimal prefetch distance, + * it is kept constant. There are some checks which prevent prefetching + * unneeded pixel lines below the image (but it still can prefetch a bit + * more data on the right side of the image - not a big issue and may + * be actually helpful when rendering text glyphs). Additional trick is + * the use of LDR instruction for prefetch instead of PLD when moving to + * the next line, the point is that we have a high chance of getting TLB + * miss in this case, and PLD would be useless. + * + * This sounds like it may introduce a noticeable overhead (when working with + * fully cached data). But in reality, due to having a separate pipeline and + * instruction queue for NEON unit in ARM Cortex-A8, normal ARM code can + * execute simultaneously with NEON and be completely shadowed by it. Thus + * we get no performance overhead at all (*). This looks like a very nice + * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher, + * but still can implement some rather advanced prefetch logic in software + * for almost zero cost! + * + * (*) The overhead of the prefetcher is visible when running some trivial + * pixels processing like simple copy. Anyway, having prefetch is a must + * when working with the graphics data. + */ +.macro PF a, x:vararg +.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_ADVANCED) + a x +.endif +.endm + +.macro cache_preload std_increment, boost_increment +.if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0) +.if regs_shortage + PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */ +.endif +.if std_increment != 0 + PF add PF_X, PF_X, #std_increment +.endif + PF tst PF_CTL, #0xF + PF addne PF_X, PF_X, #boost_increment + PF subne PF_CTL, PF_CTL, #1 + PF cmp PF_X, ORIG_W +.if src_bpp_shift >= 0 + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] +.endif +.if dst_r_bpp != 0 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] +.endif +.if mask_bpp_shift >= 0 + PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] +.endif + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 +.if src_bpp_shift >= 0 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! +.endif +.if dst_r_bpp != 0 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! +.endif +.if mask_bpp_shift >= 0 + PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! +.endif +.endif +.endm + +.macro cache_preload_simple +.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_SIMPLE) +.if src_bpp > 0 + pld [SRC, #(PREFETCH_DISTANCE_SIMPLE * src_bpp / 8)] +.endif +.if dst_r_bpp > 0 + pld [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)] +.endif +.if mask_bpp > 0 + pld [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)] +.endif +.endif +.endm + +.macro fetch_mask_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK +.endm + +/* + * Macro which is used to process leading pixels until destination + * pointer is properly aligned (at 16 bytes boundary). When destination + * buffer uses 16bpp format, this is unnecessary, or even pointless. + */ +.macro ensure_destination_ptr_alignment process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head +.if dst_w_bpp != 24 + tst DST_R, #0xF + beq 2f + +.irp lowbit, 1, 2, 4, 8, 16 +local skip1 +.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) +.if lowbit < 16 /* we don't need more than 16-byte alignment */ + tst DST_R, #lowbit + beq 1f +.endif + pixld_src (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC + pixld (lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK +.if dst_r_bpp > 0 + pixld_a (lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R +.else + add DST_R, DST_R, #lowbit +.endif + PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp) + sub W, W, #(lowbit * 8 / dst_w_bpp) +1: +.endif +.endr + pixdeinterleave src_bpp, src_basereg + pixdeinterleave mask_bpp, mask_basereg + pixdeinterleave dst_r_bpp, dst_r_basereg + + process_pixblock_head + cache_preload 0, pixblock_size + cache_preload_simple + process_pixblock_tail + + pixinterleave dst_w_bpp, dst_w_basereg +.irp lowbit, 1, 2, 4, 8, 16 +.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) +.if lowbit < 16 /* we don't need more than 16-byte alignment */ + tst DST_W, #lowbit + beq 1f +.endif + pixst_a (lowbit * 8 / dst_w_bpp), dst_w_bpp, dst_w_basereg, DST_W +1: +.endif +.endr +.endif +2: +.endm + +/* + * Special code for processing up to (pixblock_size - 1) remaining + * trailing pixels. As SIMD processing performs operation on + * pixblock_size pixels, anything smaller than this has to be loaded + * and stored in a special way. Loading and storing of pixel data is + * performed in such a way that we fill some 'slots' in the NEON + * registers (some slots naturally are unused), then perform compositing + * operation as usual. In the end, the data is taken from these 'slots' + * and saved to memory. + * + * cache_preload_flag - allows to suppress prefetch if + * set to 0 + * dst_aligned_flag - selects whether destination buffer + * is aligned + */ +.macro process_trailing_pixels cache_preload_flag, \ + dst_aligned_flag, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + tst W, #(pixblock_size - 1) + beq 2f +.irp chunk_size, 16, 8, 4, 2, 1 +.if pixblock_size > chunk_size + tst W, #chunk_size + beq 1f + pixld_src chunk_size, src_bpp, src_basereg, SRC + pixld chunk_size, mask_bpp, mask_basereg, MASK +.if dst_aligned_flag != 0 + pixld_a chunk_size, dst_r_bpp, dst_r_basereg, DST_R +.else + pixld chunk_size, dst_r_bpp, dst_r_basereg, DST_R +.endif +.if cache_preload_flag != 0 + PF add PF_X, PF_X, #chunk_size +.endif +1: +.endif +.endr + pixdeinterleave src_bpp, src_basereg + pixdeinterleave mask_bpp, mask_basereg + pixdeinterleave dst_r_bpp, dst_r_basereg + + process_pixblock_head +.if cache_preload_flag != 0 + cache_preload 0, pixblock_size + cache_preload_simple +.endif + process_pixblock_tail + pixinterleave dst_w_bpp, dst_w_basereg +.irp chunk_size, 16, 8, 4, 2, 1 +.if pixblock_size > chunk_size + tst W, #chunk_size + beq 1f +.if dst_aligned_flag != 0 + pixst_a chunk_size, dst_w_bpp, dst_w_basereg, DST_W +.else + pixst chunk_size, dst_w_bpp, dst_w_basereg, DST_W +.endif +1: +.endif +.endr +2: +.endm + +/* + * Macro, which performs all the needed operations to switch to the next + * scanline and start the next loop iteration unless all the scanlines + * are already processed. + */ +.macro advance_to_next_scanline start_of_loop_label +.if regs_shortage + ldrd W, [sp] /* load W and H (width and height) from stack */ +.else + mov W, ORIG_W +.endif + add DST_W, DST_W, DST_STRIDE, lsl #dst_bpp_shift +.if src_bpp != 0 + add SRC, SRC, SRC_STRIDE, lsl #src_bpp_shift +.endif +.if mask_bpp != 0 + add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift +.endif +.if (dst_w_bpp != 24) + sub DST_W, DST_W, W, lsl #dst_bpp_shift +.endif +.if (src_bpp != 24) && (src_bpp != 0) + sub SRC, SRC, W, lsl #src_bpp_shift +.endif +.if (mask_bpp != 24) && (mask_bpp != 0) + sub MASK, MASK, W, lsl #mask_bpp_shift +.endif + subs H, H, #1 + mov DST_R, DST_W +.if regs_shortage + str H, [sp, #4] /* save updated height to stack */ +.endif + bge start_of_loop_label +.endm + +/* + * Registers are allocated in the following way by default: + * d0, d1, d2, d3 - reserved for loading source pixel data + * d4, d5, d6, d7 - reserved for loading destination pixel data + * d24, d25, d26, d27 - reserved for loading mask pixel data + * d28, d29, d30, d31 - final destination pixel data for writeback to memory + */ +.macro generate_composite_function fname, \ + src_bpp_, \ + mask_bpp_, \ + dst_w_bpp_, \ + flags, \ + pixblock_size_, \ + prefetch_distance, \ + init, \ + cleanup, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head, \ + dst_w_basereg_ = 28, \ + dst_r_basereg_ = 4, \ + src_basereg_ = 0, \ + mask_basereg_ = 24 + + .func fname + .global fname + /* For ELF format also set function visibility to hidden */ +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: + push {r4-r12, lr} /* save all registers */ + +/* + * Select prefetch type for this function. If prefetch distance is + * set to 0 or one of the color formats is 24bpp, SIMPLE prefetch + * has to be used instead of ADVANCED. + */ + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT +.if prefetch_distance == 0 + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE +.elseif (PREFETCH_TYPE_CURRENT > PREFETCH_TYPE_SIMPLE) && \ + ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24)) + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_SIMPLE +.endif + +/* + * Make some macro arguments globally visible and accessible + * from other macros + */ + .set src_bpp, src_bpp_ + .set mask_bpp, mask_bpp_ + .set dst_w_bpp, dst_w_bpp_ + .set pixblock_size, pixblock_size_ + .set dst_w_basereg, dst_w_basereg_ + .set dst_r_basereg, dst_r_basereg_ + .set src_basereg, src_basereg_ + .set mask_basereg, mask_basereg_ + + .macro pixld_src x:vararg + pixld x + .endm + .macro fetch_src_pixblock + pixld_src pixblock_size, src_bpp, \ + (src_basereg - pixblock_size * src_bpp / 64), SRC + .endm +/* + * Assign symbolic names to registers + */ + W .req r0 /* width (is updated during processing) */ + H .req r1 /* height (is updated during processing) */ + DST_W .req r2 /* destination buffer pointer for writes */ + DST_STRIDE .req r3 /* destination image stride */ + SRC .req r4 /* source buffer pointer */ + SRC_STRIDE .req r5 /* source image stride */ + DST_R .req r6 /* destination buffer pointer for reads */ + + MASK .req r7 /* mask pointer */ + MASK_STRIDE .req r8 /* mask stride */ + + PF_CTL .req r9 /* combined lines counter and prefetch */ + /* distance increment counter */ + PF_X .req r10 /* pixel index in a scanline for current */ + /* pretetch position */ + PF_SRC .req r11 /* pointer to source scanline start */ + /* for prefetch purposes */ + PF_DST .req r12 /* pointer to destination scanline start */ + /* for prefetch purposes */ + PF_MASK .req r14 /* pointer to mask scanline start */ + /* for prefetch purposes */ +/* + * Check whether we have enough registers for all the local variables. + * If we don't have enough registers, original width and height are + * kept on top of stack (and 'regs_shortage' variable is set to indicate + * this for the rest of code). Even if there are enough registers, the + * allocation scheme may be a bit different depending on whether source + * or mask is not used. + */ +.if (PREFETCH_TYPE_CURRENT < PREFETCH_TYPE_ADVANCED) + ORIG_W .req r10 /* saved original width */ + DUMMY .req r12 /* temporary register */ + .set regs_shortage, 0 +.elseif mask_bpp == 0 + ORIG_W .req r7 /* saved original width */ + DUMMY .req r8 /* temporary register */ + .set regs_shortage, 0 +.elseif src_bpp == 0 + ORIG_W .req r4 /* saved original width */ + DUMMY .req r5 /* temporary register */ + .set regs_shortage, 0 +.else + ORIG_W .req r1 /* saved original width */ + DUMMY .req r1 /* temporary register */ + .set regs_shortage, 1 +.endif + + .set mask_bpp_shift, -1 +.if src_bpp == 32 + .set src_bpp_shift, 2 +.elseif src_bpp == 24 + .set src_bpp_shift, 0 +.elseif src_bpp == 16 + .set src_bpp_shift, 1 +.elseif src_bpp == 8 + .set src_bpp_shift, 0 +.elseif src_bpp == 0 + .set src_bpp_shift, -1 +.else + .error "requested src bpp (src_bpp) is not supported" +.endif +.if mask_bpp == 32 + .set mask_bpp_shift, 2 +.elseif mask_bpp == 24 + .set mask_bpp_shift, 0 +.elseif mask_bpp == 8 + .set mask_bpp_shift, 0 +.elseif mask_bpp == 0 + .set mask_bpp_shift, -1 +.else + .error "requested mask bpp (mask_bpp) is not supported" +.endif +.if dst_w_bpp == 32 + .set dst_bpp_shift, 2 +.elseif dst_w_bpp == 24 + .set dst_bpp_shift, 0 +.elseif dst_w_bpp == 16 + .set dst_bpp_shift, 1 +.elseif dst_w_bpp == 8 + .set dst_bpp_shift, 0 +.else + .error "requested dst bpp (dst_w_bpp) is not supported" +.endif + +.if (((flags) & FLAG_DST_READWRITE) != 0) + .set dst_r_bpp, dst_w_bpp +.else + .set dst_r_bpp, 0 +.endif +.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) + .set DEINTERLEAVE_32BPP_ENABLED, 1 +.else + .set DEINTERLEAVE_32BPP_ENABLED, 0 +.endif + +.if prefetch_distance < 0 || prefetch_distance > 15 + .error "invalid prefetch distance (prefetch_distance)" +.endif + +.if src_bpp > 0 + ldr SRC, [sp, #40] +.endif +.if mask_bpp > 0 + ldr MASK, [sp, #48] +.endif + PF mov PF_X, #0 +.if src_bpp > 0 + ldr SRC_STRIDE, [sp, #44] +.endif +.if mask_bpp > 0 + ldr MASK_STRIDE, [sp, #52] +.endif + mov DST_R, DST_W + +.if src_bpp == 24 + sub SRC_STRIDE, SRC_STRIDE, W + sub SRC_STRIDE, SRC_STRIDE, W, lsl #1 +.endif +.if mask_bpp == 24 + sub MASK_STRIDE, MASK_STRIDE, W + sub MASK_STRIDE, MASK_STRIDE, W, lsl #1 +.endif +.if dst_w_bpp == 24 + sub DST_STRIDE, DST_STRIDE, W + sub DST_STRIDE, DST_STRIDE, W, lsl #1 +.endif + +/* + * Setup advanced prefetcher initial state + */ + PF mov PF_SRC, SRC + PF mov PF_DST, DST_R + PF mov PF_MASK, MASK + /* PF_CTL = prefetch_distance | ((h - 1) << 4) */ + PF mov PF_CTL, H, lsl #4 + PF add PF_CTL, #(prefetch_distance - 0x10) + + init +.if regs_shortage + push {r0, r1} +.endif + subs H, H, #1 +.if regs_shortage + str H, [sp, #4] /* save updated height to stack */ +.else + mov ORIG_W, W +.endif + blt 9f + cmp W, #(pixblock_size * 2) + blt 8f +/* + * This is the start of the pipelined loop, which if optimized for + * long scanlines + */ +0: + ensure_destination_ptr_alignment process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */ + pixld_a pixblock_size, dst_r_bpp, \ + (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R + fetch_src_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK + PF add PF_X, PF_X, #pixblock_size + process_pixblock_head + cache_preload 0, pixblock_size + cache_preload_simple + subs W, W, #(pixblock_size * 2) + blt 2f +1: + process_pixblock_tail_head + cache_preload_simple + subs W, W, #pixblock_size + bge 1b +2: + process_pixblock_tail + pixst_a pixblock_size, dst_w_bpp, \ + (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W + + /* Process the remaining trailing pixels in the scanline */ + process_trailing_pixels 1, 1, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + advance_to_next_scanline 0b + +.if regs_shortage + pop {r0, r1} +.endif + cleanup + pop {r4-r12, pc} /* exit */ +/* + * This is the start of the loop, designed to process images with small width + * (less than pixblock_size * 2 pixels). In this case neither pipelining + * nor prefetch are used. + */ +8: + /* Process exactly pixblock_size pixels if needed */ + tst W, #pixblock_size + beq 1f + pixld pixblock_size, dst_r_bpp, \ + (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R + fetch_src_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK + process_pixblock_head + process_pixblock_tail + pixst pixblock_size, dst_w_bpp, \ + (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W +1: + /* Process the remaining trailing pixels in the scanline */ + process_trailing_pixels 0, 0, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + advance_to_next_scanline 8b +9: +.if regs_shortage + pop {r0, r1} +.endif + cleanup + pop {r4-r12, pc} /* exit */ + + .purgem fetch_src_pixblock + .purgem pixld_src + + .unreq SRC + .unreq MASK + .unreq DST_R + .unreq DST_W + .unreq ORIG_W + .unreq W + .unreq H + .unreq SRC_STRIDE + .unreq DST_STRIDE + .unreq MASK_STRIDE + .unreq PF_CTL + .unreq PF_X + .unreq PF_SRC + .unreq PF_DST + .unreq PF_MASK + .unreq DUMMY + .endfunc +.endm + +/* + * A simplified variant of function generation template for a single + * scanline processing (for implementing pixman combine functions) + */ +.macro generate_composite_function_scanline use_nearest_scaling, \ + fname, \ + src_bpp_, \ + mask_bpp_, \ + dst_w_bpp_, \ + flags, \ + pixblock_size_, \ + init, \ + cleanup, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head, \ + dst_w_basereg_ = 28, \ + dst_r_basereg_ = 4, \ + src_basereg_ = 0, \ + mask_basereg_ = 24 + + .func fname + .global fname + /* For ELF format also set function visibility to hidden */ +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE +/* + * Make some macro arguments globally visible and accessible + * from other macros + */ + .set src_bpp, src_bpp_ + .set mask_bpp, mask_bpp_ + .set dst_w_bpp, dst_w_bpp_ + .set pixblock_size, pixblock_size_ + .set dst_w_basereg, dst_w_basereg_ + .set dst_r_basereg, dst_r_basereg_ + .set src_basereg, src_basereg_ + .set mask_basereg, mask_basereg_ + +.if use_nearest_scaling != 0 + /* + * Assign symbolic names to registers for nearest scaling + */ + W .req r0 + DST_W .req r1 + SRC .req r2 + VX .req r3 + UNIT_X .req ip + MASK .req lr + TMP1 .req r4 + TMP2 .req r5 + DST_R .req r6 + SRC_WIDTH_FIXED .req r7 + + .macro pixld_src x:vararg + pixld_s x + .endm + + ldr UNIT_X, [sp] + push {r4-r8, lr} + ldr SRC_WIDTH_FIXED, [sp, #(24 + 4)] + .if mask_bpp != 0 + ldr MASK, [sp, #(24 + 8)] + .endif +.else + /* + * Assign symbolic names to registers + */ + W .req r0 /* width (is updated during processing) */ + DST_W .req r1 /* destination buffer pointer for writes */ + SRC .req r2 /* source buffer pointer */ + DST_R .req ip /* destination buffer pointer for reads */ + MASK .req r3 /* mask pointer */ + + .macro pixld_src x:vararg + pixld x + .endm +.endif + +.if (((flags) & FLAG_DST_READWRITE) != 0) + .set dst_r_bpp, dst_w_bpp +.else + .set dst_r_bpp, 0 +.endif +.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) + .set DEINTERLEAVE_32BPP_ENABLED, 1 +.else + .set DEINTERLEAVE_32BPP_ENABLED, 0 +.endif + + .macro fetch_src_pixblock + pixld_src pixblock_size, src_bpp, \ + (src_basereg - pixblock_size * src_bpp / 64), SRC + .endm + + init + mov DST_R, DST_W + + cmp W, #pixblock_size + blt 8f + + ensure_destination_ptr_alignment process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + subs W, W, #pixblock_size + blt 7f + + /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */ + pixld_a pixblock_size, dst_r_bpp, \ + (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R + fetch_src_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK + process_pixblock_head + subs W, W, #pixblock_size + blt 2f +1: + process_pixblock_tail_head + subs W, W, #pixblock_size + bge 1b +2: + process_pixblock_tail + pixst_a pixblock_size, dst_w_bpp, \ + (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W +7: + /* Process the remaining trailing pixels in the scanline (dst aligned) */ + process_trailing_pixels 0, 1, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + cleanup +.if use_nearest_scaling != 0 + pop {r4-r8, pc} /* exit */ +.else + bx lr /* exit */ +.endif +8: + /* Process the remaining trailing pixels in the scanline (dst unaligned) */ + process_trailing_pixels 0, 0, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + cleanup + +.if use_nearest_scaling != 0 + pop {r4-r8, pc} /* exit */ + + .unreq DST_R + .unreq SRC + .unreq W + .unreq VX + .unreq UNIT_X + .unreq TMP1 + .unreq TMP2 + .unreq DST_W + .unreq MASK + .unreq SRC_WIDTH_FIXED + +.else + bx lr /* exit */ + + .unreq SRC + .unreq MASK + .unreq DST_R + .unreq DST_W + .unreq W +.endif + + .purgem fetch_src_pixblock + .purgem pixld_src + + .endfunc +.endm + +.macro generate_composite_function_single_scanline x:vararg + generate_composite_function_scanline 0, x +.endm + +.macro generate_composite_function_nearest_scanline x:vararg + generate_composite_function_scanline 1, x +.endm + +/* Default prologue/epilogue, nothing special needs to be done */ + +.macro default_init +.endm + +.macro default_cleanup +.endm + +/* + * Prologue/epilogue variant which additionally saves/restores d8-d15 + * registers (they need to be saved/restored by callee according to ABI). + * This is required if the code needs to use all the NEON registers. + */ + +.macro default_init_need_all_regs + vpush {d8-d15} +.endm + +.macro default_cleanup_need_all_regs + vpop {d8-d15} +.endm + +/******************************************************************************/ + +/* + * Conversion of 8 r5g6b6 pixels packed in 128-bit register (in) + * into a planar a8r8g8b8 format (with a, r, g, b color components + * stored into 64-bit registers out_a, out_r, out_g, out_b respectively). + * + * Warning: the conversion is destructive and the original + * value (in) is lost. + */ +.macro convert_0565_to_8888 in, out_a, out_r, out_g, out_b + vshrn.u16 out_r, in, #8 + vshrn.u16 out_g, in, #3 + vsli.u16 in, in, #5 + vmov.u8 out_a, #255 + vsri.u8 out_r, out_r, #5 + vsri.u8 out_g, out_g, #6 + vshrn.u16 out_b, in, #2 +.endm + +.macro convert_0565_to_x888 in, out_r, out_g, out_b + vshrn.u16 out_r, in, #8 + vshrn.u16 out_g, in, #3 + vsli.u16 in, in, #5 + vsri.u8 out_r, out_r, #5 + vsri.u8 out_g, out_g, #6 + vshrn.u16 out_b, in, #2 +.endm + +/* + * Conversion from planar a8r8g8b8 format (with a, r, g, b color components + * in 64-bit registers in_a, in_r, in_g, in_b respectively) into 8 r5g6b6 + * pixels packed in 128-bit register (out). Requires two temporary 128-bit + * registers (tmp1, tmp2) + */ +.macro convert_8888_to_0565 in_r, in_g, in_b, out, tmp1, tmp2 + vshll.u8 tmp1, in_g, #8 + vshll.u8 out, in_r, #8 + vshll.u8 tmp2, in_b, #8 + vsri.u16 out, tmp1, #5 + vsri.u16 out, tmp2, #11 +.endm + +/* + * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels + * returned in (out0, out1) registers pair. Requires one temporary + * 64-bit register (tmp). 'out1' and 'in' may overlap, the original + * value from 'in' is lost + */ +.macro convert_four_0565_to_x888_packed in, out0, out1, tmp + vshl.u16 out0, in, #5 /* G top 6 bits */ + vshl.u16 tmp, in, #11 /* B top 5 bits */ + vsri.u16 in, in, #5 /* R is ready in top bits */ + vsri.u16 out0, out0, #6 /* G is ready in top bits */ + vsri.u16 tmp, tmp, #5 /* B is ready in top bits */ + vshr.u16 out1, in, #8 /* R is in place */ + vsri.u16 out0, tmp, #8 /* G & B is in place */ + vzip.u16 out0, out1 /* everything is in place */ +.endm diff --git a/src/pixman/pixman/pixman-arm-neon.c b/src/pixman/pixman/pixman-arm-neon.c new file mode 100644 index 0000000..60e9c78 --- /dev/null +++ b/src/pixman/pixman/pixman-arm-neon.c @@ -0,0 +1,472 @@ +/* + * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of ARM Ltd not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. ARM Ltd makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Ian Rickards (ian.rickards@arm.com) + * Author: Jonathan Morton (jonathan.morton@movial.com) + * Author: Markku Vire (markku.vire@movial.com) + * + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <string.h> +#include "pixman-private.h" +#include "pixman-arm-common.h" + +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565, + uint16_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888, + uint8_t, 3, uint8_t, 3) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888, + uint16_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev, + uint8_t, 3, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev, + uint8_t, 3, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565, + uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_8888, + uint8_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565, + uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888, + uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888, + uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8, + uint8_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565, + uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888, + uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_0565_ca, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888, + uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8888, + uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8, + uint8_t, 1, uint8_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565, + uint16_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888, + uint32_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, + uint8_t, 1, uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565, + uint16_t, 1, uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888, + uint32_t, 1, uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888, + uint32_t, 1, uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888, + uint32_t, 1, uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888, + uint32_t, 1, uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565, + uint32_t, 1, uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565, + uint16_t, 1, uint8_t, 1, uint16_t, 1) + +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC, + uint16_t, uint32_t) + +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565, + OVER, uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565, + OVER, uint16_t, uint16_t) + +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC, + uint16_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC, + uint16_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD, + uint32_t, uint32_t) + +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_8888, SRC, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_0565, SRC, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_x888, SRC, + uint16_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_0565, SRC, + uint16_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, OVER, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, ADD, + uint32_t, uint32_t) + +void +pixman_composite_src_n_8_asm_neon (int32_t w, + int32_t h, + uint8_t *dst, + int32_t dst_stride, + uint8_t src); + +void +pixman_composite_src_n_0565_asm_neon (int32_t w, + int32_t h, + uint16_t *dst, + int32_t dst_stride, + uint16_t src); + +void +pixman_composite_src_n_8888_asm_neon (int32_t w, + int32_t h, + uint32_t *dst, + int32_t dst_stride, + uint32_t src); + +static pixman_bool_t +arm_neon_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t _xor) +{ + /* stride is always multiple of 32bit units in pixman */ + uint32_t byte_stride = stride * sizeof(uint32_t); + + switch (bpp) + { + case 8: + pixman_composite_src_n_8_asm_neon ( + width, + height, + (uint8_t *)(((char *) bits) + y * byte_stride + x), + byte_stride, + _xor & 0xff); + return TRUE; + case 16: + pixman_composite_src_n_0565_asm_neon ( + width, + height, + (uint16_t *)(((char *) bits) + y * byte_stride + x * 2), + byte_stride / 2, + _xor & 0xffff); + return TRUE; + case 32: + pixman_composite_src_n_8888_asm_neon ( + width, + height, + (uint32_t *)(((char *) bits) + y * byte_stride + x * 4), + byte_stride / 4, + _xor); + return TRUE; + default: + return FALSE; + } +} + +static pixman_bool_t +arm_neon_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height) +{ + if (src_bpp != dst_bpp) + return FALSE; + + switch (src_bpp) + { + case 16: + pixman_composite_src_0565_0565_asm_neon ( + width, height, + (uint16_t *)(((char *) dst_bits) + + dest_y * dst_stride * 4 + dest_x * 2), dst_stride * 2, + (uint16_t *)(((char *) src_bits) + + src_y * src_stride * 4 + src_x * 2), src_stride * 2); + return TRUE; + case 32: + pixman_composite_src_8888_8888_asm_neon ( + width, height, + (uint32_t *)(((char *) dst_bits) + + dest_y * dst_stride * 4 + dest_x * 4), dst_stride, + (uint32_t *)(((char *) src_bits) + + src_y * src_stride * 4 + src_x * 4), src_stride); + return TRUE; + default: + return FALSE; + } +} + +static const pixman_fast_path_t arm_neon_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, neon_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, neon_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, neon_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, neon_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, neon_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, neon_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, neon_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8, neon_composite_src_n_8_8), + + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, neon_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, neon_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565), + PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565), + PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, neon_composite_over_8888_8_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, neon_composite_over_8888_8_0565), + PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, neon_composite_over_0565_8_0565), + PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, neon_composite_over_0565_8_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, neon_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8), + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565), + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8r8g8b8, neon_composite_out_reverse_8_8888), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8b8g8r8, neon_composite_out_reverse_8_8888), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888), + /* Note: NONE repeat is not supported yet */ + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), + + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565), + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565), + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565), + + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888), + + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), + + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888), + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565), + + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), + + SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_8_0565), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_8_0565), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8_x888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_8_0565), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), + + { PIXMAN_OP_NONE }, +}; + +#define BIND_COMBINE_U(name) \ +void \ +pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \ + const uint32_t *dst, \ + const uint32_t *src, \ + const uint32_t *mask); \ + \ +void \ +pixman_composite_scanline_##name##_asm_neon (int32_t w, \ + const uint32_t *dst, \ + const uint32_t *src); \ + \ +static void \ +neon_combine_##name##_u (pixman_implementation_t *imp, \ + pixman_op_t op, \ + uint32_t * dest, \ + const uint32_t * src, \ + const uint32_t * mask, \ + int width) \ +{ \ + if (mask) \ + pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \ + src, mask); \ + else \ + pixman_composite_scanline_##name##_asm_neon (width, dest, src); \ +} + +BIND_COMBINE_U (over) +BIND_COMBINE_U (add) +BIND_COMBINE_U (out_reverse) + +pixman_implementation_t * +_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = + _pixman_implementation_create (fallback, arm_neon_fast_paths); + + imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u; + imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u; + + imp->blt = arm_neon_blt; + imp->fill = arm_neon_fill; + + return imp; +} diff --git a/src/pixman/pixman/pixman-arm-simd-asm-scaled.S b/src/pixman/pixman/pixman-arm-simd-asm-scaled.S new file mode 100644 index 0000000..7110995 --- /dev/null +++ b/src/pixman/pixman/pixman-arm-simd-asm-scaled.S @@ -0,0 +1,165 @@ +/* + * Copyright © 2008 Mozilla Corporation + * Copyright © 2010 Nokia Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Mozilla Corporation not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Mozilla Corporation makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Jeff Muizelaar (jeff@infidigm.net) + * + */ + +/* Prevent the stack from becoming executable */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + + .text + .arch armv6 + .object_arch armv4 + .arm + .altmacro + .p2align 2 + +/* Supplementary macro for setting function attributes */ +.macro pixman_asm_function fname + .func fname + .global fname +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: +.endm + +/* + * Note: This code is only using armv5te instructions (not even armv6), + * but is scheduled for ARM Cortex-A8 pipeline. So it might need to + * be split into a few variants, tuned for each microarchitecture. + * + * TODO: In order to get good performance on ARM9/ARM11 cores (which don't + * have efficient write combining), it needs to be changed to use 16-byte + * aligned writes using STM instruction. + * + * Nearest scanline scaler macro template uses the following arguments: + * fname - name of the function to generate + * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes + * t - type suffix for LDR/STR instructions + * prefetch_distance - prefetch in the source image by that many + * pixels ahead + * prefetch_braking_distance - stop prefetching when that many pixels are + * remaining before the end of scanline + */ + +.macro generate_nearest_scanline_func fname, bpp_shift, t, \ + prefetch_distance, \ + prefetch_braking_distance + +pixman_asm_function fname + W .req r0 + DST .req r1 + SRC .req r2 + VX .req r3 + UNIT_X .req ip + TMP1 .req r4 + TMP2 .req r5 + VXMASK .req r6 + PF_OFFS .req r7 + SRC_WIDTH_FIXED .req r8 + + ldr UNIT_X, [sp] + push {r4, r5, r6, r7, r8, r10} + mvn VXMASK, #((1 << bpp_shift) - 1) + ldr SRC_WIDTH_FIXED, [sp, #28] + + /* define helper macro */ + .macro scale_2_pixels + ldr&t TMP1, [SRC, TMP1] + and TMP2, VXMASK, VX, asr #(16 - bpp_shift) + adds VX, VX, UNIT_X + str&t TMP1, [DST], #(1 << bpp_shift) +9: subpls VX, VX, SRC_WIDTH_FIXED + bpl 9b + + ldr&t TMP2, [SRC, TMP2] + and TMP1, VXMASK, VX, asr #(16 - bpp_shift) + adds VX, VX, UNIT_X + str&t TMP2, [DST], #(1 << bpp_shift) +9: subpls VX, VX, SRC_WIDTH_FIXED + bpl 9b + .endm + + /* now do the scaling */ + and TMP1, VXMASK, VX, asr #(16 - bpp_shift) + adds VX, VX, UNIT_X +9: subpls VX, VX, SRC_WIDTH_FIXED + bpl 9b + subs W, W, #(8 + prefetch_braking_distance) + blt 2f + /* calculate prefetch offset */ + mov PF_OFFS, #prefetch_distance + mla PF_OFFS, UNIT_X, PF_OFFS, VX +1: /* main loop, process 8 pixels per iteration with prefetch */ + pld [SRC, PF_OFFS, asr #(16 - bpp_shift)] + add PF_OFFS, UNIT_X, lsl #3 + scale_2_pixels + scale_2_pixels + scale_2_pixels + scale_2_pixels + subs W, W, #8 + bge 1b +2: + subs W, W, #(4 - 8 - prefetch_braking_distance) + blt 2f +1: /* process the remaining pixels */ + scale_2_pixels + scale_2_pixels + subs W, W, #4 + bge 1b +2: + tst W, #2 + beq 2f + scale_2_pixels +2: + tst W, #1 + ldrne&t TMP1, [SRC, TMP1] + strne&t TMP1, [DST] + /* cleanup helper macro */ + .purgem scale_2_pixels + .unreq DST + .unreq SRC + .unreq W + .unreq VX + .unreq UNIT_X + .unreq TMP1 + .unreq TMP2 + .unreq VXMASK + .unreq PF_OFFS + .unreq SRC_WIDTH_FIXED + /* return */ + pop {r4, r5, r6, r7, r8, r10} + bx lr +.endfunc +.endm + +generate_nearest_scanline_func \ + pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32 + +generate_nearest_scanline_func \ + pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32 diff --git a/src/pixman/pixman/pixman-arm-simd-asm.S b/src/pixman/pixman/pixman-arm-simd-asm.S new file mode 100644 index 0000000..c209688 --- /dev/null +++ b/src/pixman/pixman/pixman-arm-simd-asm.S @@ -0,0 +1,613 @@ +/* + * Copyright © 2012 Raspberry Pi Foundation + * Copyright © 2012 RISC OS Open Ltd + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of the copyright holders not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. The copyright holders make no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Ben Avison (bavison@riscosopen.org) + * + */ + +/* Prevent the stack from becoming executable */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + + .text + .arch armv6 + .object_arch armv4 + .arm + .altmacro + .p2align 2 + +#include "pixman-arm-simd-asm.h" + +/* A head macro should do all processing which results in an output of up to + * 16 bytes, as far as the final load instruction. The corresponding tail macro + * should complete the processing of the up-to-16 bytes. The calling macro will + * sometimes choose to insert a preload or a decrement of X between them. + * cond ARM condition code for code block + * numbytes Number of output bytes that should be generated this time + * firstreg First WK register in which to place output + * unaligned_src Whether to use non-wordaligned loads of source image + * unaligned_mask Whether to use non-wordaligned loads of mask image + * preload If outputting 16 bytes causes 64 bytes to be read, whether an extra preload should be output + */ + +.macro blit_init + line_saved_regs STRIDE_D, STRIDE_S +.endm + +.macro blit_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + pixld cond, numbytes, firstreg, SRC, unaligned_src +.endm + +.macro blit_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment + WK4 .req STRIDE_D + WK5 .req STRIDE_S + WK6 .req MASK + WK7 .req STRIDE_M +110: pixld , 16, 0, SRC, unaligned_src + pixld , 16, 4, SRC, unaligned_src + pld [SRC, SCRATCH] + pixst , 16, 0, DST + pixst , 16, 4, DST + subs X, X, #32*8/src_bpp + bhs 110b + .unreq WK4 + .unreq WK5 + .unreq WK6 + .unreq WK7 +.endm + +generate_composite_function \ + pixman_composite_src_8888_8888_asm_armv6, 32, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \ + 4, /* prefetch distance */ \ + blit_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + blit_process_head, \ + nop_macro, /* process tail */ \ + blit_inner_loop + +generate_composite_function \ + pixman_composite_src_0565_0565_asm_armv6, 16, 0, 16, \ + FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \ + 4, /* prefetch distance */ \ + blit_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + blit_process_head, \ + nop_macro, /* process tail */ \ + blit_inner_loop + +generate_composite_function \ + pixman_composite_src_8_8_asm_armv6, 8, 0, 8, \ + FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \ + 3, /* prefetch distance */ \ + blit_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + blit_process_head, \ + nop_macro, /* process tail */ \ + blit_inner_loop + +/******************************************************************************/ + +.macro src_n_8888_init + ldr SRC, [sp, #ARGS_STACK_OFFSET] + mov STRIDE_S, SRC + mov MASK, SRC + mov STRIDE_M, SRC +.endm + +.macro src_n_0565_init + ldrh SRC, [sp, #ARGS_STACK_OFFSET] + orr SRC, SRC, lsl #16 + mov STRIDE_S, SRC + mov MASK, SRC + mov STRIDE_M, SRC +.endm + +.macro src_n_8_init + ldrb SRC, [sp, #ARGS_STACK_OFFSET] + orr SRC, SRC, lsl #8 + orr SRC, SRC, lsl #16 + mov STRIDE_S, SRC + mov MASK, SRC + mov STRIDE_M, SRC +.endm + +.macro fill_process_tail cond, numbytes, firstreg + WK4 .req SRC + WK5 .req STRIDE_S + WK6 .req MASK + WK7 .req STRIDE_M + pixst cond, numbytes, 4, DST + .unreq WK4 + .unreq WK5 + .unreq WK6 + .unreq WK7 +.endm + +generate_composite_function \ + pixman_composite_src_n_8888_asm_armv6, 0, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \ + 0, /* prefetch distance doesn't apply */ \ + src_n_8888_init \ + nop_macro, /* newline */ \ + nop_macro /* cleanup */ \ + nop_macro /* process head */ \ + fill_process_tail + +generate_composite_function \ + pixman_composite_src_n_0565_asm_armv6, 0, 0, 16, \ + FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \ + 0, /* prefetch distance doesn't apply */ \ + src_n_0565_init \ + nop_macro, /* newline */ \ + nop_macro /* cleanup */ \ + nop_macro /* process head */ \ + fill_process_tail + +generate_composite_function \ + pixman_composite_src_n_8_asm_armv6, 0, 0, 8, \ + FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \ + 0, /* prefetch distance doesn't apply */ \ + src_n_8_init \ + nop_macro, /* newline */ \ + nop_macro /* cleanup */ \ + nop_macro /* process head */ \ + fill_process_tail + +/******************************************************************************/ + +.macro src_x888_8888_pixel, cond, reg + orr&cond WK®, WK®, #0xFF000000 +.endm + +.macro pixman_composite_src_x888_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + pixld cond, numbytes, firstreg, SRC, unaligned_src +.endm + +.macro pixman_composite_src_x888_8888_process_tail cond, numbytes, firstreg + src_x888_8888_pixel cond, %(firstreg+0) + .if numbytes >= 8 + src_x888_8888_pixel cond, %(firstreg+1) + .if numbytes == 16 + src_x888_8888_pixel cond, %(firstreg+2) + src_x888_8888_pixel cond, %(firstreg+3) + .endif + .endif +.endm + +generate_composite_function \ + pixman_composite_src_x888_8888_asm_armv6, 32, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_SCRATCH, \ + 3, /* prefetch distance */ \ + nop_macro, /* init */ \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + pixman_composite_src_x888_8888_process_head, \ + pixman_composite_src_x888_8888_process_tail + +/******************************************************************************/ + +.macro src_0565_8888_init + /* Hold loop invariants in MASK and STRIDE_M */ + ldr MASK, =0x07E007E0 + mov STRIDE_M, #0xFF000000 + /* Set GE[3:0] to 1010 so SEL instructions do what we want */ + ldr SCRATCH, =0x80008000 + uadd8 SCRATCH, SCRATCH, SCRATCH +.endm + +.macro src_0565_8888_2pixels, reg1, reg2 + and SCRATCH, WK®1, MASK @ 00000GGGGGG0000000000gggggg00000 + bic WK®2, WK®1, MASK @ RRRRR000000BBBBBrrrrr000000bbbbb + orr SCRATCH, SCRATCH, SCRATCH, lsr #6 @ 00000GGGGGGGGGGGG0000ggggggggggg + mov WK®1, WK®2, lsl #16 @ rrrrr000000bbbbb0000000000000000 + mov SCRATCH, SCRATCH, ror #19 @ GGGG0000ggggggggggg00000GGGGGGGG + bic WK®2, WK®2, WK®1, lsr #16 @ RRRRR000000BBBBB0000000000000000 + orr WK®1, WK®1, WK®1, lsr #5 @ rrrrrrrrrr0bbbbbbbbbb00000000000 + orr WK®2, WK®2, WK®2, lsr #5 @ RRRRRRRRRR0BBBBBBBBBB00000000000 + pkhtb WK®1, WK®1, WK®1, asr #5 @ rrrrrrrr--------bbbbbbbb-------- + sel WK®1, WK®1, SCRATCH @ rrrrrrrrggggggggbbbbbbbb-------- + mov SCRATCH, SCRATCH, ror #16 @ ggg00000GGGGGGGGGGGG0000gggggggg + pkhtb WK®2, WK®2, WK®2, asr #5 @ RRRRRRRR--------BBBBBBBB-------- + sel WK®2, WK®2, SCRATCH @ RRRRRRRRGGGGGGGGBBBBBBBB-------- + orr WK®1, STRIDE_M, WK®1, lsr #8 @ 11111111rrrrrrrrggggggggbbbbbbbb + orr WK®2, STRIDE_M, WK®2, lsr #8 @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB +.endm + +/* This version doesn't need STRIDE_M, but is one instruction longer. + It would however be preferable for an XRGB target, since we could knock off the last 2 instructions, but is that a common case? + and SCRATCH, WK®1, MASK @ 00000GGGGGG0000000000gggggg00000 + bic WK®1, WK®1, MASK @ RRRRR000000BBBBBrrrrr000000bbbbb + orr SCRATCH, SCRATCH, SCRATCH, lsr #6 @ 00000GGGGGGGGGGGG0000ggggggggggg + mov WK®2, WK®1, lsr #16 @ 0000000000000000RRRRR000000BBBBB + mov SCRATCH, SCRATCH, ror #27 @ GGGGGGGGGGGG0000ggggggggggg00000 + bic WK®1, WK®1, WK®2, lsl #16 @ 0000000000000000rrrrr000000bbbbb + mov WK®2, WK®2, lsl #3 @ 0000000000000RRRRR000000BBBBB000 + mov WK®1, WK®1, lsl #3 @ 0000000000000rrrrr000000bbbbb000 + orr WK®2, WK®2, WK®2, lsr #5 @ 0000000000000RRRRRRRRRR0BBBBBBBB + orr WK®1, WK®1, WK®1, lsr #5 @ 0000000000000rrrrrrrrrr0bbbbbbbb + pkhbt WK®2, WK®2, WK®2, lsl #5 @ --------RRRRRRRR--------BBBBBBBB + pkhbt WK®1, WK®1, WK®1, lsl #5 @ --------rrrrrrrr--------bbbbbbbb + sel WK®2, SCRATCH, WK®2 @ --------RRRRRRRRGGGGGGGGBBBBBBBB + sel WK®1, SCRATCH, WK®1 @ --------rrrrrrrrggggggggbbbbbbbb + orr WK®2, WK®2, #0xFF000000 @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB + orr WK®1, WK®1, #0xFF000000 @ 11111111rrrrrrrrggggggggbbbbbbbb +*/ + +.macro src_0565_8888_1pixel, reg + bic SCRATCH, WK®, MASK @ 0000000000000000rrrrr000000bbbbb + and WK®, WK®, MASK @ 000000000000000000000gggggg00000 + mov SCRATCH, SCRATCH, lsl #3 @ 0000000000000rrrrr000000bbbbb000 + mov WK®, WK®, lsl #5 @ 0000000000000000gggggg0000000000 + orr SCRATCH, SCRATCH, SCRATCH, lsr #5 @ 0000000000000rrrrrrrrrr0bbbbbbbb + orr WK®, WK®, WK®, lsr #6 @ 000000000000000gggggggggggg00000 + pkhbt SCRATCH, SCRATCH, SCRATCH, lsl #5 @ --------rrrrrrrr--------bbbbbbbb + sel WK®, WK®, SCRATCH @ --------rrrrrrrrggggggggbbbbbbbb + orr WK®, WK®, #0xFF000000 @ 11111111rrrrrrrrggggggggbbbbbbbb +.endm + +.macro src_0565_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + .if numbytes == 16 + pixldst ld,, 8, firstreg, %(firstreg+2),,, SRC, unaligned_src + .elseif numbytes == 8 + pixld , 4, firstreg, SRC, unaligned_src + .elseif numbytes == 4 + pixld , 2, firstreg, SRC, unaligned_src + .endif +.endm + +.macro src_0565_8888_process_tail cond, numbytes, firstreg + .if numbytes == 16 + src_0565_8888_2pixels firstreg, %(firstreg+1) + src_0565_8888_2pixels %(firstreg+2), %(firstreg+3) + .elseif numbytes == 8 + src_0565_8888_2pixels firstreg, %(firstreg+1) + .else + src_0565_8888_1pixel firstreg + .endif +.endm + +generate_composite_function \ + pixman_composite_src_0565_8888_asm_armv6, 16, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER, \ + 3, /* prefetch distance */ \ + src_0565_8888_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + src_0565_8888_process_head, \ + src_0565_8888_process_tail + +/******************************************************************************/ + +.macro add_8_8_8pixels cond, dst1, dst2 + uqadd8&cond WK&dst1, WK&dst1, MASK + uqadd8&cond WK&dst2, WK&dst2, STRIDE_M +.endm + +.macro add_8_8_4pixels cond, dst + uqadd8&cond WK&dst, WK&dst, MASK +.endm + +.macro add_8_8_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + WK4 .req MASK + WK5 .req STRIDE_M + .if numbytes == 16 + pixld cond, 8, 4, SRC, unaligned_src + pixld cond, 16, firstreg, DST, 0 + add_8_8_8pixels cond, firstreg, %(firstreg+1) + pixld cond, 8, 4, SRC, unaligned_src + .else + pixld cond, numbytes, 4, SRC, unaligned_src + pixld cond, numbytes, firstreg, DST, 0 + .endif + .unreq WK4 + .unreq WK5 +.endm + +.macro add_8_8_process_tail cond, numbytes, firstreg + .if numbytes == 16 + add_8_8_8pixels cond, %(firstreg+2), %(firstreg+3) + .elseif numbytes == 8 + add_8_8_8pixels cond, firstreg, %(firstreg+1) + .else + add_8_8_4pixels cond, firstreg + .endif +.endm + +generate_composite_function \ + pixman_composite_add_8_8_asm_armv6, 8, 0, 8, \ + FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_PRESERVES_SCRATCH, \ + 2, /* prefetch distance */ \ + nop_macro, /* init */ \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + add_8_8_process_head, \ + add_8_8_process_tail + +/******************************************************************************/ + +.macro over_8888_8888_init + /* Hold loop invariant in MASK */ + ldr MASK, =0x00800080 + /* Set GE[3:0] to 0101 so SEL instructions do what we want */ + uadd8 SCRATCH, MASK, MASK + line_saved_regs STRIDE_D, STRIDE_S, ORIG_W +.endm + +.macro over_8888_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + WK4 .req STRIDE_D + WK5 .req STRIDE_S + WK6 .req STRIDE_M + WK7 .req ORIG_W + pixld , numbytes, %(4+firstreg), SRC, unaligned_src + pixld , numbytes, firstreg, DST, 0 + .unreq WK4 + .unreq WK5 + .unreq WK6 + .unreq WK7 +.endm + +.macro over_8888_8888_check_transparent numbytes, reg0, reg1, reg2, reg3 + /* Since these colours a premultiplied by alpha, only 0 indicates transparent (any other colour with 0 in the alpha byte is luminous) */ + teq WK®0, #0 + .if numbytes > 4 + teqeq WK®1, #0 + .if numbytes > 8 + teqeq WK®2, #0 + teqeq WK®3, #0 + .endif + .endif +.endm + +.macro over_8888_8888_prepare next + mov WK&next, WK&next, lsr #24 +.endm + +.macro over_8888_8888_1pixel src, dst, offset, next + /* src = destination component multiplier */ + rsb WK&src, WK&src, #255 + /* Split even/odd bytes of dst into SCRATCH/dst */ + uxtb16 SCRATCH, WK&dst + uxtb16 WK&dst, WK&dst, ror #8 + /* Multiply through, adding 0.5 to the upper byte of result for rounding */ + mla SCRATCH, SCRATCH, WK&src, MASK + mla WK&dst, WK&dst, WK&src, MASK + /* Where we would have had a stall between the result of the first MLA and the shifter input, + * reload the complete source pixel */ + ldr WK&src, [SRC, #offset] + /* Multiply by 257/256 to approximate 256/255 */ + uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8 + /* In this stall, start processing the next pixel */ + .if offset < -4 + mov WK&next, WK&next, lsr #24 + .endif + uxtab16 WK&dst, WK&dst, WK&dst, ror #8 + /* Recombine even/odd bytes of multiplied destination */ + mov SCRATCH, SCRATCH, ror #8 + sel WK&dst, SCRATCH, WK&dst + /* Saturated add of source to multiplied destination */ + uqadd8 WK&dst, WK&dst, WK&src +.endm + +.macro over_8888_8888_process_tail cond, numbytes, firstreg + WK4 .req STRIDE_D + WK5 .req STRIDE_S + WK6 .req STRIDE_M + WK7 .req ORIG_W + over_8888_8888_check_transparent numbytes, %(4+firstreg), %(5+firstreg), %(6+firstreg), %(7+firstreg) + beq 10f + over_8888_8888_prepare %(4+firstreg) + .set PROCESS_REG, firstreg + .set PROCESS_OFF, -numbytes + .rept numbytes / 4 + over_8888_8888_1pixel %(4+PROCESS_REG), %(0+PROCESS_REG), PROCESS_OFF, %(5+PROCESS_REG) + .set PROCESS_REG, PROCESS_REG+1 + .set PROCESS_OFF, PROCESS_OFF+4 + .endr + pixst , numbytes, firstreg, DST +10: + .unreq WK4 + .unreq WK5 + .unreq WK6 + .unreq WK7 +.endm + +generate_composite_function \ + pixman_composite_over_8888_8888_asm_armv6, 32, 0, 32 \ + FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \ + 2, /* prefetch distance */ \ + over_8888_8888_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + over_8888_8888_process_head, \ + over_8888_8888_process_tail + +/******************************************************************************/ + +/* Multiply each byte of a word by a byte. + * Useful when there aren't any obvious ways to fill the stalls with other instructions. + * word Register containing 4 bytes + * byte Register containing byte multiplier (bits 8-31 must be 0) + * tmp Scratch register + * half Register containing the constant 0x00800080 + * GE[3:0] bits must contain 0101 + */ +.macro mul_8888_8 word, byte, tmp, half + /* Split even/odd bytes of word apart */ + uxtb16 tmp, word + uxtb16 word, word, ror #8 + /* Multiply bytes together with rounding, then by 257/256 */ + mla tmp, tmp, byte, half + mla word, word, byte, half /* 1 stall follows */ + uxtab16 tmp, tmp, tmp, ror #8 /* 1 stall follows */ + uxtab16 word, word, word, ror #8 + /* Recombine bytes */ + mov tmp, tmp, ror #8 + sel word, tmp, word +.endm + +/******************************************************************************/ + +.macro over_8888_n_8888_init + /* Mask is constant */ + ldr MASK, [sp, #ARGS_STACK_OFFSET+8] + /* Hold loop invariant in STRIDE_M */ + ldr STRIDE_M, =0x00800080 + /* We only want the alpha bits of the constant mask */ + mov MASK, MASK, lsr #24 + /* Set GE[3:0] to 0101 so SEL instructions do what we want */ + uadd8 SCRATCH, STRIDE_M, STRIDE_M + line_saved_regs Y, STRIDE_D, STRIDE_S, ORIG_W +.endm + +.macro over_8888_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + WK4 .req Y + WK5 .req STRIDE_D + WK6 .req STRIDE_S + WK7 .req ORIG_W + pixld , numbytes, %(4+(firstreg%2)), SRC, unaligned_src + pixld , numbytes, firstreg, DST, 0 + .unreq WK4 + .unreq WK5 + .unreq WK6 + .unreq WK7 +.endm + +.macro over_8888_n_8888_1pixel src, dst + mul_8888_8 WK&src, MASK, SCRATCH, STRIDE_M + sub WK7, WK6, WK&src, lsr #24 + mul_8888_8 WK&dst, WK7, SCRATCH, STRIDE_M + uqadd8 WK&dst, WK&dst, WK&src +.endm + +.macro over_8888_n_8888_process_tail cond, numbytes, firstreg + WK4 .req Y + WK5 .req STRIDE_D + WK6 .req STRIDE_S + WK7 .req ORIG_W + over_8888_8888_check_transparent numbytes, %(4+(firstreg%2)), %(5+(firstreg%2)), %(6+firstreg), %(7+firstreg) + beq 10f + mov WK6, #255 + .set PROCESS_REG, firstreg + .rept numbytes / 4 + .if numbytes == 16 && PROCESS_REG == 2 + /* We're using WK6 and WK7 as temporaries, so half way through + * 4 pixels, reload the second two source pixels but this time + * into WK4 and WK5 */ + ldmdb SRC, {WK4, WK5} + .endif + over_8888_n_8888_1pixel %(4+(PROCESS_REG%2)), %(PROCESS_REG) + .set PROCESS_REG, PROCESS_REG+1 + .endr + pixst , numbytes, firstreg, DST +10: + .unreq WK4 + .unreq WK5 + .unreq WK6 + .unreq WK7 +.endm + +generate_composite_function \ + pixman_composite_over_8888_n_8888_asm_armv6, 32, 0, 32 \ + FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \ + 2, /* prefetch distance */ \ + over_8888_n_8888_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + over_8888_n_8888_process_head, \ + over_8888_n_8888_process_tail + +/******************************************************************************/ + +.macro over_n_8_8888_init + /* Source is constant, but splitting it into even/odd bytes is a loop invariant */ + ldr SRC, [sp, #ARGS_STACK_OFFSET] + /* Not enough registers to hold this constant, but we still use it here to set GE[3:0] */ + ldr SCRATCH, =0x00800080 + uxtb16 STRIDE_S, SRC + uxtb16 SRC, SRC, ror #8 + /* Set GE[3:0] to 0101 so SEL instructions do what we want */ + uadd8 SCRATCH, SCRATCH, SCRATCH + line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W +.endm + +.macro over_n_8_8888_newline + ldr STRIDE_D, =0x00800080 + b 1f + .ltorg +1: +.endm + +.macro over_n_8_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + WK4 .req STRIDE_M + pixld , numbytes/4, 4, MASK, unaligned_mask + pixld , numbytes, firstreg, DST, 0 + .unreq WK4 +.endm + +.macro over_n_8_8888_1pixel src, dst + uxtb Y, WK4, ror #src*8 + /* Trailing part of multiplication of source */ + mla SCRATCH, STRIDE_S, Y, STRIDE_D + mla Y, SRC, Y, STRIDE_D + mov ORIG_W, #255 + uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8 + uxtab16 Y, Y, Y, ror #8 + mov SCRATCH, SCRATCH, ror #8 + sub ORIG_W, ORIG_W, Y, lsr #24 + sel Y, SCRATCH, Y + /* Then multiply the destination */ + mul_8888_8 WK&dst, ORIG_W, SCRATCH, STRIDE_D + uqadd8 WK&dst, WK&dst, Y +.endm + +.macro over_n_8_8888_process_tail cond, numbytes, firstreg + WK4 .req STRIDE_M + teq WK4, #0 + beq 10f + .set PROCESS_REG, firstreg + .rept numbytes / 4 + over_n_8_8888_1pixel %(PROCESS_REG-firstreg), %(PROCESS_REG) + .set PROCESS_REG, PROCESS_REG+1 + .endr + pixst , numbytes, firstreg, DST +10: + .unreq WK4 +.endm + +generate_composite_function \ + pixman_composite_over_n_8_8888_asm_armv6, 0, 8, 32 \ + FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \ + 2, /* prefetch distance */ \ + over_n_8_8888_init, \ + over_n_8_8888_newline, \ + nop_macro, /* cleanup */ \ + over_n_8_8888_process_head, \ + over_n_8_8888_process_tail + +/******************************************************************************/ + diff --git a/src/pixman/pixman/pixman-arm-simd-asm.h b/src/pixman/pixman/pixman-arm-simd-asm.h new file mode 100644 index 0000000..6543606 --- /dev/null +++ b/src/pixman/pixman/pixman-arm-simd-asm.h @@ -0,0 +1,908 @@ +/* + * Copyright © 2012 Raspberry Pi Foundation + * Copyright © 2012 RISC OS Open Ltd + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of the copyright holders not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. The copyright holders make no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Ben Avison (bavison@riscosopen.org) + * + */ + +/* + * Because the alignment of pixel data to cachelines, and even the number of + * cachelines per row can vary from row to row, and because of the need to + * preload each scanline once and only once, this prefetch strategy treats + * each row of pixels independently. When a pixel row is long enough, there + * are three distinct phases of prefetch: + * * an inner loop section, where each time a cacheline of data is + * processed, another cacheline is preloaded (the exact distance ahead is + * determined empirically using profiling results from lowlevel-blt-bench) + * * a leading section, where enough cachelines are preloaded to ensure no + * cachelines escape being preloaded when the inner loop starts + * * a trailing section, where a limited number (0 or more) of cachelines + * are preloaded to deal with data (if any) that hangs off the end of the + * last iteration of the inner loop, plus any trailing bytes that were not + * enough to make up one whole iteration of the inner loop + * + * There are (in general) three distinct code paths, selected between + * depending upon how long the pixel row is. If it is long enough that there + * is at least one iteration of the inner loop (as described above) then + * this is described as the "wide" case. If it is shorter than that, but + * there are still enough bytes output that there is at least one 16-byte- + * long, 16-byte-aligned write to the destination (the optimum type of + * write), then this is the "medium" case. If it is not even this long, then + * this is the "narrow" case, and there is no attempt to align writes to + * 16-byte boundaries. In the "medium" and "narrow" cases, all the + * cachelines containing data from the pixel row are prefetched up-front. + */ + +/* + * Determine whether we put the arguments on the stack for debugging. + */ +#undef DEBUG_PARAMS + +/* + * Bit flags for 'generate_composite_function' macro which are used + * to tune generated functions behavior. + */ +.set FLAG_DST_WRITEONLY, 0 +.set FLAG_DST_READWRITE, 1 +.set FLAG_COND_EXEC, 0 +.set FLAG_BRANCH_OVER, 2 +.set FLAG_PROCESS_PRESERVES_PSR, 0 +.set FLAG_PROCESS_CORRUPTS_PSR, 4 +.set FLAG_PROCESS_DOESNT_STORE, 0 +.set FLAG_PROCESS_DOES_STORE, 8 /* usually because it needs to conditionally skip it */ +.set FLAG_NO_SPILL_LINE_VARS, 0 +.set FLAG_SPILL_LINE_VARS_WIDE, 16 +.set FLAG_SPILL_LINE_VARS_NON_WIDE, 32 +.set FLAG_SPILL_LINE_VARS, 48 +.set FLAG_PROCESS_CORRUPTS_SCRATCH, 0 +.set FLAG_PROCESS_PRESERVES_SCRATCH, 64 + +/* + * Offset into stack where mask and source pointer/stride can be accessed. + */ +#ifdef DEBUG_PARAMS +.set ARGS_STACK_OFFSET, (9*4+9*4) +#else +.set ARGS_STACK_OFFSET, (9*4) +#endif + +/* + * Constants for selecting preferable prefetch type. + */ +.set PREFETCH_TYPE_NONE, 0 +.set PREFETCH_TYPE_STANDARD, 1 + +/* + * Definitions of macros for load/store of pixel data. + */ + +.macro pixldst op, cond=al, numbytes, reg0, reg1, reg2, reg3, base, unaligned=0 + .if numbytes == 16 + .if unaligned == 1 + op&r&cond WK®0, [base], #4 + op&r&cond WK®1, [base], #4 + op&r&cond WK®2, [base], #4 + op&r&cond WK®3, [base], #4 + .else + op&m&cond&ia base!, {WK®0,WK®1,WK®2,WK®3} + .endif + .elseif numbytes == 8 + .if unaligned == 1 + op&r&cond WK®0, [base], #4 + op&r&cond WK®1, [base], #4 + .else + op&m&cond&ia base!, {WK®0,WK®1} + .endif + .elseif numbytes == 4 + op&r&cond WK®0, [base], #4 + .elseif numbytes == 2 + op&r&cond&h WK®0, [base], #2 + .elseif numbytes == 1 + op&r&cond&b WK®0, [base], #1 + .else + .error "unsupported size: numbytes" + .endif +.endm + +.macro pixst_baseupdated cond, numbytes, reg0, reg1, reg2, reg3, base + .if numbytes == 16 + stm&cond&db base, {WK®0,WK®1,WK®2,WK®3} + .elseif numbytes == 8 + stm&cond&db base, {WK®0,WK®1} + .elseif numbytes == 4 + str&cond WK®0, [base, #-4] + .elseif numbytes == 2 + str&cond&h WK®0, [base, #-2] + .elseif numbytes == 1 + str&cond&b WK®0, [base, #-1] + .else + .error "unsupported size: numbytes" + .endif +.endm + +.macro pixld cond, numbytes, firstreg, base, unaligned + pixldst ld, cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base, unaligned +.endm + +.macro pixst cond, numbytes, firstreg, base + .if (flags) & FLAG_DST_READWRITE + pixst_baseupdated cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base + .else + pixldst st, cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base + .endif +.endm + +.macro PF a, x:vararg + .if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_STANDARD) + a x + .endif +.endm + + +.macro preload_leading_step1 bpp, ptr, base +/* If the destination is already 16-byte aligned, then we need to preload + * between 0 and prefetch_distance (inclusive) cache lines ahead so there + * are no gaps when the inner loop starts. + */ + .if bpp > 0 + PF bic, ptr, base, #31 + .set OFFSET, 0 + .rept prefetch_distance+1 + PF pld, [ptr, #OFFSET] + .set OFFSET, OFFSET+32 + .endr + .endif +.endm + +.macro preload_leading_step2 bpp, bpp_shift, ptr, base +/* However, if the destination is not 16-byte aligned, we may need to + * preload more cache lines than that. The question we need to ask is: + * are the bytes corresponding to the leading pixels more than the amount + * by which the source pointer will be rounded down for preloading, and if + * so, by how many cache lines? Effectively, we want to calculate + * leading_bytes = ((-dst)&15)*src_bpp/dst_bpp + * inner_loop_offset = (src+leading_bytes)&31 + * extra_needed = leading_bytes - inner_loop_offset + * and test if extra_needed is <= 0, <= 32, or > 32 (where > 32 is only + * possible when there are 4 src bytes for every 1 dst byte). + */ + .if bpp > 0 + .ifc base,DST + /* The test can be simplified further when preloading the destination */ + PF tst, base, #16 + PF beq, 61f + .else + .if bpp/dst_w_bpp == 4 + PF add, SCRATCH, base, WK0, lsl #bpp_shift-dst_bpp_shift + PF and, SCRATCH, SCRATCH, #31 + PF rsb, SCRATCH, SCRATCH, WK0, lsl #bpp_shift-dst_bpp_shift + PF sub, SCRATCH, SCRATCH, #1 /* so now ranges are -16..-1 / 0..31 / 32..63 */ + PF movs, SCRATCH, SCRATCH, #32-6 /* so this sets NC / nc / Nc */ + PF bcs, 61f + PF bpl, 60f + PF pld, [ptr, #32*(prefetch_distance+2)] + .else + PF mov, SCRATCH, base, lsl #32-5 + PF add, SCRATCH, SCRATCH, WK0, lsl #32-5+bpp_shift-dst_bpp_shift + PF rsbs, SCRATCH, SCRATCH, WK0, lsl #32-5+bpp_shift-dst_bpp_shift + PF bls, 61f + .endif + .endif +60: PF pld, [ptr, #32*(prefetch_distance+1)] +61: + .endif +.endm + +#define IS_END_OF_GROUP(INDEX,SIZE) ((SIZE) < 2 || ((INDEX) & ~((INDEX)+1)) & ((SIZE)/2)) +.macro preload_middle bpp, base, scratch_holds_offset + .if bpp > 0 + /* prefetch distance = 256/bpp, stm distance = 128/dst_w_bpp */ + .if IS_END_OF_GROUP(SUBBLOCK,256/128*dst_w_bpp/bpp) + .if scratch_holds_offset + PF pld, [base, SCRATCH] + .else + PF bic, SCRATCH, base, #31 + PF pld, [SCRATCH, #32*prefetch_distance] + .endif + .endif + .endif +.endm + +.macro preload_trailing bpp, bpp_shift, base + .if bpp > 0 + .if bpp*pix_per_block > 256 + /* Calculations are more complex if more than one fetch per block */ + PF and, WK1, base, #31 + PF add, WK1, WK1, WK0, lsl #bpp_shift + PF add, WK1, WK1, #32*(bpp*pix_per_block/256-1)*(prefetch_distance+1) + PF bic, SCRATCH, base, #31 +80: PF pld, [SCRATCH, #32*(prefetch_distance+1)] + PF add, SCRATCH, SCRATCH, #32 + PF subs, WK1, WK1, #32 + PF bhi, 80b + .else + /* If exactly one fetch per block, then we need either 0, 1 or 2 extra preloads */ + PF mov, SCRATCH, base, lsl #32-5 + PF adds, SCRATCH, SCRATCH, X, lsl #32-5+bpp_shift + PF adceqs, SCRATCH, SCRATCH, #0 + /* The instruction above has two effects: ensures Z is only + * set if C was clear (so Z indicates that both shifted quantities + * were 0), and clears C if Z was set (so C indicates that the sum + * of the shifted quantities was greater and not equal to 32) */ + PF beq, 82f + PF bic, SCRATCH, base, #31 + PF bcc, 81f + PF pld, [SCRATCH, #32*(prefetch_distance+2)] +81: PF pld, [SCRATCH, #32*(prefetch_distance+1)] +82: + .endif + .endif +.endm + + +.macro preload_line narrow_case, bpp, bpp_shift, base +/* "narrow_case" - just means that the macro was invoked from the "narrow" + * code path rather than the "medium" one - because in the narrow case, + * the row of pixels is known to output no more than 30 bytes, then + * (assuming the source pixels are no wider than the the destination + * pixels) they cannot possibly straddle more than 2 32-byte cachelines, + * meaning there's no need for a loop. + * "bpp" - number of bits per pixel in the channel (source, mask or + * destination) that's being preloaded, or 0 if this channel is not used + * for reading + * "bpp_shift" - log2 of ("bpp"/8) (except if "bpp"=0 of course) + * "base" - base address register of channel to preload (SRC, MASK or DST) + */ + .if bpp > 0 + .if narrow_case && (bpp <= dst_w_bpp) + /* In these cases, each line for each channel is in either 1 or 2 cache lines */ + PF bic, WK0, base, #31 + PF pld, [WK0] + PF add, WK1, base, X, LSL #bpp_shift + PF sub, WK1, WK1, #1 + PF bic, WK1, WK1, #31 + PF cmp, WK1, WK0 + PF beq, 90f + PF pld, [WK1] +90: + .else + PF bic, WK0, base, #31 + PF pld, [WK0] + PF add, WK1, base, X, lsl #bpp_shift + PF sub, WK1, WK1, #1 + PF bic, WK1, WK1, #31 + PF cmp, WK1, WK0 + PF beq, 92f +91: PF add, WK0, WK0, #32 + PF cmp, WK0, WK1 + PF pld, [WK0] + PF bne, 91b +92: + .endif + .endif +.endm + + +.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx + process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 + .if decrementx + sub&cond X, X, #8*numbytes/dst_w_bpp + .endif + process_tail cond, numbytes, firstreg + .if !((flags) & FLAG_PROCESS_DOES_STORE) + pixst cond, numbytes, firstreg, DST + .endif +.endm + +.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx + .if (flags) & FLAG_BRANCH_OVER + .ifc cond,mi + bpl 100f + .endif + .ifc cond,cs + bcc 100f + .endif + .ifc cond,ne + beq 100f + .endif + conditional_process1_helper , process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx +100: + .else + conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx + .endif +.endm + +.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx + .if (flags) & (FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE) + /* Can't interleave reads and writes */ + test + conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx + .if (flags) & FLAG_PROCESS_CORRUPTS_PSR + test + .endif + conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx + .else + /* Can interleave reads and writes for better scheduling */ + test + process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 + process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 + .if decrementx + sub&cond1 X, X, #8*numbytes1/dst_w_bpp + sub&cond2 X, X, #8*numbytes2/dst_w_bpp + .endif + process_tail cond1, numbytes1, firstreg1 + process_tail cond2, numbytes2, firstreg2 + pixst cond1, numbytes1, firstreg1, DST + pixst cond2, numbytes2, firstreg2, DST + .endif +.endm + + +.macro test_bits_1_0_ptr + movs SCRATCH, WK0, lsl #32-1 /* C,N = bits 1,0 of DST */ +.endm + +.macro test_bits_3_2_ptr + movs SCRATCH, WK0, lsl #32-3 /* C,N = bits 3, 2 of DST */ +.endm + +.macro leading_15bytes process_head, process_tail + /* On entry, WK0 bits 0-3 = number of bytes until destination is 16-byte aligned */ + /* Use unaligned loads in all cases for simplicity */ + .if dst_w_bpp == 8 + conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, 1 + .elseif dst_w_bpp == 16 + test_bits_1_0_ptr + conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, 1 + .endif + conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, 1 +.endm + +.macro test_bits_3_2_pix + movs SCRATCH, X, lsl #dst_bpp_shift+32-3 +.endm + +.macro test_bits_1_0_pix + .if dst_w_bpp == 8 + movs SCRATCH, X, lsl #dst_bpp_shift+32-1 + .else + movs SCRATCH, X, lsr #1 + .endif +.endm + +.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask + conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 + .if dst_w_bpp == 16 + test_bits_1_0_pix + conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 + .elseif dst_w_bpp == 8 + conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 + .endif +.endm + + +.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment +110: + .set SUBBLOCK, 0 /* this is a count of STMs; there can be up to 8 STMs per block */ + .rept pix_per_block*dst_w_bpp/128 + process_head , 16, 0, unaligned_src, unaligned_mask, 1 + .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) + preload_middle src_bpp, SRC, 1 + .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) + preload_middle mask_bpp, MASK, 1 + .else + preload_middle src_bpp, SRC, 0 + preload_middle mask_bpp, MASK, 0 + .endif + .if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0) + /* Because we know that writes are 16-byte aligned, it's relatively easy to ensure that + * destination prefetches are 32-byte aligned. It's also the easiest channel to offset + * preloads for, to achieve staggered prefetches for multiple channels, because there are + * always two STMs per prefetch, so there is always an opposite STM on which to put the + * preload. Note, no need to BIC the base register here */ + PF pld, [DST, #32*prefetch_distance - dst_alignment] + .endif + process_tail , 16, 0 + .if !((flags) & FLAG_PROCESS_DOES_STORE) + pixst , 16, 0, DST + .endif + .set SUBBLOCK, SUBBLOCK+1 + .endr + subs X, X, #pix_per_block + bhs 110b +.endm + +.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask + /* Destination now 16-byte aligned; we have at least one block before we have to stop preloading */ + .if dst_r_bpp > 0 + tst DST, #16 + bne 111f + process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 16 + b 112f +111: + .endif + process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 0 +112: + /* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */ + .if (src_bpp*pix_per_block > 256) || (mask_bpp*pix_per_block > 256) || (dst_r_bpp*pix_per_block > 256) + PF and, WK0, X, #pix_per_block-1 + .endif + preload_trailing src_bpp, src_bpp_shift, SRC + preload_trailing mask_bpp, mask_bpp_shift, MASK + preload_trailing dst_r_bpp, dst_bpp_shift, DST + add X, X, #(prefetch_distance+2)*pix_per_block - 128/dst_w_bpp + /* The remainder of the line is handled identically to the medium case */ + medium_case_inner_loop_and_trailing_pixels process_head, process_tail,, exit_label, unaligned_src, unaligned_mask +.endm + +.macro medium_case_inner_loop_and_trailing_pixels process_head, process_tail, unused, exit_label, unaligned_src, unaligned_mask +120: + process_head , 16, 0, unaligned_src, unaligned_mask, 0 + process_tail , 16, 0 + .if !((flags) & FLAG_PROCESS_DOES_STORE) + pixst , 16, 0, DST + .endif + subs X, X, #128/dst_w_bpp + bhs 120b + /* Trailing pixels */ + tst X, #128/dst_w_bpp - 1 + beq exit_label + trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask +.endm + +.macro narrow_case_inner_loop_and_trailing_pixels process_head, process_tail, unused, exit_label, unaligned_src, unaligned_mask + tst X, #16*8/dst_w_bpp + conditional_process1 ne, process_head, process_tail, 16, 0, unaligned_src, unaligned_mask, 0 + /* Trailing pixels */ + /* In narrow case, it's relatively unlikely to be aligned, so let's do without a branch here */ + trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask +.endm + +.macro switch_on_alignment action, process_head, process_tail, process_inner_loop, exit_label + /* Note that if we're reading the destination, it's already guaranteed to be aligned at this point */ + .if mask_bpp == 8 || mask_bpp == 16 + tst MASK, #3 + bne 141f + .endif + .if src_bpp == 8 || src_bpp == 16 + tst SRC, #3 + bne 140f + .endif + action process_head, process_tail, process_inner_loop, exit_label, 0, 0 + .if src_bpp == 8 || src_bpp == 16 + b exit_label +140: + action process_head, process_tail, process_inner_loop, exit_label, 1, 0 + .endif + .if mask_bpp == 8 || mask_bpp == 16 + b exit_label +141: + .if src_bpp == 8 || src_bpp == 16 + tst SRC, #3 + bne 142f + .endif + action process_head, process_tail, process_inner_loop, exit_label, 0, 1 + .if src_bpp == 8 || src_bpp == 16 + b exit_label +142: + action process_head, process_tail, process_inner_loop, exit_label, 1, 1 + .endif + .endif +.endm + + +.macro end_of_line restore_x, vars_spilled, loop_label, last_one + .if vars_spilled + /* Sadly, GAS doesn't seem have an equivalent of the DCI directive? */ + /* This is ldmia sp,{} */ + .word 0xE89D0000 | LINE_SAVED_REGS + .endif + subs Y, Y, #1 + .if vars_spilled + .if (LINE_SAVED_REGS) & (1<<1) + str Y, [sp] + .endif + .endif + add DST, DST, STRIDE_D + .if src_bpp > 0 + add SRC, SRC, STRIDE_S + .endif + .if mask_bpp > 0 + add MASK, MASK, STRIDE_M + .endif + .if restore_x + mov X, ORIG_W + .endif + bhs loop_label + .ifc "last_one","" + .if vars_spilled + b 197f + .else + b 198f + .endif + .else + .if (!vars_spilled) && ((flags) & FLAG_SPILL_LINE_VARS) + b 198f + .endif + .endif +.endm + + +.macro generate_composite_function fname, \ + src_bpp_, \ + mask_bpp_, \ + dst_w_bpp_, \ + flags_, \ + prefetch_distance_, \ + init, \ + newline, \ + cleanup, \ + process_head, \ + process_tail, \ + process_inner_loop + + .func fname + .global fname + /* For ELF format also set function visibility to hidden */ +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif + +/* + * Make some macro arguments globally visible and accessible + * from other macros + */ + .set src_bpp, src_bpp_ + .set mask_bpp, mask_bpp_ + .set dst_w_bpp, dst_w_bpp_ + .set flags, flags_ + .set prefetch_distance, prefetch_distance_ + +/* + * Select prefetch type for this function. + */ + .if prefetch_distance == 0 + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE + .else + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_STANDARD + .endif + + .if src_bpp == 32 + .set src_bpp_shift, 2 + .elseif src_bpp == 24 + .set src_bpp_shift, 0 + .elseif src_bpp == 16 + .set src_bpp_shift, 1 + .elseif src_bpp == 8 + .set src_bpp_shift, 0 + .elseif src_bpp == 0 + .set src_bpp_shift, -1 + .else + .error "requested src bpp (src_bpp) is not supported" + .endif + + .if mask_bpp == 32 + .set mask_bpp_shift, 2 + .elseif mask_bpp == 24 + .set mask_bpp_shift, 0 + .elseif mask_bpp == 8 + .set mask_bpp_shift, 0 + .elseif mask_bpp == 0 + .set mask_bpp_shift, -1 + .else + .error "requested mask bpp (mask_bpp) is not supported" + .endif + + .if dst_w_bpp == 32 + .set dst_bpp_shift, 2 + .elseif dst_w_bpp == 24 + .set dst_bpp_shift, 0 + .elseif dst_w_bpp == 16 + .set dst_bpp_shift, 1 + .elseif dst_w_bpp == 8 + .set dst_bpp_shift, 0 + .else + .error "requested dst bpp (dst_w_bpp) is not supported" + .endif + + .if (((flags) & FLAG_DST_READWRITE) != 0) + .set dst_r_bpp, dst_w_bpp + .else + .set dst_r_bpp, 0 + .endif + + .set pix_per_block, 16*8/dst_w_bpp + .if src_bpp != 0 + .if 32*8/src_bpp > pix_per_block + .set pix_per_block, 32*8/src_bpp + .endif + .endif + .if mask_bpp != 0 + .if 32*8/mask_bpp > pix_per_block + .set pix_per_block, 32*8/mask_bpp + .endif + .endif + .if dst_r_bpp != 0 + .if 32*8/dst_r_bpp > pix_per_block + .set pix_per_block, 32*8/dst_r_bpp + .endif + .endif + +/* The standard entry conditions set up by pixman-arm-common.h are: + * r0 = width (pixels) + * r1 = height (rows) + * r2 = pointer to top-left pixel of destination + * r3 = destination stride (pixels) + * [sp] = source pixel value, or pointer to top-left pixel of source + * [sp,#4] = 0 or source stride (pixels) + * The following arguments are unused for non-mask operations + * [sp,#8] = mask pixel value, or pointer to top-left pixel of mask + * [sp,#12] = 0 or mask stride (pixels) + */ + +/* + * Assign symbolic names to registers + */ + X .req r0 /* pixels to go on this line */ + Y .req r1 /* lines to go */ + DST .req r2 /* destination pixel pointer */ + STRIDE_D .req r3 /* destination stride (bytes, minus width) */ + SRC .req r4 /* source pixel pointer */ + STRIDE_S .req r5 /* source stride (bytes, minus width) */ + MASK .req r6 /* mask pixel pointer (if applicable) */ + STRIDE_M .req r7 /* mask stride (bytes, minus width) */ + WK0 .req r8 /* pixel data registers */ + WK1 .req r9 + WK2 .req r10 + WK3 .req r11 + SCRATCH .req r12 + ORIG_W .req r14 /* width (pixels) */ + +fname: + push {r4-r11, lr} /* save all registers */ + + subs Y, Y, #1 + blo 199f + +#ifdef DEBUG_PARAMS + sub sp, sp, #9*4 +#endif + + .if src_bpp > 0 + ldr SRC, [sp, #ARGS_STACK_OFFSET] + ldr STRIDE_S, [sp, #ARGS_STACK_OFFSET+4] + .endif + .if mask_bpp > 0 + ldr MASK, [sp, #ARGS_STACK_OFFSET+8] + ldr STRIDE_M, [sp, #ARGS_STACK_OFFSET+12] + .endif + +#ifdef DEBUG_PARAMS + add Y, Y, #1 + stmia sp, {r0-r7,pc} + sub Y, Y, #1 +#endif + + init + + lsl STRIDE_D, #dst_bpp_shift /* stride in bytes */ + sub STRIDE_D, STRIDE_D, X, lsl #dst_bpp_shift + .if src_bpp > 0 + lsl STRIDE_S, #src_bpp_shift + sub STRIDE_S, STRIDE_S, X, lsl #src_bpp_shift + .endif + .if mask_bpp > 0 + lsl STRIDE_M, #mask_bpp_shift + sub STRIDE_M, STRIDE_M, X, lsl #mask_bpp_shift + .endif + + /* Are we not even wide enough to have one 16-byte aligned 16-byte block write? */ + cmp X, #2*16*8/dst_w_bpp - 1 + blo 170f + .if src_bpp || mask_bpp || dst_r_bpp /* Wide and medium cases are the same for fill */ + /* To preload ahead on the current line, we need at least (prefetch_distance+2) 32-byte blocks on all prefetch channels */ + cmp X, #(prefetch_distance+3)*pix_per_block - 1 + blo 160f + + /* Wide case */ + /* Adjust X so that the decrement instruction can also test for + * inner loop termination. We want it to stop when there are + * (prefetch_distance+1) complete blocks to go. */ + sub X, X, #(prefetch_distance+2)*pix_per_block + mov ORIG_W, X + .if (flags) & FLAG_SPILL_LINE_VARS_WIDE + /* This is stmdb sp!,{} */ + .word 0xE92D0000 | LINE_SAVED_REGS + .endif +151: /* New line */ + newline + preload_leading_step1 src_bpp, WK1, SRC + preload_leading_step1 mask_bpp, WK2, MASK + preload_leading_step1 dst_r_bpp, WK3, DST + + tst DST, #15 + beq 154f + rsb WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */ + .if (src_bpp != 0 && src_bpp != 2*dst_w_bpp) || (mask_bpp != 0 && mask_bpp != 2*dst_w_bpp) + PF and, WK0, WK0, #15 + .endif + + preload_leading_step2 src_bpp, src_bpp_shift, WK1, SRC + preload_leading_step2 mask_bpp, mask_bpp_shift, WK2, MASK + preload_leading_step2 dst_r_bpp, dst_bpp_shift, WK3, DST + + leading_15bytes process_head, process_tail + +154: /* Destination now 16-byte aligned; we have at least one prefetch on each channel as well as at least one 16-byte output block */ + .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) + and SCRATCH, SRC, #31 + rsb SCRATCH, SCRATCH, #32*prefetch_distance + .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) + and SCRATCH, MASK, #31 + rsb SCRATCH, SCRATCH, #32*prefetch_distance + .endif + .ifc "process_inner_loop","" + switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, wide_case_inner_loop, 157f + .else + switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, process_inner_loop, 157f + .endif + +157: /* Check for another line */ + end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_WIDE), 151b + .endif + + .ltorg + +160: /* Medium case */ + mov ORIG_W, X + .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE + /* This is stmdb sp!,{} */ + .word 0xE92D0000 | LINE_SAVED_REGS + .endif +161: /* New line */ + newline + preload_line 0, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */ + preload_line 0, mask_bpp, mask_bpp_shift, MASK + preload_line 0, dst_r_bpp, dst_bpp_shift, DST + + sub X, X, #128/dst_w_bpp /* simplifies inner loop termination */ + tst DST, #15 + beq 164f + rsb WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */ + + leading_15bytes process_head, process_tail + +164: /* Destination now 16-byte aligned; we have at least one 16-byte output block */ + switch_on_alignment medium_case_inner_loop_and_trailing_pixels, process_head, process_tail,, 167f + +167: /* Check for another line */ + end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 161b + + .ltorg + +170: /* Narrow case, less than 31 bytes, so no guarantee of at least one 16-byte block */ + .if dst_w_bpp < 32 + mov ORIG_W, X + .endif + .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE + /* This is stmdb sp!,{} */ + .word 0xE92D0000 | LINE_SAVED_REGS + .endif +171: /* New line */ + newline + preload_line 1, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */ + preload_line 1, mask_bpp, mask_bpp_shift, MASK + preload_line 1, dst_r_bpp, dst_bpp_shift, DST + + .if dst_w_bpp == 8 + tst DST, #3 + beq 174f +172: subs X, X, #1 + blo 177f + process_head , 1, 0, 1, 1, 0 + process_tail , 1, 0 + .if !((flags) & FLAG_PROCESS_DOES_STORE) + pixst , 1, 0, DST + .endif + tst DST, #3 + bne 172b + .elseif dst_w_bpp == 16 + tst DST, #2 + beq 174f + subs X, X, #1 + blo 177f + process_head , 2, 0, 1, 1, 0 + process_tail , 2, 0 + .if !((flags) & FLAG_PROCESS_DOES_STORE) + pixst , 2, 0, DST + .endif + .endif + +174: /* Destination now 4-byte aligned; we have 0 or more output bytes to go */ + switch_on_alignment narrow_case_inner_loop_and_trailing_pixels, process_head, process_tail,, 177f + +177: /* Check for another line */ + end_of_line %(dst_w_bpp < 32), %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 171b, last_one + +197: + .if (flags) & FLAG_SPILL_LINE_VARS + add sp, sp, #LINE_SAVED_REG_COUNT*4 + .endif +198: + cleanup + +#ifdef DEBUG_PARAMS + add sp, sp, #9*4 /* junk the debug copy of arguments */ +#endif +199: + pop {r4-r11, pc} /* exit */ + + .ltorg + + .unreq X + .unreq Y + .unreq DST + .unreq STRIDE_D + .unreq SRC + .unreq STRIDE_S + .unreq MASK + .unreq STRIDE_M + .unreq WK0 + .unreq WK1 + .unreq WK2 + .unreq WK3 + .unreq SCRATCH + .unreq ORIG_W + .endfunc +.endm + +.macro line_saved_regs x:vararg + .set LINE_SAVED_REGS, 0 + .set LINE_SAVED_REG_COUNT, 0 + .irp SAVED_REG,x + .ifc "SAVED_REG","Y" + .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<1) + .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 + .endif + .ifc "SAVED_REG","STRIDE_D" + .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<3) + .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 + .endif + .ifc "SAVED_REG","STRIDE_S" + .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<5) + .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 + .endif + .ifc "SAVED_REG","STRIDE_M" + .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<7) + .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 + .endif + .ifc "SAVED_REG","ORIG_W" + .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<14) + .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 + .endif + .endr +.endm + +.macro nop_macro x:vararg +.endm diff --git a/src/pixman/pixman/pixman-arm-simd.c b/src/pixman/pixman/pixman-arm-simd.c new file mode 100644 index 0000000..af062e1 --- /dev/null +++ b/src/pixman/pixman/pixman-arm-simd.c @@ -0,0 +1,257 @@ +/* + * Copyright © 2008 Mozilla Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Mozilla Corporation not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Mozilla Corporation makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Jeff Muizelaar (jeff@infidigm.net) + * + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "pixman-private.h" +#include "pixman-arm-common.h" +#include "pixman-inlines.h" + +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_x888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_0565, + uint16_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_8888, + uint16_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888, + uint32_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888, + uint8_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC, + uint16_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC, + uint32_t, uint32_t) + +void +pixman_composite_src_n_8888_asm_armv6 (int32_t w, + int32_t h, + uint32_t *dst, + int32_t dst_stride, + uint32_t src); + +void +pixman_composite_src_n_0565_asm_armv6 (int32_t w, + int32_t h, + uint16_t *dst, + int32_t dst_stride, + uint16_t src); + +void +pixman_composite_src_n_8_asm_armv6 (int32_t w, + int32_t h, + uint8_t *dst, + int32_t dst_stride, + uint8_t src); + +static pixman_bool_t +arm_simd_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, /* in 32-bit words */ + int bpp, + int x, + int y, + int width, + int height, + uint32_t _xor) +{ + /* stride is always multiple of 32bit units in pixman */ + uint32_t byte_stride = stride * sizeof(uint32_t); + + switch (bpp) + { + case 8: + pixman_composite_src_n_8_asm_armv6 ( + width, + height, + (uint8_t *)(((char *) bits) + y * byte_stride + x), + byte_stride, + _xor & 0xff); + return TRUE; + case 16: + pixman_composite_src_n_0565_asm_armv6 ( + width, + height, + (uint16_t *)(((char *) bits) + y * byte_stride + x * 2), + byte_stride / 2, + _xor & 0xffff); + return TRUE; + case 32: + pixman_composite_src_n_8888_asm_armv6 ( + width, + height, + (uint32_t *)(((char *) bits) + y * byte_stride + x * 4), + byte_stride / 4, + _xor); + return TRUE; + default: + return FALSE; + } +} + +static pixman_bool_t +arm_simd_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, /* in 32-bit words */ + int dst_stride, /* in 32-bit words */ + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height) +{ + if (src_bpp != dst_bpp) + return FALSE; + + switch (src_bpp) + { + case 8: + pixman_composite_src_8_8_asm_armv6 ( + width, height, + (uint8_t *)(((char *) dst_bits) + + dest_y * dst_stride * 4 + dest_x * 1), dst_stride * 4, + (uint8_t *)(((char *) src_bits) + + src_y * src_stride * 4 + src_x * 1), src_stride * 4); + return TRUE; + case 16: + pixman_composite_src_0565_0565_asm_armv6 ( + width, height, + (uint16_t *)(((char *) dst_bits) + + dest_y * dst_stride * 4 + dest_x * 2), dst_stride * 2, + (uint16_t *)(((char *) src_bits) + + src_y * src_stride * 4 + src_x * 2), src_stride * 2); + return TRUE; + case 32: + pixman_composite_src_8888_8888_asm_armv6 ( + width, height, + (uint32_t *)(((char *) dst_bits) + + dest_y * dst_stride * 4 + dest_x * 4), dst_stride, + (uint32_t *)(((char *) src_bits) + + src_y * src_stride * 4 + src_x * 4), src_stride); + return TRUE; + default: + return FALSE; + } +} + +static const pixman_fast_path_t arm_simd_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, armv6_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, armv6_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, armv6_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, armv6_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, armv6_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, armv6_composite_src_8888_8888), + + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, armv6_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, armv6_composite_src_x888_8888), + + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, a1r5g5b5, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a1b5g5r5, null, a1b5g5r5, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a1b5g5r5, null, x1b5g5r5, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, x1b5g5r5, null, x1b5g5r5, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a4r4g4b4, null, a4r4g4b4, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a4b4g4r4, null, a4b4g4r4, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a4r4g4b4, null, x4r4g4b4, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a4b4g4r4, null, x4b4g4r4, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, x4r4g4b4, null, x4r4g4b4, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, x4b4g4r4, null, x4b4g4r4, armv6_composite_src_0565_0565), + + PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, r3g3b2, null, r3g3b2, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, b2g3r3, null, b2g3r3, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, a2r2g2b2, null, a2r2g2b2, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, a2b2g2r2, null, a2b2g2r2, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, c8, null, c8, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, g8, null, g8, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, x4a4, null, x4a4, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, x4c4, null, x4c4, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, x4g4, null, x4g4, armv6_composite_src_8_8), + + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, armv6_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, armv6_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, armv6_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, armv6_composite_src_0565_8888), + + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, armv6_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, armv6_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888), + + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8), + + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888), + + { PIXMAN_OP_NONE }, +}; + +pixman_implementation_t * +_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = _pixman_implementation_create (fallback, arm_simd_fast_paths); + + imp->blt = arm_simd_blt; + imp->fill = arm_simd_fill; + + return imp; +} diff --git a/src/pixman/pixman/pixman-arm.c b/src/pixman/pixman/pixman-arm.c new file mode 100644 index 0000000..23374e4 --- /dev/null +++ b/src/pixman/pixman/pixman-arm.c @@ -0,0 +1,225 @@ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "pixman-private.h" + +typedef enum +{ + ARM_V7 = (1 << 0), + ARM_V6 = (1 << 1), + ARM_VFP = (1 << 2), + ARM_NEON = (1 << 3), + ARM_IWMMXT = (1 << 4) +} arm_cpu_features_t; + +#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT) + +#if defined(_MSC_VER) + +/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */ +#include <windows.h> + +extern int pixman_msvc_try_arm_neon_op (); +extern int pixman_msvc_try_arm_simd_op (); + +static arm_cpu_features_t +detect_cpu_features (void) +{ + arm_cpu_features_t features = 0; + + __try + { + pixman_msvc_try_arm_simd_op (); + features |= ARM_V6; + } + __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) + { + } + + __try + { + pixman_msvc_try_arm_neon_op (); + features |= ARM_NEON; + } + __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) + { + } + + return features; +} + +#elif defined(__APPLE__) && defined(TARGET_OS_IPHONE) /* iOS */ + +#include "TargetConditionals.h" + +static arm_cpu_features_t +detect_cpu_features (void) +{ + arm_cpu_features_t features = 0; + + features |= ARM_V6; + + /* Detection of ARM NEON on iOS is fairly simple because iOS binaries + * contain separate executable images for each processor architecture. + * So all we have to do is detect the armv7 architecture build. The + * operating system automatically runs the armv7 binary for armv7 devices + * and the armv6 binary for armv6 devices. + */ +#if defined(__ARM_NEON__) + features |= ARM_NEON; +#endif + + return features; +} + +#elif defined(__ANDROID__) || defined(ANDROID) /* Android */ + +#include <cpu-features.h> + +static arm_cpu_features_t +detect_cpu_features (void) +{ + arm_cpu_features_t features = 0; + AndroidCpuFamily cpu_family; + uint64_t cpu_features; + + cpu_family = android_getCpuFamily(); + cpu_features = android_getCpuFeatures(); + + if (cpu_family == ANDROID_CPU_FAMILY_ARM) + { + if (cpu_features & ANDROID_CPU_ARM_FEATURE_ARMv7) + features |= ARM_V7; + + if (cpu_features & ANDROID_CPU_ARM_FEATURE_VFPv3) + features |= ARM_VFP; + + if (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) + features |= ARM_NEON; + } + + return features; +} + +#elif defined (__linux__) /* linux ELF */ + +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <string.h> +#include <elf.h> + +static arm_cpu_features_t +detect_cpu_features (void) +{ + arm_cpu_features_t features = 0; + Elf32_auxv_t aux; + int fd; + + fd = open ("/proc/self/auxv", O_RDONLY); + if (fd >= 0) + { + while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) + { + if (aux.a_type == AT_HWCAP) + { + uint32_t hwcap = aux.a_un.a_val; + + /* hardcode these values to avoid depending on specific + * versions of the hwcap header, e.g. HWCAP_NEON + */ + if ((hwcap & 64) != 0) + features |= ARM_VFP; + if ((hwcap & 512) != 0) + features |= ARM_IWMMXT; + /* this flag is only present on kernel 2.6.29 */ + if ((hwcap & 4096) != 0) + features |= ARM_NEON; + } + else if (aux.a_type == AT_PLATFORM) + { + const char *plat = (const char*) aux.a_un.a_val; + + if (strncmp (plat, "v7l", 3) == 0) + features |= (ARM_V7 | ARM_V6); + else if (strncmp (plat, "v6l", 3) == 0) + features |= ARM_V6; + } + } + close (fd); + } + + return features; +} + +#else /* Unknown */ + +static arm_cpu_features_t +detect_cpu_features (void) +{ + return 0; +} + +#endif /* Linux elf */ + +static pixman_bool_t +have_feature (arm_cpu_features_t feature) +{ + static pixman_bool_t initialized; + static arm_cpu_features_t features; + + if (!initialized) + { + features = detect_cpu_features(); + initialized = TRUE; + } + + return (features & feature) == feature; +} + +#endif /* USE_ARM_SIMD || USE_ARM_NEON || USE_ARM_IWMMXT */ + +pixman_implementation_t * +_pixman_arm_get_implementations (pixman_implementation_t *imp) +{ +#ifdef USE_ARM_SIMD + if (!_pixman_disabled ("arm-simd") && have_feature (ARM_V6)) + imp = _pixman_implementation_create_arm_simd (imp); +#endif + +#ifdef USE_ARM_IWMMXT + if (!_pixman_disabled ("arm-iwmmxt") && have_feature (ARM_IWMMXT)) + imp = _pixman_implementation_create_mmx (imp); +#endif + +#ifdef USE_ARM_NEON + if (!_pixman_disabled ("arm-neon") && have_feature (ARM_NEON)) + imp = _pixman_implementation_create_arm_neon (imp); +#endif + + return imp; +} diff --git a/src/pixman/pixman/pixman-bits-image.c b/src/pixman/pixman/pixman-bits-image.c new file mode 100644 index 0000000..dcdcc69 --- /dev/null +++ b/src/pixman/pixman/pixman-bits-image.c @@ -0,0 +1,1039 @@ +/* + * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. + * 2005 Lars Knoll & Zack Rusin, Trolltech + * 2008 Aaron Plattner, NVIDIA Corporation + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007, 2009 Red Hat, Inc. + * Copyright © 2008 André Tupinambá <andrelrt@gmail.com> + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "pixman-private.h" +#include "pixman-combine32.h" +#include "pixman-inlines.h" + +static uint32_t * +_pixman_image_get_scanline_generic_float (pixman_iter_t * iter, + const uint32_t *mask) +{ + pixman_iter_get_scanline_t fetch_32 = iter->data; + uint32_t *buffer = iter->buffer; + + fetch_32 (iter, NULL); + + pixman_expand_to_float ((argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width); + + return iter->buffer; +} + +/* Fetch functions */ + +static force_inline uint32_t +fetch_pixel_no_alpha (bits_image_t *image, + int x, int y, pixman_bool_t check_bounds) +{ + if (check_bounds && + (x < 0 || x >= image->width || y < 0 || y >= image->height)) + { + return 0; + } + + return image->fetch_pixel_32 (image, x, y); +} + +typedef uint32_t (* get_pixel_t) (bits_image_t *image, + int x, int y, pixman_bool_t check_bounds); + +static force_inline uint32_t +bits_image_fetch_pixel_nearest (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y, + get_pixel_t get_pixel) +{ + int x0 = pixman_fixed_to_int (x - pixman_fixed_e); + int y0 = pixman_fixed_to_int (y - pixman_fixed_e); + + if (image->common.repeat != PIXMAN_REPEAT_NONE) + { + repeat (image->common.repeat, &x0, image->width); + repeat (image->common.repeat, &y0, image->height); + + return get_pixel (image, x0, y0, FALSE); + } + else + { + return get_pixel (image, x0, y0, TRUE); + } +} + +static force_inline uint32_t +bits_image_fetch_pixel_bilinear (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y, + get_pixel_t get_pixel) +{ + pixman_repeat_t repeat_mode = image->common.repeat; + int width = image->width; + int height = image->height; + int x1, y1, x2, y2; + uint32_t tl, tr, bl, br; + int32_t distx, disty; + + x1 = x - pixman_fixed_1 / 2; + y1 = y - pixman_fixed_1 / 2; + + distx = pixman_fixed_to_bilinear_weight (x1); + disty = pixman_fixed_to_bilinear_weight (y1); + + x1 = pixman_fixed_to_int (x1); + y1 = pixman_fixed_to_int (y1); + x2 = x1 + 1; + y2 = y1 + 1; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &x1, width); + repeat (repeat_mode, &y1, height); + repeat (repeat_mode, &x2, width); + repeat (repeat_mode, &y2, height); + + tl = get_pixel (image, x1, y1, FALSE); + bl = get_pixel (image, x1, y2, FALSE); + tr = get_pixel (image, x2, y1, FALSE); + br = get_pixel (image, x2, y2, FALSE); + } + else + { + tl = get_pixel (image, x1, y1, TRUE); + tr = get_pixel (image, x2, y1, TRUE); + bl = get_pixel (image, x1, y2, TRUE); + br = get_pixel (image, x2, y2, TRUE); + } + + return bilinear_interpolation (tl, tr, bl, br, distx, disty); +} + +static force_inline uint32_t +bits_image_fetch_pixel_convolution (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y, + get_pixel_t get_pixel) +{ + pixman_fixed_t *params = image->common.filter_params; + int x_off = (params[0] - pixman_fixed_1) >> 1; + int y_off = (params[1] - pixman_fixed_1) >> 1; + int32_t cwidth = pixman_fixed_to_int (params[0]); + int32_t cheight = pixman_fixed_to_int (params[1]); + int32_t i, j, x1, x2, y1, y2; + pixman_repeat_t repeat_mode = image->common.repeat; + int width = image->width; + int height = image->height; + int srtot, sgtot, sbtot, satot; + + params += 2; + + x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); + y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); + x2 = x1 + cwidth; + y2 = y1 + cheight; + + srtot = sgtot = sbtot = satot = 0; + + for (i = y1; i < y2; ++i) + { + for (j = x1; j < x2; ++j) + { + int rx = j; + int ry = i; + + pixman_fixed_t f = *params; + + if (f) + { + uint32_t pixel; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &rx, width); + repeat (repeat_mode, &ry, height); + + pixel = get_pixel (image, rx, ry, FALSE); + } + else + { + pixel = get_pixel (image, rx, ry, TRUE); + } + + srtot += (int)RED_8 (pixel) * f; + sgtot += (int)GREEN_8 (pixel) * f; + sbtot += (int)BLUE_8 (pixel) * f; + satot += (int)ALPHA_8 (pixel) * f; + } + + params++; + } + } + + satot = (satot + 0x8000) >> 16; + srtot = (srtot + 0x8000) >> 16; + sgtot = (sgtot + 0x8000) >> 16; + sbtot = (sbtot + 0x8000) >> 16; + + satot = CLIP (satot, 0, 0xff); + srtot = CLIP (srtot, 0, 0xff); + sgtot = CLIP (sgtot, 0, 0xff); + sbtot = CLIP (sbtot, 0, 0xff); + + return ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot)); +} + +static uint32_t +bits_image_fetch_pixel_separable_convolution (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y, + get_pixel_t get_pixel) +{ + pixman_fixed_t *params = image->common.filter_params; + pixman_repeat_t repeat_mode = image->common.repeat; + int width = image->width; + int height = image->height; + int cwidth = pixman_fixed_to_int (params[0]); + int cheight = pixman_fixed_to_int (params[1]); + int x_phase_bits = pixman_fixed_to_int (params[2]); + int y_phase_bits = pixman_fixed_to_int (params[3]); + int x_phase_shift = 16 - x_phase_bits; + int y_phase_shift = 16 - y_phase_bits; + int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1; + int y_off = ((cheight << 16) - pixman_fixed_1) >> 1; + pixman_fixed_t *y_params; + int srtot, sgtot, sbtot, satot; + int32_t x1, x2, y1, y2; + int32_t px, py; + int i, j; + + /* Round x and y to the middle of the closest phase before continuing. This + * ensures that the convolution matrix is aligned right, since it was + * positioned relative to a particular phase (and not relative to whatever + * exact fraction we happen to get here). + */ + x = ((x >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1); + y = ((y >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1); + + px = (x & 0xffff) >> x_phase_shift; + py = (y & 0xffff) >> y_phase_shift; + + y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight; + + x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); + y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); + x2 = x1 + cwidth; + y2 = y1 + cheight; + + srtot = sgtot = sbtot = satot = 0; + + for (i = y1; i < y2; ++i) + { + pixman_fixed_48_16_t fy = *y_params++; + pixman_fixed_t *x_params = params + 4 + px * cwidth; + + if (fy) + { + for (j = x1; j < x2; ++j) + { + pixman_fixed_t fx = *x_params++; + int rx = j; + int ry = i; + + if (fx) + { + pixman_fixed_t f; + uint32_t pixel; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &rx, width); + repeat (repeat_mode, &ry, height); + + pixel = get_pixel (image, rx, ry, FALSE); + } + else + { + pixel = get_pixel (image, rx, ry, TRUE); + } + + f = (fy * fx + 0x8000) >> 16; + + srtot += (int)RED_8 (pixel) * f; + sgtot += (int)GREEN_8 (pixel) * f; + sbtot += (int)BLUE_8 (pixel) * f; + satot += (int)ALPHA_8 (pixel) * f; + } + } + } + } + + satot = (satot + 0x8000) >> 16; + srtot = (srtot + 0x8000) >> 16; + sgtot = (sgtot + 0x8000) >> 16; + sbtot = (sbtot + 0x8000) >> 16; + + satot = CLIP (satot, 0, 0xff); + srtot = CLIP (srtot, 0, 0xff); + sgtot = CLIP (sgtot, 0, 0xff); + sbtot = CLIP (sbtot, 0, 0xff); + + return ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot)); +} + +static force_inline uint32_t +bits_image_fetch_pixel_filtered (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y, + get_pixel_t get_pixel) +{ + switch (image->common.filter) + { + case PIXMAN_FILTER_NEAREST: + case PIXMAN_FILTER_FAST: + return bits_image_fetch_pixel_nearest (image, x, y, get_pixel); + break; + + case PIXMAN_FILTER_BILINEAR: + case PIXMAN_FILTER_GOOD: + case PIXMAN_FILTER_BEST: + return bits_image_fetch_pixel_bilinear (image, x, y, get_pixel); + break; + + case PIXMAN_FILTER_CONVOLUTION: + return bits_image_fetch_pixel_convolution (image, x, y, get_pixel); + break; + + case PIXMAN_FILTER_SEPARABLE_CONVOLUTION: + return bits_image_fetch_pixel_separable_convolution (image, x, y, get_pixel); + break; + + default: + break; + } + + return 0; +} + +static uint32_t * +bits_image_fetch_affine_no_alpha (pixman_iter_t * iter, + const uint32_t * mask) +{ + pixman_image_t *image = iter->image; + int offset = iter->x; + int line = iter->y++; + int width = iter->width; + uint32_t * buffer = iter->buffer; + + pixman_fixed_t x, y; + pixman_fixed_t ux, uy; + pixman_vector_t v; + int i; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (image->common.transform) + { + if (!pixman_transform_point_3d (image->common.transform, &v)) + return iter->buffer; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + } + else + { + ux = pixman_fixed_1; + uy = 0; + } + + x = v.vector[0]; + y = v.vector[1]; + + for (i = 0; i < width; ++i) + { + if (!mask || mask[i]) + { + buffer[i] = bits_image_fetch_pixel_filtered ( + &image->bits, x, y, fetch_pixel_no_alpha); + } + + x += ux; + y += uy; + } + + return buffer; +} + +/* General fetcher */ +static force_inline uint32_t +fetch_pixel_general (bits_image_t *image, int x, int y, pixman_bool_t check_bounds) +{ + uint32_t pixel; + + if (check_bounds && + (x < 0 || x >= image->width || y < 0 || y >= image->height)) + { + return 0; + } + + pixel = image->fetch_pixel_32 (image, x, y); + + if (image->common.alpha_map) + { + uint32_t pixel_a; + + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + if (x < 0 || x >= image->common.alpha_map->width || + y < 0 || y >= image->common.alpha_map->height) + { + pixel_a = 0; + } + else + { + pixel_a = image->common.alpha_map->fetch_pixel_32 ( + image->common.alpha_map, x, y); + + pixel_a = ALPHA_8 (pixel_a); + } + + pixel &= 0x00ffffff; + pixel |= (pixel_a << 24); + } + + return pixel; +} + +static uint32_t * +bits_image_fetch_general (pixman_iter_t *iter, + const uint32_t *mask) +{ + pixman_image_t *image = iter->image; + int offset = iter->x; + int line = iter->y++; + int width = iter->width; + uint32_t * buffer = iter->buffer; + + pixman_fixed_t x, y, w; + pixman_fixed_t ux, uy, uw; + pixman_vector_t v; + int i; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (image->common.transform) + { + if (!pixman_transform_point_3d (image->common.transform, &v)) + return buffer; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + uw = image->common.transform->matrix[2][0]; + } + else + { + ux = pixman_fixed_1; + uy = 0; + uw = 0; + } + + x = v.vector[0]; + y = v.vector[1]; + w = v.vector[2]; + + for (i = 0; i < width; ++i) + { + pixman_fixed_t x0, y0; + + if (!mask || mask[i]) + { + if (w != 0) + { + x0 = ((pixman_fixed_48_16_t)x << 16) / w; + y0 = ((pixman_fixed_48_16_t)y << 16) / w; + } + else + { + x0 = 0; + y0 = 0; + } + + buffer[i] = bits_image_fetch_pixel_filtered ( + &image->bits, x0, y0, fetch_pixel_general); + } + + x += ux; + y += uy; + w += uw; + } + + return buffer; +} + +static void +replicate_pixel_32 (bits_image_t * bits, + int x, + int y, + int width, + uint32_t * buffer) +{ + uint32_t color; + uint32_t *end; + + color = bits->fetch_pixel_32 (bits, x, y); + + end = buffer + width; + while (buffer < end) + *(buffer++) = color; +} + +static void +replicate_pixel_float (bits_image_t * bits, + int x, + int y, + int width, + uint32_t * b) +{ + argb_t color; + argb_t *buffer = (argb_t *)b; + argb_t *end; + + color = bits->fetch_pixel_float (bits, x, y); + + end = buffer + width; + while (buffer < end) + *(buffer++) = color; +} + +static void +bits_image_fetch_untransformed_repeat_none (bits_image_t *image, + pixman_bool_t wide, + int x, + int y, + int width, + uint32_t * buffer) +{ + uint32_t w; + + if (y < 0 || y >= image->height) + { + memset (buffer, 0, width * (wide? sizeof (argb_t) : 4)); + return; + } + + if (x < 0) + { + w = MIN (width, -x); + + memset (buffer, 0, w * (wide ? sizeof (argb_t) : 4)); + + width -= w; + buffer += w * (wide? 4 : 1); + x += w; + } + + if (x < image->width) + { + w = MIN (width, image->width - x); + + if (wide) + image->fetch_scanline_float (image, x, y, w, buffer, NULL); + else + image->fetch_scanline_32 (image, x, y, w, buffer, NULL); + + width -= w; + buffer += w * (wide? 4 : 1); + x += w; + } + + memset (buffer, 0, width * (wide ? sizeof (argb_t) : 4)); +} + +static void +bits_image_fetch_untransformed_repeat_normal (bits_image_t *image, + pixman_bool_t wide, + int x, + int y, + int width, + uint32_t * buffer) +{ + uint32_t w; + + while (y < 0) + y += image->height; + + while (y >= image->height) + y -= image->height; + + if (image->width == 1) + { + if (wide) + replicate_pixel_float (image, 0, y, width, buffer); + else + replicate_pixel_32 (image, 0, y, width, buffer); + + return; + } + + while (width) + { + while (x < 0) + x += image->width; + while (x >= image->width) + x -= image->width; + + w = MIN (width, image->width - x); + + if (wide) + image->fetch_scanline_float (image, x, y, w, buffer, NULL); + else + image->fetch_scanline_32 (image, x, y, w, buffer, NULL); + + buffer += w * (wide? 4 : 1); + x += w; + width -= w; + } +} + +static uint32_t * +bits_image_fetch_untransformed_32 (pixman_iter_t * iter, + const uint32_t *mask) +{ + pixman_image_t *image = iter->image; + int x = iter->x; + int y = iter->y; + int width = iter->width; + uint32_t * buffer = iter->buffer; + + if (image->common.repeat == PIXMAN_REPEAT_NONE) + { + bits_image_fetch_untransformed_repeat_none ( + &image->bits, FALSE, x, y, width, buffer); + } + else + { + bits_image_fetch_untransformed_repeat_normal ( + &image->bits, FALSE, x, y, width, buffer); + } + + iter->y++; + return buffer; +} + +static uint32_t * +bits_image_fetch_untransformed_float (pixman_iter_t * iter, + const uint32_t *mask) +{ + pixman_image_t *image = iter->image; + int x = iter->x; + int y = iter->y; + int width = iter->width; + uint32_t * buffer = iter->buffer; + + if (image->common.repeat == PIXMAN_REPEAT_NONE) + { + bits_image_fetch_untransformed_repeat_none ( + &image->bits, TRUE, x, y, width, buffer); + } + else + { + bits_image_fetch_untransformed_repeat_normal ( + &image->bits, TRUE, x, y, width, buffer); + } + + iter->y++; + return buffer; +} + +typedef struct +{ + pixman_format_code_t format; + uint32_t flags; + pixman_iter_get_scanline_t get_scanline_32; + pixman_iter_get_scanline_t get_scanline_float; +} fetcher_info_t; + +static const fetcher_info_t fetcher_info[] = +{ + { PIXMAN_any, + (FAST_PATH_NO_ALPHA_MAP | + FAST_PATH_ID_TRANSFORM | + FAST_PATH_NO_CONVOLUTION_FILTER | + FAST_PATH_NO_PAD_REPEAT | + FAST_PATH_NO_REFLECT_REPEAT), + bits_image_fetch_untransformed_32, + bits_image_fetch_untransformed_float + }, + + /* Affine, no alpha */ + { PIXMAN_any, + (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM), + bits_image_fetch_affine_no_alpha, + _pixman_image_get_scanline_generic_float + }, + + /* General */ + { PIXMAN_any, + 0, + bits_image_fetch_general, + _pixman_image_get_scanline_generic_float + }, + + { PIXMAN_null }, +}; + +static void +bits_image_property_changed (pixman_image_t *image) +{ + _pixman_bits_image_setup_accessors (&image->bits); +} + +void +_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter) +{ + pixman_format_code_t format = image->common.extended_format_code; + uint32_t flags = image->common.flags; + const fetcher_info_t *info; + + for (info = fetcher_info; info->format != PIXMAN_null; ++info) + { + if ((info->format == format || info->format == PIXMAN_any) && + (info->flags & flags) == info->flags) + { + if (iter->iter_flags & ITER_NARROW) + { + iter->get_scanline = info->get_scanline_32; + } + else + { + iter->data = info->get_scanline_32; + iter->get_scanline = info->get_scanline_float; + } + return; + } + } + + /* Just in case we somehow didn't find a scanline function */ + iter->get_scanline = _pixman_iter_get_scanline_noop; +} + +static uint32_t * +dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) +{ + pixman_image_t *image = iter->image; + int x = iter->x; + int y = iter->y; + int width = iter->width; + uint32_t * buffer = iter->buffer; + + image->bits.fetch_scanline_32 (&image->bits, x, y, width, buffer, mask); + if (image->common.alpha_map) + { + uint32_t *alpha; + + if ((alpha = malloc (width * sizeof (uint32_t)))) + { + int i; + + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + image->common.alpha_map->fetch_scanline_32 ( + image->common.alpha_map, x, y, width, alpha, mask); + + for (i = 0; i < width; ++i) + { + buffer[i] &= ~0xff000000; + buffer[i] |= (alpha[i] & 0xff000000); + } + + free (alpha); + } + } + + return iter->buffer; +} + +static uint32_t * +dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) +{ + bits_image_t * image = &iter->image->bits; + int x = iter->x; + int y = iter->y; + int width = iter->width; + argb_t * buffer = (argb_t *)iter->buffer; + + image->fetch_scanline_float ( + image, x, y, width, (uint32_t *)buffer, mask); + if (image->common.alpha_map) + { + argb_t *alpha; + + if ((alpha = malloc (width * sizeof (argb_t)))) + { + int i; + + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + image->common.alpha_map->fetch_scanline_float ( + image->common.alpha_map, x, y, width, (uint32_t *)alpha, mask); + + for (i = 0; i < width; ++i) + buffer[i].a = alpha[i].a; + + free (alpha); + } + } + + return iter->buffer; +} + +static void +dest_write_back_narrow (pixman_iter_t *iter) +{ + bits_image_t * image = &iter->image->bits; + int x = iter->x; + int y = iter->y; + int width = iter->width; + const uint32_t *buffer = iter->buffer; + + image->store_scanline_32 (image, x, y, width, buffer); + + if (image->common.alpha_map) + { + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + image->common.alpha_map->store_scanline_32 ( + image->common.alpha_map, x, y, width, buffer); + } + + iter->y++; +} + +static void +dest_write_back_wide (pixman_iter_t *iter) +{ + bits_image_t * image = &iter->image->bits; + int x = iter->x; + int y = iter->y; + int width = iter->width; + const uint32_t *buffer = iter->buffer; + + image->store_scanline_float (image, x, y, width, buffer); + + if (image->common.alpha_map) + { + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + image->common.alpha_map->store_scanline_float ( + image->common.alpha_map, x, y, width, buffer); + } + + iter->y++; +} + +void +_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter) +{ + if (iter->iter_flags & ITER_NARROW) + { + if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == + (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) + { + iter->get_scanline = _pixman_iter_get_scanline_noop; + } + else + { + iter->get_scanline = dest_get_scanline_narrow; + } + + iter->write_back = dest_write_back_narrow; + } + else + { + iter->get_scanline = dest_get_scanline_wide; + iter->write_back = dest_write_back_wide; + } +} + +static uint32_t * +create_bits (pixman_format_code_t format, + int width, + int height, + int * rowstride_bytes, + pixman_bool_t clear) +{ + int stride; + size_t buf_size; + int bpp; + + /* what follows is a long-winded way, avoiding any possibility of integer + * overflows, of saying: + * stride = ((width * bpp + 0x1f) >> 5) * sizeof (uint32_t); + */ + + bpp = PIXMAN_FORMAT_BPP (format); + if (_pixman_multiply_overflows_int (width, bpp)) + return NULL; + + stride = width * bpp; + if (_pixman_addition_overflows_int (stride, 0x1f)) + return NULL; + + stride += 0x1f; + stride >>= 5; + + stride *= sizeof (uint32_t); + + if (_pixman_multiply_overflows_size (height, stride)) + return NULL; + + buf_size = (size_t)height * stride; + + if (rowstride_bytes) + *rowstride_bytes = stride; + + if (clear) + return calloc (buf_size, 1); + else + return malloc (buf_size); +} + +pixman_bool_t +_pixman_bits_image_init (pixman_image_t * image, + pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride, + pixman_bool_t clear) +{ + uint32_t *free_me = NULL; + + if (!bits && width && height) + { + int rowstride_bytes; + + free_me = bits = create_bits (format, width, height, &rowstride_bytes, clear); + + if (!bits) + return FALSE; + + rowstride = rowstride_bytes / (int) sizeof (uint32_t); + } + + _pixman_image_init (image); + + image->type = BITS; + image->bits.format = format; + image->bits.width = width; + image->bits.height = height; + image->bits.bits = bits; + image->bits.free_me = free_me; + image->bits.read_func = NULL; + image->bits.write_func = NULL; + image->bits.rowstride = rowstride; + image->bits.indexed = NULL; + + image->common.property_changed = bits_image_property_changed; + + _pixman_image_reset_clip_region (image); + + return TRUE; +} + +static pixman_image_t * +create_bits_image_internal (pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride_bytes, + pixman_bool_t clear) +{ + pixman_image_t *image; + + /* must be a whole number of uint32_t's + */ + return_val_if_fail ( + bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL); + + return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL); + + image = _pixman_image_allocate (); + + if (!image) + return NULL; + + if (!_pixman_bits_image_init (image, format, width, height, bits, + rowstride_bytes / (int) sizeof (uint32_t), + clear)) + { + free (image); + return NULL; + } + + return image; +} + +/* If bits is NULL, a buffer will be allocated and initialized to 0 */ +PIXMAN_EXPORT pixman_image_t * +pixman_image_create_bits (pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride_bytes) +{ + return create_bits_image_internal ( + format, width, height, bits, rowstride_bytes, TRUE); +} + + +/* If bits is NULL, a buffer will be allocated and _not_ initialized */ +PIXMAN_EXPORT pixman_image_t * +pixman_image_create_bits_no_clear (pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride_bytes) +{ + return create_bits_image_internal ( + format, width, height, bits, rowstride_bytes, FALSE); +} diff --git a/src/pixman/pixman/pixman-combine-float.c b/src/pixman/pixman/pixman-combine-float.c new file mode 100644 index 0000000..5ea739f --- /dev/null +++ b/src/pixman/pixman/pixman-combine-float.c @@ -0,0 +1,1016 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2010, 2012 Soren Sandmann Pedersen + * Copyright © 2010, 2012 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Soren Sandmann Pedersen (sandmann@cs.au.dk) + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <math.h> +#include <string.h> +#include <float.h> + +#include "pixman-private.h" + +/* Workaround for http://gcc.gnu.org/PR54965 */ +/* GCC 4.6 has problems with force_inline, so just use normal inline instead */ +#if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 6) +#undef force_inline +#define force_inline __inline__ +#endif + +typedef float (* combine_channel_t) (float sa, float s, float da, float d); + +static force_inline void +combine_inner (pixman_bool_t component, + float *dest, const float *src, const float *mask, int n_pixels, + combine_channel_t combine_a, combine_channel_t combine_c) +{ + int i; + + if (!mask) + { + for (i = 0; i < 4 * n_pixels; i += 4) + { + float sa = src[i + 0]; + float sr = src[i + 1]; + float sg = src[i + 2]; + float sb = src[i + 3]; + + float da = dest[i + 0]; + float dr = dest[i + 1]; + float dg = dest[i + 2]; + float db = dest[i + 3]; + + dest[i + 0] = combine_a (sa, sa, da, da); + dest[i + 1] = combine_c (sa, sr, da, dr); + dest[i + 2] = combine_c (sa, sg, da, dg); + dest[i + 3] = combine_c (sa, sb, da, db); + } + } + else + { + for (i = 0; i < 4 * n_pixels; i += 4) + { + float sa, sr, sg, sb; + float ma, mr, mg, mb; + float da, dr, dg, db; + + sa = src[i + 0]; + sr = src[i + 1]; + sg = src[i + 2]; + sb = src[i + 3]; + + if (component) + { + ma = mask[i + 0]; + mr = mask[i + 1]; + mg = mask[i + 2]; + mb = mask[i + 3]; + + sr *= mr; + sg *= mg; + sb *= mb; + + ma *= sa; + mr *= sa; + mg *= sa; + mb *= sa; + + sa = ma; + } + else + { + ma = mask[i + 0]; + + sa *= ma; + sr *= ma; + sg *= ma; + sb *= ma; + + ma = mr = mg = mb = sa; + } + + da = dest[i + 0]; + dr = dest[i + 1]; + dg = dest[i + 2]; + db = dest[i + 3]; + + dest[i + 0] = combine_a (ma, sa, da, da); + dest[i + 1] = combine_c (mr, sr, da, dr); + dest[i + 2] = combine_c (mg, sg, da, dg); + dest[i + 3] = combine_c (mb, sb, da, db); + } + } +} + +#define MAKE_COMBINER(name, component, combine_a, combine_c) \ + static void \ + combine_ ## name ## _float (pixman_implementation_t *imp, \ + pixman_op_t op, \ + float *dest, \ + const float *src, \ + const float *mask, \ + int n_pixels) \ + { \ + combine_inner (component, dest, src, mask, n_pixels, \ + combine_a, combine_c); \ + } + +#define MAKE_COMBINERS(name, combine_a, combine_c) \ + MAKE_COMBINER(name ## _ca, TRUE, combine_a, combine_c) \ + MAKE_COMBINER(name ## _u, FALSE, combine_a, combine_c) + + +/* + * Porter/Duff operators + */ +typedef enum +{ + ZERO, + ONE, + SRC_ALPHA, + DEST_ALPHA, + INV_SA, + INV_DA, + SA_OVER_DA, + DA_OVER_SA, + INV_SA_OVER_DA, + INV_DA_OVER_SA, + ONE_MINUS_SA_OVER_DA, + ONE_MINUS_DA_OVER_SA, + ONE_MINUS_INV_DA_OVER_SA, + ONE_MINUS_INV_SA_OVER_DA +} combine_factor_t; + +#define CLAMP(f) \ + (((f) < 0)? 0 : (((f) > 1.0) ? 1.0 : (f))) + +static force_inline float +get_factor (combine_factor_t factor, float sa, float da) +{ + float f = -1; + + switch (factor) + { + case ZERO: + f = 0.0f; + break; + + case ONE: + f = 1.0f; + break; + + case SRC_ALPHA: + f = sa; + break; + + case DEST_ALPHA: + f = da; + break; + + case INV_SA: + f = 1 - sa; + break; + + case INV_DA: + f = 1 - da; + break; + + case SA_OVER_DA: + if (FLOAT_IS_ZERO (da)) + f = 1.0f; + else + f = CLAMP (sa / da); + break; + + case DA_OVER_SA: + if (FLOAT_IS_ZERO (sa)) + f = 1.0f; + else + f = CLAMP (da / sa); + break; + + case INV_SA_OVER_DA: + if (FLOAT_IS_ZERO (da)) + f = 1.0f; + else + f = CLAMP ((1.0f - sa) / da); + break; + + case INV_DA_OVER_SA: + if (FLOAT_IS_ZERO (sa)) + f = 1.0f; + else + f = CLAMP ((1.0f - da) / sa); + break; + + case ONE_MINUS_SA_OVER_DA: + if (FLOAT_IS_ZERO (da)) + f = 0.0f; + else + f = CLAMP (1.0f - sa / da); + break; + + case ONE_MINUS_DA_OVER_SA: + if (FLOAT_IS_ZERO (sa)) + f = 0.0f; + else + f = CLAMP (1.0f - da / sa); + break; + + case ONE_MINUS_INV_DA_OVER_SA: + if (FLOAT_IS_ZERO (sa)) + f = 0.0f; + else + f = CLAMP (1.0f - (1.0f - da) / sa); + break; + + case ONE_MINUS_INV_SA_OVER_DA: + if (FLOAT_IS_ZERO (da)) + f = 0.0f; + else + f = CLAMP (1.0f - (1.0f - sa) / da); + break; + } + + return f; +} + +#define MAKE_PD_COMBINERS(name, a, b) \ + static float force_inline \ + pd_combine_ ## name (float sa, float s, float da, float d) \ + { \ + const float fa = get_factor (a, sa, da); \ + const float fb = get_factor (b, sa, da); \ + \ + return MIN (1.0f, s * fa + d * fb); \ + } \ + \ + MAKE_COMBINERS(name, pd_combine_ ## name, pd_combine_ ## name) + +MAKE_PD_COMBINERS (clear, ZERO, ZERO) +MAKE_PD_COMBINERS (src, ONE, ZERO) +MAKE_PD_COMBINERS (dst, ZERO, ONE) +MAKE_PD_COMBINERS (over, ONE, INV_SA) +MAKE_PD_COMBINERS (over_reverse, INV_DA, ONE) +MAKE_PD_COMBINERS (in, DEST_ALPHA, ZERO) +MAKE_PD_COMBINERS (in_reverse, ZERO, SRC_ALPHA) +MAKE_PD_COMBINERS (out, INV_DA, ZERO) +MAKE_PD_COMBINERS (out_reverse, ZERO, INV_SA) +MAKE_PD_COMBINERS (atop, DEST_ALPHA, INV_SA) +MAKE_PD_COMBINERS (atop_reverse, INV_DA, SRC_ALPHA) +MAKE_PD_COMBINERS (xor, INV_DA, INV_SA) +MAKE_PD_COMBINERS (add, ONE, ONE) + +MAKE_PD_COMBINERS (saturate, INV_DA_OVER_SA, ONE) + +MAKE_PD_COMBINERS (disjoint_clear, ZERO, ZERO) +MAKE_PD_COMBINERS (disjoint_src, ONE, ZERO) +MAKE_PD_COMBINERS (disjoint_dst, ZERO, ONE) +MAKE_PD_COMBINERS (disjoint_over, ONE, INV_SA_OVER_DA) +MAKE_PD_COMBINERS (disjoint_over_reverse, INV_DA_OVER_SA, ONE) +MAKE_PD_COMBINERS (disjoint_in, ONE_MINUS_INV_DA_OVER_SA, ZERO) +MAKE_PD_COMBINERS (disjoint_in_reverse, ZERO, ONE_MINUS_INV_SA_OVER_DA) +MAKE_PD_COMBINERS (disjoint_out, INV_DA_OVER_SA, ZERO) +MAKE_PD_COMBINERS (disjoint_out_reverse, ZERO, INV_SA_OVER_DA) +MAKE_PD_COMBINERS (disjoint_atop, ONE_MINUS_INV_DA_OVER_SA, INV_SA_OVER_DA) +MAKE_PD_COMBINERS (disjoint_atop_reverse, INV_DA_OVER_SA, ONE_MINUS_INV_SA_OVER_DA) +MAKE_PD_COMBINERS (disjoint_xor, INV_DA_OVER_SA, INV_SA_OVER_DA) + +MAKE_PD_COMBINERS (conjoint_clear, ZERO, ZERO) +MAKE_PD_COMBINERS (conjoint_src, ONE, ZERO) +MAKE_PD_COMBINERS (conjoint_dst, ZERO, ONE) +MAKE_PD_COMBINERS (conjoint_over, ONE, ONE_MINUS_SA_OVER_DA) +MAKE_PD_COMBINERS (conjoint_over_reverse, ONE_MINUS_DA_OVER_SA, ONE) +MAKE_PD_COMBINERS (conjoint_in, DA_OVER_SA, ZERO) +MAKE_PD_COMBINERS (conjoint_in_reverse, ZERO, SA_OVER_DA) +MAKE_PD_COMBINERS (conjoint_out, ONE_MINUS_DA_OVER_SA, ZERO) +MAKE_PD_COMBINERS (conjoint_out_reverse, ZERO, ONE_MINUS_SA_OVER_DA) +MAKE_PD_COMBINERS (conjoint_atop, DA_OVER_SA, ONE_MINUS_SA_OVER_DA) +MAKE_PD_COMBINERS (conjoint_atop_reverse, ONE_MINUS_DA_OVER_SA, SA_OVER_DA) +MAKE_PD_COMBINERS (conjoint_xor, ONE_MINUS_DA_OVER_SA, ONE_MINUS_SA_OVER_DA) + +/* + * PDF blend modes: + * + * The following blend modes have been taken from the PDF ISO 32000 + * specification, which at this point in time is available from + * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf + * The relevant chapters are 11.3.5 and 11.3.6. + * The formula for computing the final pixel color given in 11.3.6 is: + * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) + * with B() being the blend function. + * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs + * + * These blend modes should match the SVG filter draft specification, as + * it has been designed to mirror ISO 32000. Note that at the current point + * no released draft exists that shows this, as the formulas have not been + * updated yet after the release of ISO 32000. + * + * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and + * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an + * argument. Note that this implementation operates on premultiplied colors, + * while the PDF specification does not. Therefore the code uses the formula + * ar.Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as) + */ + +#define MAKE_SEPARABLE_PDF_COMBINERS(name) \ + static force_inline float \ + combine_ ## name ## _a (float sa, float s, float da, float d) \ + { \ + return da + sa - da * sa; \ + } \ + \ + static force_inline float \ + combine_ ## name ## _c (float sa, float s, float da, float d) \ + { \ + float f = (1 - sa) * d + (1 - da) * s; \ + \ + return f + blend_ ## name (sa, s, da, d); \ + } \ + \ + MAKE_COMBINERS (name, combine_ ## name ## _a, combine_ ## name ## _c) + +static force_inline float +blend_multiply (float sa, float s, float da, float d) +{ + return d * s; +} + +static force_inline float +blend_screen (float sa, float s, float da, float d) +{ + return d * sa + s * da - s * d; +} + +static force_inline float +blend_overlay (float sa, float s, float da, float d) +{ + if (2 * d < da) + return 2 * s * d; + else + return sa * da - 2 * (da - d) * (sa - s); +} + +static force_inline float +blend_darken (float sa, float s, float da, float d) +{ + s = s * da; + d = d * sa; + + if (s > d) + return d; + else + return s; +} + +static force_inline float +blend_lighten (float sa, float s, float da, float d) +{ + s = s * da; + d = d * sa; + + if (s > d) + return s; + else + return d; +} + +static force_inline float +blend_color_dodge (float sa, float s, float da, float d) +{ + if (FLOAT_IS_ZERO (d)) + return 0.0f; + else if (d * sa >= sa * da - s * da) + return sa * da; + else if (FLOAT_IS_ZERO (sa - s)) + return sa * da; + else + return sa * sa * d / (sa - s); +} + +static force_inline float +blend_color_burn (float sa, float s, float da, float d) +{ + if (d >= da) + return sa * da; + else if (sa * (da - d) >= s * da) + return 0.0f; + else if (FLOAT_IS_ZERO (s)) + return 0.0f; + else + return sa * (da - sa * (da - d) / s); +} + +static force_inline float +blend_hard_light (float sa, float s, float da, float d) +{ + if (2 * s < sa) + return 2 * s * d; + else + return sa * da - 2 * (da - d) * (sa - s); +} + +static force_inline float +blend_soft_light (float sa, float s, float da, float d) +{ + if (2 * s < sa) + { + if (FLOAT_IS_ZERO (da)) + return d * sa; + else + return d * sa - d * (da - d) * (sa - 2 * s) / da; + } + else + { + if (FLOAT_IS_ZERO (da)) + { + return 0.0f; + } + else + { + if (4 * d <= da) + return d * sa + (2 * s - sa) * d * ((16 * d / da - 12) * d / da + 3); + else + return d * sa + (sqrtf (d * da) - d) * (2 * s - sa); + } + } +} + +static force_inline float +blend_difference (float sa, float s, float da, float d) +{ + float dsa = d * sa; + float sda = s * da; + + if (sda < dsa) + return dsa - sda; + else + return sda - dsa; +} + +static force_inline float +blend_exclusion (float sa, float s, float da, float d) +{ + return s * da + d * sa - 2 * d * s; +} + +MAKE_SEPARABLE_PDF_COMBINERS (multiply) +MAKE_SEPARABLE_PDF_COMBINERS (screen) +MAKE_SEPARABLE_PDF_COMBINERS (overlay) +MAKE_SEPARABLE_PDF_COMBINERS (darken) +MAKE_SEPARABLE_PDF_COMBINERS (lighten) +MAKE_SEPARABLE_PDF_COMBINERS (color_dodge) +MAKE_SEPARABLE_PDF_COMBINERS (color_burn) +MAKE_SEPARABLE_PDF_COMBINERS (hard_light) +MAKE_SEPARABLE_PDF_COMBINERS (soft_light) +MAKE_SEPARABLE_PDF_COMBINERS (difference) +MAKE_SEPARABLE_PDF_COMBINERS (exclusion) + +/* + * PDF nonseperable blend modes. + * + * These are implemented using the following functions to operate in Hsl + * space, with Cmax, Cmid, Cmin referring to the max, mid and min value + * of the red, green and blue components. + * + * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue + * + * clip_color (C): + * l = LUM (C) + * min = Cmin + * max = Cmax + * if n < 0.0 + * C = l + (((C – l) × l) ⁄ (l – min)) + * if x > 1.0 + * C = l + (((C – l) × (1 – l)) (max – l)) + * return C + * + * set_lum (C, l): + * d = l – LUM (C) + * C += d + * return clip_color (C) + * + * SAT (C) = CH_MAX (C) - CH_MIN (C) + * + * set_sat (C, s): + * if Cmax > Cmin + * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) ) + * Cmax = s + * else + * Cmid = Cmax = 0.0 + * Cmin = 0.0 + * return C + */ + +/* For premultiplied colors, we need to know what happens when C is + * multiplied by a real number. LUM and SAT are linear: + * + * LUM (r × C) = r × LUM (C) SAT (r × C) = r × SAT (C) + * + * If we extend clip_color with an extra argument a and change + * + * if x >= 1.0 + * + * into + * + * if x >= a + * + * then clip_color is also linear: + * + * r * clip_color (C, a) = clip_color (r_c, ra); + * + * for positive r. + * + * Similarly, we can extend set_lum with an extra argument that is just passed + * on to clip_color: + * + * r × set_lum ( C, l, a) + * + * = r × clip_color ( C + l - LUM (C), a) + * + * = clip_color ( r * C + r × l - LUM (r × C), r * a) + * + * = set_lum ( r * C, r * l, r * a) + * + * Finally, set_sat: + * + * r * set_sat (C, s) = set_sat (x * C, r * s) + * + * The above holds for all non-zero x because they x'es in the fraction for + * C_mid cancel out. Specifically, it holds for x = r: + * + * r * set_sat (C, s) = set_sat (r_c, rs) + * + * + * + * + * So, for the non-separable PDF blend modes, we have (using s, d for + * non-premultiplied colors, and S, D for premultiplied: + * + * Color: + * + * a_s * a_d * B(s, d) + * = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1) + * = set_lum (S * a_d, a_s * LUM (D), a_s * a_d) + * + * + * Luminosity: + * + * a_s * a_d * B(s, d) + * = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1) + * = set_lum (a_s * D, a_d * LUM(S), a_s * a_d) + * + * + * Saturation: + * + * a_s * a_d * B(s, d) + * = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1) + * = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)), + * a_s * LUM (D), a_s * a_d) + * = set_lum (set_sat (a_s * D, a_d * SAT (S), a_s * LUM (D), a_s * a_d)) + * + * Hue: + * + * a_s * a_d * B(s, d) + * = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1) + * = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d) + * + */ + +typedef struct +{ + float r; + float g; + float b; +} rgb_t; + +static force_inline float +minf (float a, float b) +{ + return a < b? a : b; +} + +static force_inline float +maxf (float a, float b) +{ + return a > b? a : b; +} + +static force_inline float +channel_min (const rgb_t *c) +{ + return minf (minf (c->r, c->g), c->b); +} + +static force_inline float +channel_max (const rgb_t *c) +{ + return maxf (maxf (c->r, c->g), c->b); +} + +static force_inline float +get_lum (const rgb_t *c) +{ + return c->r * 0.3f + c->g * 0.59f + c->b * 0.11f; +} + +static force_inline float +get_sat (const rgb_t *c) +{ + return channel_max (c) - channel_min (c); +} + +static void +clip_color (rgb_t *color, float a) +{ + float l = get_lum (color); + float n = channel_min (color); + float x = channel_max (color); + float t; + + if (n < 0.0f) + { + t = l - n; + if (FLOAT_IS_ZERO (t)) + { + color->r = 0.0f; + color->g = 0.0f; + color->b = 0.0f; + } + else + { + color->r = l + (((color->r - l) * l) / t); + color->g = l + (((color->g - l) * l) / t); + color->b = l + (((color->b - l) * l) / t); + } + } + if (x > a) + { + t = x - l; + if (FLOAT_IS_ZERO (t)) + { + color->r = a; + color->g = a; + color->b = a; + } + else + { + color->r = l + (((color->r - l) * (a - l) / t)); + color->g = l + (((color->g - l) * (a - l) / t)); + color->b = l + (((color->b - l) * (a - l) / t)); + } + } +} + +static void +set_lum (rgb_t *color, float sa, float l) +{ + float d = l - get_lum (color); + + color->r = color->r + d; + color->g = color->g + d; + color->b = color->b + d; + + clip_color (color, sa); +} + +static void +set_sat (rgb_t *src, float sat) +{ + float *max, *mid, *min; + float t; + + if (src->r > src->g) + { + if (src->r > src->b) + { + max = &(src->r); + + if (src->g > src->b) + { + mid = &(src->g); + min = &(src->b); + } + else + { + mid = &(src->b); + min = &(src->g); + } + } + else + { + max = &(src->b); + mid = &(src->r); + min = &(src->g); + } + } + else + { + if (src->r > src->b) + { + max = &(src->g); + mid = &(src->r); + min = &(src->b); + } + else + { + min = &(src->r); + + if (src->g > src->b) + { + max = &(src->g); + mid = &(src->b); + } + else + { + max = &(src->b); + mid = &(src->g); + } + } + } + + t = *max - *min; + + if (FLOAT_IS_ZERO (t)) + { + *mid = *max = 0.0f; + } + else + { + *mid = ((*mid - *min) * sat) / t; + *max = sat; + } + + *min = 0.0f; +} + +/* + * Hue: + * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb)) + */ +static force_inline void +blend_hsl_hue (rgb_t *res, + const rgb_t *dest, float da, + const rgb_t *src, float sa) +{ + res->r = src->r * da; + res->g = src->g * da; + res->b = src->b * da; + + set_sat (res, get_sat (dest) * sa); + set_lum (res, sa * da, get_lum (dest) * sa); +} + +/* + * Saturation: + * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb)) + */ +static force_inline void +blend_hsl_saturation (rgb_t *res, + const rgb_t *dest, float da, + const rgb_t *src, float sa) +{ + res->r = dest->r * sa; + res->g = dest->g * sa; + res->b = dest->b * sa; + + set_sat (res, get_sat (src) * da); + set_lum (res, sa * da, get_lum (dest) * sa); +} + +/* + * Color: + * B(Cb, Cs) = set_lum (Cs, LUM (Cb)) + */ +static force_inline void +blend_hsl_color (rgb_t *res, + const rgb_t *dest, float da, + const rgb_t *src, float sa) +{ + res->r = src->r * da; + res->g = src->g * da; + res->b = src->b * da; + + set_lum (res, sa * da, get_lum (dest) * sa); +} + +/* + * Luminosity: + * B(Cb, Cs) = set_lum (Cb, LUM (Cs)) + */ +static force_inline void +blend_hsl_luminosity (rgb_t *res, + const rgb_t *dest, float da, + const rgb_t *src, float sa) +{ + res->r = dest->r * sa; + res->g = dest->g * sa; + res->b = dest->b * sa; + + set_lum (res, sa * da, get_lum (src) * da); +} + +#define MAKE_NON_SEPARABLE_PDF_COMBINERS(name) \ + static void \ + combine_ ## name ## _u_float (pixman_implementation_t *imp, \ + pixman_op_t op, \ + float *dest, \ + const float *src, \ + const float *mask, \ + int n_pixels) \ + { \ + int i; \ + \ + for (i = 0; i < 4 * n_pixels; i += 4) \ + { \ + float sa, da; \ + rgb_t sc, dc, rc; \ + \ + sa = src[i + 0]; \ + sc.r = src[i + 1]; \ + sc.g = src[i + 2]; \ + sc.b = src[i + 3]; \ + \ + da = dest[i + 0]; \ + dc.r = dest[i + 1]; \ + dc.g = dest[i + 2]; \ + dc.b = dest[i + 3]; \ + \ + if (mask) \ + { \ + float ma = mask[i + 0]; \ + \ + /* Component alpha is not supported for HSL modes */ \ + sa *= ma; \ + sc.r *= ma; \ + sc.g *= ma; \ + sc.g *= ma; \ + } \ + \ + blend_ ## name (&rc, &dc, da, &sc, sa); \ + \ + dest[i + 0] = sa + da - sa * da; \ + dest[i + 1] = (1 - sa) * dc.r + (1 - da) * sc.r + rc.r; \ + dest[i + 2] = (1 - sa) * dc.g + (1 - da) * sc.g + rc.g; \ + dest[i + 3] = (1 - sa) * dc.b + (1 - da) * sc.b + rc.b; \ + } \ + } + +MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_hue) +MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_saturation) +MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_color) +MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_luminosity) + +void +_pixman_setup_combiner_functions_float (pixman_implementation_t *imp) +{ + /* Unified alpha */ + imp->combine_float[PIXMAN_OP_CLEAR] = combine_clear_u_float; + imp->combine_float[PIXMAN_OP_SRC] = combine_src_u_float; + imp->combine_float[PIXMAN_OP_DST] = combine_dst_u_float; + imp->combine_float[PIXMAN_OP_OVER] = combine_over_u_float; + imp->combine_float[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u_float; + imp->combine_float[PIXMAN_OP_IN] = combine_in_u_float; + imp->combine_float[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u_float; + imp->combine_float[PIXMAN_OP_OUT] = combine_out_u_float; + imp->combine_float[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u_float; + imp->combine_float[PIXMAN_OP_ATOP] = combine_atop_u_float; + imp->combine_float[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u_float; + imp->combine_float[PIXMAN_OP_XOR] = combine_xor_u_float; + imp->combine_float[PIXMAN_OP_ADD] = combine_add_u_float; + imp->combine_float[PIXMAN_OP_SATURATE] = combine_saturate_u_float; + + /* Disjoint, unified */ + imp->combine_float[PIXMAN_OP_DISJOINT_CLEAR] = combine_disjoint_clear_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_SRC] = combine_disjoint_src_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_DST] = combine_disjoint_dst_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_disjoint_over_reverse_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u_float; + + /* Conjoint, unified */ + imp->combine_float[PIXMAN_OP_CONJOINT_CLEAR] = combine_conjoint_clear_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_SRC] = combine_conjoint_src_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_DST] = combine_conjoint_dst_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u_float; + + /* PDF operators, unified */ + imp->combine_float[PIXMAN_OP_MULTIPLY] = combine_multiply_u_float; + imp->combine_float[PIXMAN_OP_SCREEN] = combine_screen_u_float; + imp->combine_float[PIXMAN_OP_OVERLAY] = combine_overlay_u_float; + imp->combine_float[PIXMAN_OP_DARKEN] = combine_darken_u_float; + imp->combine_float[PIXMAN_OP_LIGHTEN] = combine_lighten_u_float; + imp->combine_float[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u_float; + imp->combine_float[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u_float; + imp->combine_float[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u_float; + imp->combine_float[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u_float; + imp->combine_float[PIXMAN_OP_DIFFERENCE] = combine_difference_u_float; + imp->combine_float[PIXMAN_OP_EXCLUSION] = combine_exclusion_u_float; + + imp->combine_float[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u_float; + imp->combine_float[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u_float; + imp->combine_float[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u_float; + imp->combine_float[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u_float; + + /* Component alpha combiners */ + imp->combine_float_ca[PIXMAN_OP_CLEAR] = combine_clear_ca_float; + imp->combine_float_ca[PIXMAN_OP_SRC] = combine_src_ca_float; + imp->combine_float_ca[PIXMAN_OP_DST] = combine_dst_ca_float; + imp->combine_float_ca[PIXMAN_OP_OVER] = combine_over_ca_float; + imp->combine_float_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_IN] = combine_in_ca_float; + imp->combine_float_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_OUT] = combine_out_ca_float; + imp->combine_float_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_ATOP] = combine_atop_ca_float; + imp->combine_float_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_XOR] = combine_xor_ca_float; + imp->combine_float_ca[PIXMAN_OP_ADD] = combine_add_ca_float; + imp->combine_float_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca_float; + + /* Disjoint CA */ + imp->combine_float_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_disjoint_clear_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_SRC] = combine_disjoint_src_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_DST] = combine_disjoint_dst_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_disjoint_over_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca_float; + + /* Conjoint CA */ + imp->combine_float_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_conjoint_clear_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_SRC] = combine_conjoint_src_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_DST] = combine_conjoint_dst_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca_float; + + /* PDF operators CA */ + imp->combine_float_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca_float; + imp->combine_float_ca[PIXMAN_OP_SCREEN] = combine_screen_ca_float; + imp->combine_float_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca_float; + imp->combine_float_ca[PIXMAN_OP_DARKEN] = combine_darken_ca_float; + imp->combine_float_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca_float; + imp->combine_float_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca_float; + imp->combine_float_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca_float; + imp->combine_float_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca_float; + imp->combine_float_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca_float; + imp->combine_float_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca_float; + imp->combine_float_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca_float; + + /* It is not clear that these make sense, so make them noops for now */ + imp->combine_float_ca[PIXMAN_OP_HSL_HUE] = combine_dst_u_float; + imp->combine_float_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst_u_float; + imp->combine_float_ca[PIXMAN_OP_HSL_COLOR] = combine_dst_u_float; + imp->combine_float_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst_u_float; +} diff --git a/src/pixman/pixman/pixman-combine32.c b/src/pixman/pixman/pixman-combine32.c new file mode 100644 index 0000000..450114a --- /dev/null +++ b/src/pixman/pixman/pixman-combine32.c @@ -0,0 +1,2581 @@ +/* + * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. + * 2005 Lars Knoll & Zack Rusin, Trolltech + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <math.h> +#include <string.h> + +#include "pixman-private.h" +#include "pixman-combine32.h" + +/* component alpha helper functions */ + +static void +combine_mask_ca (uint32_t *src, uint32_t *mask) +{ + uint32_t a = *mask; + + uint32_t x; + uint16_t xa; + + if (!a) + { + *(src) = 0; + return; + } + + x = *(src); + if (a == ~0) + { + x = x >> A_SHIFT; + x |= x << G_SHIFT; + x |= x << R_SHIFT; + *(mask) = x; + return; + } + + xa = x >> A_SHIFT; + UN8x4_MUL_UN8x4 (x, a); + *(src) = x; + + UN8x4_MUL_UN8 (a, xa); + *(mask) = a; +} + +static void +combine_mask_value_ca (uint32_t *src, const uint32_t *mask) +{ + uint32_t a = *mask; + uint32_t x; + + if (!a) + { + *(src) = 0; + return; + } + + if (a == ~0) + return; + + x = *(src); + UN8x4_MUL_UN8x4 (x, a); + *(src) = x; +} + +static void +combine_mask_alpha_ca (const uint32_t *src, uint32_t *mask) +{ + uint32_t a = *(mask); + uint32_t x; + + if (!a) + return; + + x = *(src) >> A_SHIFT; + if (x == MASK) + return; + + if (a == ~0) + { + x |= x << G_SHIFT; + x |= x << R_SHIFT; + *(mask) = x; + return; + } + + UN8x4_MUL_UN8 (a, x); + *(mask) = a; +} + +/* + * There are two ways of handling alpha -- either as a single unified value or + * a separate value for each component, hence each macro must have two + * versions. The unified alpha version has a 'u' at the end of the name, + * the component version has a 'ca'. Similarly, functions which deal with + * this difference will have two versions using the same convention. + */ + +static force_inline uint32_t +combine_mask (const uint32_t *src, const uint32_t *mask, int i) +{ + uint32_t s, m; + + if (mask) + { + m = *(mask + i) >> A_SHIFT; + + if (!m) + return 0; + } + + s = *(src + i); + + if (mask) + UN8x4_MUL_UN8 (s, m); + + return s; +} + +static void +combine_clear (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + memset (dest, 0, width * sizeof (uint32_t)); +} + +static void +combine_dst (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + return; +} + +static void +combine_src_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + if (!mask) + { + memcpy (dest, src, width * sizeof (uint32_t)); + } + else + { + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + + *(dest + i) = s; + } + } +} + +static void +combine_over_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + if (!mask) + { + for (i = 0; i < width; ++i) + { + uint32_t s = *(src + i); + uint32_t a = ALPHA_8 (s); + if (a == 0xFF) + { + *(dest + i) = s; + } + else if (s) + { + uint32_t d = *(dest + i); + uint32_t ia = a ^ 0xFF; + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + *(dest + i) = d; + } + } + } + else + { + for (i = 0; i < width; ++i) + { + uint32_t m = ALPHA_8 (*(mask + i)); + if (m == 0xFF) + { + uint32_t s = *(src + i); + uint32_t a = ALPHA_8 (s); + if (a == 0xFF) + { + *(dest + i) = s; + } + else if (s) + { + uint32_t d = *(dest + i); + uint32_t ia = a ^ 0xFF; + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + *(dest + i) = d; + } + } + else if (m) + { + uint32_t s = *(src + i); + if (s) + { + uint32_t d = *(dest + i); + UN8x4_MUL_UN8 (s, m); + UN8x4_MUL_UN8_ADD_UN8x4 (d, ALPHA_8 (~s), s); + *(dest + i) = d; + } + } + } + } +} + +static void +combine_over_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint32_t d = *(dest + i); + uint32_t ia = ALPHA_8 (~*(dest + i)); + UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); + *(dest + i) = s; + } +} + +static void +combine_in_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint32_t a = ALPHA_8 (*(dest + i)); + UN8x4_MUL_UN8 (s, a); + *(dest + i) = s; + } +} + +static void +combine_in_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint32_t d = *(dest + i); + uint32_t a = ALPHA_8 (s); + UN8x4_MUL_UN8 (d, a); + *(dest + i) = d; + } +} + +static void +combine_out_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint32_t a = ALPHA_8 (~*(dest + i)); + UN8x4_MUL_UN8 (s, a); + *(dest + i) = s; + } +} + +static void +combine_out_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint32_t d = *(dest + i); + uint32_t a = ALPHA_8 (~s); + UN8x4_MUL_UN8 (d, a); + *(dest + i) = d; + } +} + +static void +combine_atop_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint32_t d = *(dest + i); + uint32_t dest_a = ALPHA_8 (d); + uint32_t src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); + *(dest + i) = s; + } +} + +static void +combine_atop_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint32_t d = *(dest + i); + uint32_t src_a = ALPHA_8 (s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); + *(dest + i) = s; + } +} + +static void +combine_xor_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint32_t d = *(dest + i); + uint32_t src_ia = ALPHA_8 (~s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); + *(dest + i) = s; + } +} + +static void +combine_add_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint32_t d = *(dest + i); + UN8x4_ADD_UN8x4 (d, s); + *(dest + i) = d; + } +} + +static void +combine_saturate_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint32_t d = *(dest + i); + uint16_t sa, da; + + sa = s >> A_SHIFT; + da = ~d >> A_SHIFT; + if (sa > da) + { + sa = DIV_UN8 (da, sa); + UN8x4_MUL_UN8 (s, sa); + } + ; + UN8x4_ADD_UN8x4 (d, s); + *(dest + i) = d; + } +} + + +/* + * PDF blend modes: + * + * The following blend modes have been taken from the PDF ISO 32000 + * specification, which at this point in time is available from + * + * http://www.adobe.com/devnet/pdf/pdf_reference.html + * + * The specific documents of interest are the PDF spec itself: + * + * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf + * + * chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat + * 9.1 and Reader 9.1: + * + * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf + * + * that clarifies the specifications for blend modes ColorDodge and + * ColorBurn. + * + * The formula for computing the final pixel color given in 11.3.6 is: + * + * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) + * + * with B() is the blend function. When B(Cb, Cs) = Cs, this formula + * reduces to the regular OVER operator. + * + * Cs and Cb are not premultiplied, so in our implementation we instead + * use: + * + * cr = (1 – αs) × cb + (1 – αb) × cs + αb × αs × B (cb/αb, cs/αs) + * + * where cr, cs, and cb are premultiplied colors, and where the + * + * αb × αs × B(cb/αb, cs/αs) + * + * part is first arithmetically simplified under the assumption that αb + * and αs are not 0, and then updated to produce a meaningful result when + * they are. + * + * For all the blend mode operators, the alpha channel is given by + * + * αr = αs + αb + αb × αs + */ + +/* + * Multiply + * + * ad * as * B(d / ad, s / as) + * = ad * as * d/ad * s/as + * = d * s + * + */ +static void +combine_multiply_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint32_t d = *(dest + i); + uint32_t ss = s; + uint32_t src_ia = ALPHA_8 (~s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (ss, dest_ia, d, src_ia); + UN8x4_MUL_UN8x4 (d, s); + UN8x4_ADD_UN8x4 (d, ss); + + *(dest + i) = d; + } +} + +static void +combine_multiply_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t m = *(mask + i); + uint32_t s = *(src + i); + uint32_t d = *(dest + i); + uint32_t r = d; + uint32_t dest_ia = ALPHA_8 (~d); + + combine_mask_ca (&s, &m); + + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (r, ~m, s, dest_ia); + UN8x4_MUL_UN8x4 (d, s); + UN8x4_ADD_UN8x4 (r, d); + + *(dest + i) = r; + } +} + +#define PDF_SEPARABLE_BLEND_MODE(name) \ + static void \ + combine_ ## name ## _u (pixman_implementation_t *imp, \ + pixman_op_t op, \ + uint32_t * dest, \ + const uint32_t * src, \ + const uint32_t * mask, \ + int width) \ + { \ + int i; \ + for (i = 0; i < width; ++i) \ + { \ + uint32_t s = combine_mask (src, mask, i); \ + uint32_t d = *(dest + i); \ + uint8_t sa = ALPHA_8 (s); \ + uint8_t isa = ~sa; \ + uint8_t da = ALPHA_8 (d); \ + uint8_t ida = ~da; \ + uint32_t result; \ + \ + result = d; \ + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida); \ + \ + *(dest + i) = result + \ + (DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) + \ + (blend_ ## name (RED_8 (d), da, RED_8 (s), sa) << R_SHIFT) + \ + (blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), sa) << G_SHIFT) + \ + (blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), sa)); \ + } \ + } \ + \ + static void \ + combine_ ## name ## _ca (pixman_implementation_t *imp, \ + pixman_op_t op, \ + uint32_t * dest, \ + const uint32_t * src, \ + const uint32_t * mask, \ + int width) \ + { \ + int i; \ + for (i = 0; i < width; ++i) \ + { \ + uint32_t m = *(mask + i); \ + uint32_t s = *(src + i); \ + uint32_t d = *(dest + i); \ + uint8_t da = ALPHA_8 (d); \ + uint8_t ida = ~da; \ + uint32_t result; \ + \ + combine_mask_ca (&s, &m); \ + \ + result = d; \ + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (result, ~m, s, ida); \ + \ + result += \ + (DIV_ONE_UN8 (ALPHA_8 (m) * (uint32_t)da) << A_SHIFT) + \ + (blend_ ## name (RED_8 (d), da, RED_8 (s), RED_8 (m)) << R_SHIFT) + \ + (blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), GREEN_8 (m)) << G_SHIFT) + \ + (blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), BLUE_8 (m))); \ + \ + *(dest + i) = result; \ + } \ + } + +/* + * Screen + * + * ad * as * B(d/ad, s/as) + * = ad * as * (d/ad + s/as - s/as * d/ad) + * = ad * s + as * d - s * d + */ +static inline uint32_t +blend_screen (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) +{ + return DIV_ONE_UN8 (s * ad + d * as - s * d); +} + +PDF_SEPARABLE_BLEND_MODE (screen) + +/* + * Overlay + * + * ad * as * B(d/ad, s/as) + * = ad * as * Hardlight (s, d) + * = if (d / ad < 0.5) + * as * ad * Multiply (s/as, 2 * d/ad) + * else + * as * ad * Screen (s/as, 2 * d / ad - 1) + * = if (d < 0.5 * ad) + * as * ad * s/as * 2 * d /ad + * else + * as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1)) + * = if (2 * d < ad) + * 2 * s * d + * else + * ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1) + * = if (2 * d < ad) + * 2 * s * d + * else + * as * ad - 2 * (ad - d) * (as - s) + */ +static inline uint32_t +blend_overlay (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) +{ + uint32_t r; + + if (2 * d < ad) + r = 2 * s * d; + else + r = as * ad - 2 * (ad - d) * (as - s); + + return DIV_ONE_UN8 (r); +} + +PDF_SEPARABLE_BLEND_MODE (overlay) + +/* + * Darken + * + * ad * as * B(d/ad, s/as) + * = ad * as * MIN(d/ad, s/as) + * = MIN (as * d, ad * s) + */ +static inline uint32_t +blend_darken (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) +{ + s = ad * s; + d = as * d; + + return DIV_ONE_UN8 (s > d ? d : s); +} + +PDF_SEPARABLE_BLEND_MODE (darken) + +/* + * Lighten + * + * ad * as * B(d/ad, s/as) + * = ad * as * MAX(d/ad, s/as) + * = MAX (as * d, ad * s) + */ +static inline uint32_t +blend_lighten (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) +{ + s = ad * s; + d = as * d; + + return DIV_ONE_UN8 (s > d ? s : d); +} + +PDF_SEPARABLE_BLEND_MODE (lighten) + +/* + * Color dodge + * + * ad * as * B(d/ad, s/as) + * = if d/ad = 0 + * ad * as * 0 + * else if (d/ad >= (1 - s/as) + * ad * as * 1 + * else + * ad * as * ((d/ad) / (1 - s/as)) + * = if d = 0 + * 0 + * elif as * d >= ad * (as - s) + * ad * as + * else + * as * (as * d / (as - s)) + * + */ +static inline uint32_t +blend_color_dodge (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) +{ + if (d == 0) + return 0; + else if (as * d >= ad * (as - s)) + return DIV_ONE_UN8 (as * ad); + else if (as - s == 0) + return DIV_ONE_UN8 (as * ad); + else + return DIV_ONE_UN8 (as * ((d * as) / ((as - s)))); +} + +PDF_SEPARABLE_BLEND_MODE (color_dodge) + +/* + * Color burn + * + * We modify the first clause "if d = 1" to "if d >= 1" since with + * premultiplied colors d > 1 can actually happen. + * + * ad * as * B(d/ad, s/as) + * = if d/ad >= 1 + * ad * as * 1 + * elif (1 - d/ad) >= s/as + * ad * as * 0 + * else + * ad * as * (1 - ((1 - d/ad) / (s/as))) + * = if d >= ad + * ad * as + * elif as * ad - as * d >= ad * s + * 0 + * else + * ad * as - as * as * (ad - d) / s + */ +static inline uint32_t +blend_color_burn (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) +{ + if (d >= ad) + return DIV_ONE_UN8 (ad * as); + else if (as * ad - as * d >= ad * s) + return 0; + else if (s == 0) + return 0; + else + return DIV_ONE_UN8 (ad * as - (as * as * (ad - d)) / s); +} + +PDF_SEPARABLE_BLEND_MODE (color_burn) + +/* + * Hard light + * + * ad * as * B(d/ad, s/as) + * = if (s/as <= 0.5) + * ad * as * Multiply (d/ad, 2 * s/as) + * else + * ad * as * Screen (d/ad, 2 * s/as - 1) + * = if 2 * s <= as + * ad * as * d/ad * 2 * s / as + * else + * ad * as * (d/ad + (2 * s/as - 1) + d/ad * (2 * s/as - 1)) + * = if 2 * s <= as + * 2 * s * d + * else + * as * ad - 2 * (ad - d) * (as - s) + */ +static inline uint32_t +blend_hard_light (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) +{ + if (2 * s < as) + return DIV_ONE_UN8 (2 * s * d); + else + return DIV_ONE_UN8 (as * ad - 2 * (ad - d) * (as - s)); +} + +PDF_SEPARABLE_BLEND_MODE (hard_light) + +/* + * Soft light + * + * ad * as * B(d/ad, s/as) + * = if (s/as <= 0.5) + * ad * as * (d/ad - (1 - 2 * s/as) * d/ad * (1 - d/ad)) + * else if (d/ad <= 0.25) + * ad * as * (d/ad + (2 * s/as - 1) * ((((16 * d/ad - 12) * d/ad + 4) * d/ad) - d/ad)) + * else + * ad * as * (d/ad + (2 * s/as - 1) * sqrt (d/ad)) + * = if (2 * s <= as) + * d * as - d * (ad - d) * (as - 2 * s) / ad; + * else if (4 * d <= ad) + * (2 * s - as) * d * ((16 * d / ad - 12) * d / ad + 3); + * else + * d * as + (sqrt (d * ad) - d) * (2 * s - as); + */ +static inline uint32_t +blend_soft_light (uint32_t d_org, + uint32_t ad_org, + uint32_t s_org, + uint32_t as_org) +{ + double d = d_org * (1.0 / MASK); + double ad = ad_org * (1.0 / MASK); + double s = s_org * (1.0 / MASK); + double as = as_org * (1.0 / MASK); + double r; + + if (2 * s < as) + { + if (ad == 0) + r = d * as; + else + r = d * as - d * (ad - d) * (as - 2 * s) / ad; + } + else if (ad == 0) + { + r = 0; + } + else if (4 * d <= ad) + { + r = d * as + + (2 * s - as) * d * ((16 * d / ad - 12) * d / ad + 3); + } + else + { + r = d * as + (sqrt (d * ad) - d) * (2 * s - as); + } + return r * MASK + 0.5; +} + +PDF_SEPARABLE_BLEND_MODE (soft_light) + +/* + * Difference + * + * ad * as * B(s/as, d/ad) + * = ad * as * abs (s/as - d/ad) + * = if (s/as <= d/ad) + * ad * as * (d/ad - s/as) + * else + * ad * as * (s/as - d/ad) + * = if (ad * s <= as * d) + * as * d - ad * s + * else + * ad * s - as * d + */ +static inline uint32_t +blend_difference (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) +{ + uint32_t das = d * as; + uint32_t sad = s * ad; + + if (sad < das) + return DIV_ONE_UN8 (das - sad); + else + return DIV_ONE_UN8 (sad - das); +} + +PDF_SEPARABLE_BLEND_MODE (difference) + +/* + * Exclusion + * + * ad * as * B(s/as, d/ad) + * = ad * as * (d/ad + s/as - 2 * d/ad * s/as) + * = as * d + ad * s - 2 * s * d + */ + +/* This can be made faster by writing it directly and not using + * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */ + +static inline uint32_t +blend_exclusion (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) +{ + return DIV_ONE_UN8 (s * ad + d * as - 2 * d * s); +} + +PDF_SEPARABLE_BLEND_MODE (exclusion) + +#undef PDF_SEPARABLE_BLEND_MODE + +/* + * PDF nonseperable blend modes are implemented using the following functions + * to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid + * and min value of the red, green and blue components. + * + * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue + * + * clip_color (C): + * l = LUM (C) + * min = Cmin + * max = Cmax + * if n < 0.0 + * C = l + (((C – l) × l) ⁄ (l – min)) + * if x > 1.0 + * C = l + (((C – l) × (1 – l) ) ⁄ (max – l)) + * return C + * + * set_lum (C, l): + * d = l – LUM (C) + * C += d + * return clip_color (C) + * + * SAT (C) = CH_MAX (C) - CH_MIN (C) + * + * set_sat (C, s): + * if Cmax > Cmin + * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) ) + * Cmax = s + * else + * Cmid = Cmax = 0.0 + * Cmin = 0.0 + * return C + */ + +/* For premultiplied colors, we need to know what happens when C is + * multiplied by a real number. LUM and SAT are linear: + * + * LUM (r × C) = r × LUM (C) SAT (r * C) = r * SAT (C) + * + * If we extend clip_color with an extra argument a and change + * + * if x >= 1.0 + * + * into + * + * if x >= a + * + * then clip_color is also linear: + * + * r * clip_color (C, a) = clip_color (r * C, r * a); + * + * for positive r. + * + * Similarly, we can extend set_lum with an extra argument that is just passed + * on to clip_color: + * + * r * set_lum (C, l, a) + * + * = r × clip_color (C + l - LUM (C), a) + * + * = clip_color (r * C + r × l - r * LUM (C), r * a) + * + * = set_lum (r * C, r * l, r * a) + * + * Finally, set_sat: + * + * r * set_sat (C, s) = set_sat (x * C, r * s) + * + * The above holds for all non-zero x, because the x'es in the fraction for + * C_mid cancel out. Specifically, it holds for x = r: + * + * r * set_sat (C, s) = set_sat (r * C, r * s) + * + */ + +#define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2])) +#define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ? c[1] : c[2])) +#define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100) +#define SAT(c) (CH_MAX (c) - CH_MIN (c)) + +#define PDF_NON_SEPARABLE_BLEND_MODE(name) \ + static void \ + combine_ ## name ## _u (pixman_implementation_t *imp, \ + pixman_op_t op, \ + uint32_t * dest, \ + const uint32_t * src, \ + const uint32_t * mask, \ + int width) \ + { \ + int i; \ + for (i = 0; i < width; ++i) \ + { \ + uint32_t s = combine_mask (src, mask, i); \ + uint32_t d = *(dest + i); \ + uint8_t sa = ALPHA_8 (s); \ + uint8_t isa = ~sa; \ + uint8_t da = ALPHA_8 (d); \ + uint8_t ida = ~da; \ + uint32_t result; \ + uint32_t sc[3], dc[3], c[3]; \ + \ + result = d; \ + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida); \ + dc[0] = RED_8 (d); \ + sc[0] = RED_8 (s); \ + dc[1] = GREEN_8 (d); \ + sc[1] = GREEN_8 (s); \ + dc[2] = BLUE_8 (d); \ + sc[2] = BLUE_8 (s); \ + blend_ ## name (c, dc, da, sc, sa); \ + \ + *(dest + i) = result + \ + (DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) + \ + (DIV_ONE_UN8 (c[0]) << R_SHIFT) + \ + (DIV_ONE_UN8 (c[1]) << G_SHIFT) + \ + (DIV_ONE_UN8 (c[2])); \ + } \ + } + +static void +set_lum (uint32_t dest[3], uint32_t src[3], uint32_t sa, uint32_t lum) +{ + double a, l, min, max; + double tmp[3]; + + a = sa * (1.0 / MASK); + + l = lum * (1.0 / MASK); + tmp[0] = src[0] * (1.0 / MASK); + tmp[1] = src[1] * (1.0 / MASK); + tmp[2] = src[2] * (1.0 / MASK); + + l = l - LUM (tmp); + tmp[0] += l; + tmp[1] += l; + tmp[2] += l; + + /* clip_color */ + l = LUM (tmp); + min = CH_MIN (tmp); + max = CH_MAX (tmp); + + if (min < 0) + { + if (l - min == 0.0) + { + tmp[0] = 0; + tmp[1] = 0; + tmp[2] = 0; + } + else + { + tmp[0] = l + (tmp[0] - l) * l / (l - min); + tmp[1] = l + (tmp[1] - l) * l / (l - min); + tmp[2] = l + (tmp[2] - l) * l / (l - min); + } + } + if (max > a) + { + if (max - l == 0.0) + { + tmp[0] = a; + tmp[1] = a; + tmp[2] = a; + } + else + { + tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l); + tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l); + tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l); + } + } + + dest[0] = tmp[0] * MASK + 0.5; + dest[1] = tmp[1] * MASK + 0.5; + dest[2] = tmp[2] * MASK + 0.5; +} + +static void +set_sat (uint32_t dest[3], uint32_t src[3], uint32_t sat) +{ + int id[3]; + uint32_t min, max; + + if (src[0] > src[1]) + { + if (src[0] > src[2]) + { + id[0] = 0; + if (src[1] > src[2]) + { + id[1] = 1; + id[2] = 2; + } + else + { + id[1] = 2; + id[2] = 1; + } + } + else + { + id[0] = 2; + id[1] = 0; + id[2] = 1; + } + } + else + { + if (src[0] > src[2]) + { + id[0] = 1; + id[1] = 0; + id[2] = 2; + } + else + { + id[2] = 0; + if (src[1] > src[2]) + { + id[0] = 1; + id[1] = 2; + } + else + { + id[0] = 2; + id[1] = 1; + } + } + } + + max = dest[id[0]]; + min = dest[id[2]]; + if (max > min) + { + dest[id[1]] = (dest[id[1]] - min) * sat / (max - min); + dest[id[0]] = sat; + dest[id[2]] = 0; + } + else + { + dest[0] = dest[1] = dest[2] = 0; + } +} + +/* Hue: + * + * as * ad * B(s/as, d/as) + * = as * ad * set_lum (set_sat (s/as, SAT (d/ad)), LUM (d/ad), 1) + * = set_lum (set_sat (ad * s, as * SAT (d)), as * LUM (d), as * ad) + * + */ +static inline void +blend_hsl_hue (uint32_t r[3], + uint32_t d[3], + uint32_t ad, + uint32_t s[3], + uint32_t as) +{ + r[0] = s[0] * ad; + r[1] = s[1] * ad; + r[2] = s[2] * ad; + set_sat (r, r, SAT (d) * as); + set_lum (r, r, as * ad, LUM (d) * as); +} + +PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue) + +/* + * Saturation + * + * as * ad * B(s/as, d/ad) + * = as * ad * set_lum (set_sat (d/ad, SAT (s/as)), LUM (d/ad), 1) + * = set_lum (as * ad * set_sat (d/ad, SAT (s/as)), + * as * LUM (d), as * ad) + * = set_lum (set_sat (as * d, ad * SAT (s), as * LUM (d), as * ad)) + */ +static inline void +blend_hsl_saturation (uint32_t r[3], + uint32_t d[3], + uint32_t ad, + uint32_t s[3], + uint32_t as) +{ + r[0] = d[0] * as; + r[1] = d[1] * as; + r[2] = d[2] * as; + set_sat (r, r, SAT (s) * ad); + set_lum (r, r, as * ad, LUM (d) * as); +} + +PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation) + +/* + * Color + * + * as * ad * B(s/as, d/as) + * = as * ad * set_lum (s/as, LUM (d/ad), 1) + * = set_lum (s * ad, as * LUM (d), as * ad) + */ +static inline void +blend_hsl_color (uint32_t r[3], + uint32_t d[3], + uint32_t ad, + uint32_t s[3], + uint32_t as) +{ + r[0] = s[0] * ad; + r[1] = s[1] * ad; + r[2] = s[2] * ad; + set_lum (r, r, as * ad, LUM (d) * as); +} + +PDF_NON_SEPARABLE_BLEND_MODE (hsl_color) + +/* + * Luminosity + * + * as * ad * B(s/as, d/ad) + * = as * ad * set_lum (d/ad, LUM (s/as), 1) + * = set_lum (as * d, ad * LUM (s), as * ad) + */ +static inline void +blend_hsl_luminosity (uint32_t r[3], + uint32_t d[3], + uint32_t ad, + uint32_t s[3], + uint32_t as) +{ + r[0] = d[0] * as; + r[1] = d[1] * as; + r[2] = d[2] * as; + set_lum (r, r, as * ad, LUM (s) * ad); +} + +PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity) + +#undef SAT +#undef LUM +#undef CH_MAX +#undef CH_MIN +#undef PDF_NON_SEPARABLE_BLEND_MODE + +/* All of the disjoint/conjoint composing functions + * + * The four entries in the first column indicate what source contributions + * come from each of the four areas of the picture -- areas covered by neither + * A nor B, areas covered only by A, areas covered only by B and finally + * areas covered by both A and B. + * + * Disjoint Conjoint + * Fa Fb Fa Fb + * (0,0,0,0) 0 0 0 0 + * (0,A,0,A) 1 0 1 0 + * (0,0,B,B) 0 1 0 1 + * (0,A,B,A) 1 min((1-a)/b,1) 1 max(1-a/b,0) + * (0,A,B,B) min((1-b)/a,1) 1 max(1-b/a,0) 1 + * (0,0,0,A) max(1-(1-b)/a,0) 0 min(1,b/a) 0 + * (0,0,0,B) 0 max(1-(1-a)/b,0) 0 min(a/b,1) + * (0,A,0,0) min(1,(1-b)/a) 0 max(1-b/a,0) 0 + * (0,0,B,0) 0 min(1,(1-a)/b) 0 max(1-a/b,0) + * (0,0,B,A) max(1-(1-b)/a,0) min(1,(1-a)/b) min(1,b/a) max(1-a/b,0) + * (0,A,0,B) min(1,(1-b)/a) max(1-(1-a)/b,0) max(1-b/a,0) min(1,a/b) + * (0,A,B,0) min(1,(1-b)/a) min(1,(1-a)/b) max(1-b/a,0) max(1-a/b,0) + * + * See http://marc.info/?l=xfree-render&m=99792000027857&w=2 for more + * information about these operators. + */ + +#define COMBINE_A_OUT 1 +#define COMBINE_A_IN 2 +#define COMBINE_B_OUT 4 +#define COMBINE_B_IN 8 + +#define COMBINE_CLEAR 0 +#define COMBINE_A (COMBINE_A_OUT | COMBINE_A_IN) +#define COMBINE_B (COMBINE_B_OUT | COMBINE_B_IN) +#define COMBINE_A_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN) +#define COMBINE_B_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN) +#define COMBINE_A_ATOP (COMBINE_B_OUT | COMBINE_A_IN) +#define COMBINE_B_ATOP (COMBINE_A_OUT | COMBINE_B_IN) +#define COMBINE_XOR (COMBINE_A_OUT | COMBINE_B_OUT) + +/* portion covered by a but not b */ +static uint8_t +combine_disjoint_out_part (uint8_t a, uint8_t b) +{ + /* min (1, (1-b) / a) */ + + b = ~b; /* 1 - b */ + if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */ + return MASK; /* 1 */ + return DIV_UN8 (b, a); /* (1-b) / a */ +} + +/* portion covered by both a and b */ +static uint8_t +combine_disjoint_in_part (uint8_t a, uint8_t b) +{ + /* max (1-(1-b)/a,0) */ + /* = - min ((1-b)/a - 1, 0) */ + /* = 1 - min (1, (1-b)/a) */ + + b = ~b; /* 1 - b */ + if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */ + return 0; /* 1 - 1 */ + return ~DIV_UN8(b, a); /* 1 - (1-b) / a */ +} + +/* portion covered by a but not b */ +static uint8_t +combine_conjoint_out_part (uint8_t a, uint8_t b) +{ + /* max (1-b/a,0) */ + /* = 1-min(b/a,1) */ + + /* min (1, (1-b) / a) */ + + if (b >= a) /* b >= a -> b/a >= 1 */ + return 0x00; /* 0 */ + return ~DIV_UN8(b, a); /* 1 - b/a */ +} + +/* portion covered by both a and b */ +static uint8_t +combine_conjoint_in_part (uint8_t a, uint8_t b) +{ + /* min (1,b/a) */ + + if (b >= a) /* b >= a -> b/a >= 1 */ + return MASK; /* 1 */ + return DIV_UN8 (b, a); /* b/a */ +} + +#define GET_COMP(v, i) ((uint16_t) (uint8_t) ((v) >> i)) + +#define ADD(x, y, i, t) \ + ((t) = GET_COMP (x, i) + GET_COMP (y, i), \ + (uint32_t) ((uint8_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i)) + +#define GENERIC(x, y, i, ax, ay, t, u, v) \ + ((t) = (MUL_UN8 (GET_COMP (y, i), ay, (u)) + \ + MUL_UN8 (GET_COMP (x, i), ax, (v))), \ + (uint32_t) ((uint8_t) ((t) | \ + (0 - ((t) >> G_SHIFT)))) << (i)) + +static void +combine_disjoint_general_u (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width, + uint8_t combine) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint32_t d = *(dest + i); + uint32_t m, n, o, p; + uint16_t Fa, Fb, t, u, v; + uint8_t sa = s >> A_SHIFT; + uint8_t da = d >> A_SHIFT; + + switch (combine & COMBINE_A) + { + default: + Fa = 0; + break; + + case COMBINE_A_OUT: + Fa = combine_disjoint_out_part (sa, da); + break; + + case COMBINE_A_IN: + Fa = combine_disjoint_in_part (sa, da); + break; + + case COMBINE_A: + Fa = MASK; + break; + } + + switch (combine & COMBINE_B) + { + default: + Fb = 0; + break; + + case COMBINE_B_OUT: + Fb = combine_disjoint_out_part (da, sa); + break; + + case COMBINE_B_IN: + Fb = combine_disjoint_in_part (da, sa); + break; + + case COMBINE_B: + Fb = MASK; + break; + } + m = GENERIC (s, d, 0, Fa, Fb, t, u, v); + n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v); + o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v); + p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v); + s = m | n | o | p; + *(dest + i) = s; + } +} + +static void +combine_disjoint_over_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint16_t a = s >> A_SHIFT; + + if (s != 0x00) + { + uint32_t d = *(dest + i); + a = combine_disjoint_out_part (d >> A_SHIFT, a); + UN8x4_MUL_UN8_ADD_UN8x4 (d, a, s); + + *(dest + i) = d; + } + } +} + +static void +combine_disjoint_in_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN); +} + +static void +combine_disjoint_in_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN); +} + +static void +combine_disjoint_out_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT); +} + +static void +combine_disjoint_out_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT); +} + +static void +combine_disjoint_atop_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP); +} + +static void +combine_disjoint_atop_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP); +} + +static void +combine_disjoint_xor_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR); +} + +static void +combine_conjoint_general_u (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width, + uint8_t combine) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint32_t d = *(dest + i); + uint32_t m, n, o, p; + uint16_t Fa, Fb, t, u, v; + uint8_t sa = s >> A_SHIFT; + uint8_t da = d >> A_SHIFT; + + switch (combine & COMBINE_A) + { + default: + Fa = 0; + break; + + case COMBINE_A_OUT: + Fa = combine_conjoint_out_part (sa, da); + break; + + case COMBINE_A_IN: + Fa = combine_conjoint_in_part (sa, da); + break; + + case COMBINE_A: + Fa = MASK; + break; + } + + switch (combine & COMBINE_B) + { + default: + Fb = 0; + break; + + case COMBINE_B_OUT: + Fb = combine_conjoint_out_part (da, sa); + break; + + case COMBINE_B_IN: + Fb = combine_conjoint_in_part (da, sa); + break; + + case COMBINE_B: + Fb = MASK; + break; + } + + m = GENERIC (s, d, 0, Fa, Fb, t, u, v); + n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v); + o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v); + p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v); + + s = m | n | o | p; + + *(dest + i) = s; + } +} + +static void +combine_conjoint_over_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER); +} + +static void +combine_conjoint_over_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER); +} + +static void +combine_conjoint_in_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN); +} + +static void +combine_conjoint_in_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN); +} + +static void +combine_conjoint_out_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT); +} + +static void +combine_conjoint_out_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT); +} + +static void +combine_conjoint_atop_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP); +} + +static void +combine_conjoint_atop_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP); +} + +static void +combine_conjoint_xor_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR); +} + + +/* Component alpha combiners */ + +static void +combine_clear_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + memset (dest, 0, width * sizeof(uint32_t)); +} + +static void +combine_src_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = *(src + i); + uint32_t m = *(mask + i); + + combine_mask_value_ca (&s, &m); + + *(dest + i) = s; + } +} + +static void +combine_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = *(src + i); + uint32_t m = *(mask + i); + uint32_t a; + + combine_mask_ca (&s, &m); + + a = ~m; + if (a) + { + uint32_t d = *(dest + i); + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, a, s); + s = d; + } + + *(dest + i) = s; + } +} + +static void +combine_over_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t d = *(dest + i); + uint32_t a = ~d >> A_SHIFT; + + if (a) + { + uint32_t s = *(src + i); + uint32_t m = *(mask + i); + + UN8x4_MUL_UN8x4 (s, m); + UN8x4_MUL_UN8_ADD_UN8x4 (s, a, d); + + *(dest + i) = s; + } + } +} + +static void +combine_in_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t d = *(dest + i); + uint16_t a = d >> A_SHIFT; + uint32_t s = 0; + + if (a) + { + uint32_t m = *(mask + i); + + s = *(src + i); + combine_mask_value_ca (&s, &m); + + if (a != MASK) + UN8x4_MUL_UN8 (s, a); + } + + *(dest + i) = s; + } +} + +static void +combine_in_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = *(src + i); + uint32_t m = *(mask + i); + uint32_t a; + + combine_mask_alpha_ca (&s, &m); + + a = m; + if (a != ~0) + { + uint32_t d = 0; + + if (a) + { + d = *(dest + i); + UN8x4_MUL_UN8x4 (d, a); + } + + *(dest + i) = d; + } + } +} + +static void +combine_out_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t d = *(dest + i); + uint16_t a = ~d >> A_SHIFT; + uint32_t s = 0; + + if (a) + { + uint32_t m = *(mask + i); + + s = *(src + i); + combine_mask_value_ca (&s, &m); + + if (a != MASK) + UN8x4_MUL_UN8 (s, a); + } + + *(dest + i) = s; + } +} + +static void +combine_out_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = *(src + i); + uint32_t m = *(mask + i); + uint32_t a; + + combine_mask_alpha_ca (&s, &m); + + a = ~m; + if (a != ~0) + { + uint32_t d = 0; + + if (a) + { + d = *(dest + i); + UN8x4_MUL_UN8x4 (d, a); + } + + *(dest + i) = d; + } + } +} + +static void +combine_atop_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t d = *(dest + i); + uint32_t s = *(src + i); + uint32_t m = *(mask + i); + uint32_t ad; + uint16_t as = d >> A_SHIFT; + + combine_mask_ca (&s, &m); + + ad = ~m; + + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as); + + *(dest + i) = d; + } +} + +static void +combine_atop_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t d = *(dest + i); + uint32_t s = *(src + i); + uint32_t m = *(mask + i); + uint32_t ad; + uint16_t as = ~d >> A_SHIFT; + + combine_mask_ca (&s, &m); + + ad = m; + + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as); + + *(dest + i) = d; + } +} + +static void +combine_xor_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t d = *(dest + i); + uint32_t s = *(src + i); + uint32_t m = *(mask + i); + uint32_t ad; + uint16_t as = ~d >> A_SHIFT; + + combine_mask_ca (&s, &m); + + ad = ~m; + + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as); + + *(dest + i) = d; + } +} + +static void +combine_add_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = *(src + i); + uint32_t m = *(mask + i); + uint32_t d = *(dest + i); + + combine_mask_value_ca (&s, &m); + + UN8x4_ADD_UN8x4 (d, s); + + *(dest + i) = d; + } +} + +static void +combine_saturate_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s, d; + uint16_t sa, sr, sg, sb, da; + uint16_t t, u, v; + uint32_t m, n, o, p; + + d = *(dest + i); + s = *(src + i); + m = *(mask + i); + + combine_mask_ca (&s, &m); + + sa = (m >> A_SHIFT); + sr = (m >> R_SHIFT) & MASK; + sg = (m >> G_SHIFT) & MASK; + sb = m & MASK; + da = ~d >> A_SHIFT; + + if (sb <= da) + m = ADD (s, d, 0, t); + else + m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v); + + if (sg <= da) + n = ADD (s, d, G_SHIFT, t); + else + n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v); + + if (sr <= da) + o = ADD (s, d, R_SHIFT, t); + else + o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v); + + if (sa <= da) + p = ADD (s, d, A_SHIFT, t); + else + p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v); + + *(dest + i) = m | n | o | p; + } +} + +static void +combine_disjoint_general_ca (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width, + uint8_t combine) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s, d; + uint32_t m, n, o, p; + uint32_t Fa, Fb; + uint16_t t, u, v; + uint32_t sa; + uint8_t da; + + s = *(src + i); + m = *(mask + i); + d = *(dest + i); + da = d >> A_SHIFT; + + combine_mask_ca (&s, &m); + + sa = m; + + switch (combine & COMBINE_A) + { + default: + Fa = 0; + break; + + case COMBINE_A_OUT: + m = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> 0), da); + n = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT; + o = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT; + p = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT; + Fa = m | n | o | p; + break; + + case COMBINE_A_IN: + m = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> 0), da); + n = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT; + o = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT; + p = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT; + Fa = m | n | o | p; + break; + + case COMBINE_A: + Fa = ~0; + break; + } + + switch (combine & COMBINE_B) + { + default: + Fb = 0; + break; + + case COMBINE_B_OUT: + m = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> 0)); + n = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT; + o = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT; + p = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT; + Fb = m | n | o | p; + break; + + case COMBINE_B_IN: + m = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> 0)); + n = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT; + o = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT; + p = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT; + Fb = m | n | o | p; + break; + + case COMBINE_B: + Fb = ~0; + break; + } + m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); + n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v); + o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v); + p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v); + + s = m | n | o | p; + + *(dest + i) = s; + } +} + +static void +combine_disjoint_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER); +} + +static void +combine_disjoint_in_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN); +} + +static void +combine_disjoint_in_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN); +} + +static void +combine_disjoint_out_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT); +} + +static void +combine_disjoint_out_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT); +} + +static void +combine_disjoint_atop_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP); +} + +static void +combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP); +} + +static void +combine_disjoint_xor_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR); +} + +static void +combine_conjoint_general_ca (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width, + uint8_t combine) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s, d; + uint32_t m, n, o, p; + uint32_t Fa, Fb; + uint16_t t, u, v; + uint32_t sa; + uint8_t da; + + s = *(src + i); + m = *(mask + i); + d = *(dest + i); + da = d >> A_SHIFT; + + combine_mask_ca (&s, &m); + + sa = m; + + switch (combine & COMBINE_A) + { + default: + Fa = 0; + break; + + case COMBINE_A_OUT: + m = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> 0), da); + n = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT; + o = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT; + p = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT; + Fa = m | n | o | p; + break; + + case COMBINE_A_IN: + m = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> 0), da); + n = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT; + o = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT; + p = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT; + Fa = m | n | o | p; + break; + + case COMBINE_A: + Fa = ~0; + break; + } + + switch (combine & COMBINE_B) + { + default: + Fb = 0; + break; + + case COMBINE_B_OUT: + m = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> 0)); + n = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT; + o = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT; + p = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT; + Fb = m | n | o | p; + break; + + case COMBINE_B_IN: + m = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> 0)); + n = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT; + o = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT; + p = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT; + Fb = m | n | o | p; + break; + + case COMBINE_B: + Fb = ~0; + break; + } + m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); + n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v); + o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v); + p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v); + + s = m | n | o | p; + + *(dest + i) = s; + } +} + +static void +combine_conjoint_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER); +} + +static void +combine_conjoint_over_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER); +} + +static void +combine_conjoint_in_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN); +} + +static void +combine_conjoint_in_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN); +} + +static void +combine_conjoint_out_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT); +} + +static void +combine_conjoint_out_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT); +} + +static void +combine_conjoint_atop_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP); +} + +static void +combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP); +} + +static void +combine_conjoint_xor_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR); +} + +void +_pixman_setup_combiner_functions_32 (pixman_implementation_t *imp) +{ + /* Unified alpha */ + imp->combine_32[PIXMAN_OP_CLEAR] = combine_clear; + imp->combine_32[PIXMAN_OP_SRC] = combine_src_u; + imp->combine_32[PIXMAN_OP_DST] = combine_dst; + imp->combine_32[PIXMAN_OP_OVER] = combine_over_u; + imp->combine_32[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u; + imp->combine_32[PIXMAN_OP_IN] = combine_in_u; + imp->combine_32[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u; + imp->combine_32[PIXMAN_OP_OUT] = combine_out_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u; + imp->combine_32[PIXMAN_OP_ATOP] = combine_atop_u; + imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u; + imp->combine_32[PIXMAN_OP_XOR] = combine_xor_u; + imp->combine_32[PIXMAN_OP_ADD] = combine_add_u; + imp->combine_32[PIXMAN_OP_SATURATE] = combine_saturate_u; + + /* Disjoint, unified */ + imp->combine_32[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear; + imp->combine_32[PIXMAN_OP_DISJOINT_SRC] = combine_src_u; + imp->combine_32[PIXMAN_OP_DISJOINT_DST] = combine_dst; + imp->combine_32[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u; + imp->combine_32[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u; + imp->combine_32[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u; + imp->combine_32[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u; + imp->combine_32[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u; + imp->combine_32[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u; + imp->combine_32[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u; + imp->combine_32[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u; + imp->combine_32[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u; + + /* Conjoint, unified */ + imp->combine_32[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear; + imp->combine_32[PIXMAN_OP_CONJOINT_SRC] = combine_src_u; + imp->combine_32[PIXMAN_OP_CONJOINT_DST] = combine_dst; + imp->combine_32[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u; + imp->combine_32[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u; + imp->combine_32[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u; + imp->combine_32[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u; + imp->combine_32[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u; + imp->combine_32[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u; + imp->combine_32[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u; + imp->combine_32[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u; + imp->combine_32[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u; + + imp->combine_32[PIXMAN_OP_MULTIPLY] = combine_multiply_u; + imp->combine_32[PIXMAN_OP_SCREEN] = combine_screen_u; + imp->combine_32[PIXMAN_OP_OVERLAY] = combine_overlay_u; + imp->combine_32[PIXMAN_OP_DARKEN] = combine_darken_u; + imp->combine_32[PIXMAN_OP_LIGHTEN] = combine_lighten_u; + imp->combine_32[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u; + imp->combine_32[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u; + imp->combine_32[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u; + imp->combine_32[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u; + imp->combine_32[PIXMAN_OP_DIFFERENCE] = combine_difference_u; + imp->combine_32[PIXMAN_OP_EXCLUSION] = combine_exclusion_u; + imp->combine_32[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u; + imp->combine_32[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u; + imp->combine_32[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u; + imp->combine_32[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u; + + /* Component alpha combiners */ + imp->combine_32_ca[PIXMAN_OP_CLEAR] = combine_clear_ca; + imp->combine_32_ca[PIXMAN_OP_SRC] = combine_src_ca; + /* dest */ + imp->combine_32_ca[PIXMAN_OP_OVER] = combine_over_ca; + imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_IN] = combine_in_ca; + imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_OUT] = combine_out_ca; + imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP] = combine_atop_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_XOR] = combine_xor_ca; + imp->combine_32_ca[PIXMAN_OP_ADD] = combine_add_ca; + imp->combine_32_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca; + + /* Disjoint CA */ + imp->combine_32_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca; + imp->combine_32_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca; + imp->combine_32_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst; + imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca; + imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca; + imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca; + imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca; + imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca; + imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca; + + /* Conjoint CA */ + imp->combine_32_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca; + imp->combine_32_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca; + imp->combine_32_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst; + imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca; + imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca; + imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca; + imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca; + imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca; + + imp->combine_32_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca; + imp->combine_32_ca[PIXMAN_OP_SCREEN] = combine_screen_ca; + imp->combine_32_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca; + imp->combine_32_ca[PIXMAN_OP_DARKEN] = combine_darken_ca; + imp->combine_32_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca; + imp->combine_32_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca; + imp->combine_32_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca; + imp->combine_32_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca; + imp->combine_32_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca; + imp->combine_32_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca; + imp->combine_32_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca; + + /* It is not clear that these make sense, so make them noops for now */ + imp->combine_32_ca[PIXMAN_OP_HSL_HUE] = combine_dst; + imp->combine_32_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst; + imp->combine_32_ca[PIXMAN_OP_HSL_COLOR] = combine_dst; + imp->combine_32_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst; +} diff --git a/src/pixman/pixman/pixman-combine32.h b/src/pixman/pixman/pixman-combine32.h new file mode 100644 index 0000000..cdd56a6 --- /dev/null +++ b/src/pixman/pixman/pixman-combine32.h @@ -0,0 +1,272 @@ +#define COMPONENT_SIZE 8 +#define MASK 0xff +#define ONE_HALF 0x80 + +#define A_SHIFT 8 * 3 +#define R_SHIFT 8 * 2 +#define G_SHIFT 8 +#define A_MASK 0xff000000 +#define R_MASK 0xff0000 +#define G_MASK 0xff00 + +#define RB_MASK 0xff00ff +#define AG_MASK 0xff00ff00 +#define RB_ONE_HALF 0x800080 +#define RB_MASK_PLUS_ONE 0x10000100 + +#define ALPHA_8(x) ((x) >> A_SHIFT) +#define RED_8(x) (((x) >> R_SHIFT) & MASK) +#define GREEN_8(x) (((x) >> G_SHIFT) & MASK) +#define BLUE_8(x) ((x) & MASK) + +/* + * ARMv6 has UQADD8 instruction, which implements unsigned saturated + * addition for 8-bit values packed in 32-bit registers. It is very useful + * for UN8x4_ADD_UN8x4, UN8_rb_ADD_UN8_rb and ADD_UN8 macros (which would + * otherwise need a lot of arithmetic operations to simulate this operation). + * Since most of the major ARM linux distros are built for ARMv7, we are + * much less dependent on runtime CPU detection and can get practical + * benefits from conditional compilation here for a lot of users. + */ + +#if defined(USE_GCC_INLINE_ASM) && defined(__arm__) && \ + !defined(__aarch64__) && (!defined(__thumb__) || defined(__thumb2__)) +#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) + +static force_inline uint32_t +un8x4_add_un8x4 (uint32_t x, uint32_t y) +{ + uint32_t t; + asm ("uqadd8 %0, %1, %2" : "=r" (t) : "%r" (x), "r" (y)); + return t; +} + +#define UN8x4_ADD_UN8x4(x, y) \ + ((x) = un8x4_add_un8x4 ((x), (y))) + +#define UN8_rb_ADD_UN8_rb(x, y, t) \ + ((t) = un8x4_add_un8x4 ((x), (y)), (x) = (t)) + +#define ADD_UN8(x, y, t) \ + ((t) = (x), un8x4_add_un8x4 ((t), (y))) + +#endif +#endif + +/*****************************************************************************/ + +/* + * Helper macros. + */ + +#define MUL_UN8(a, b, t) \ + ((t) = (a) * (uint16_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT )) + +#define DIV_UN8(a, b) \ + (((uint16_t) (a) * MASK + ((b) / 2)) / (b)) + +#ifndef ADD_UN8 +#define ADD_UN8(x, y, t) \ + ((t) = (x) + (y), \ + (uint32_t) (uint8_t) ((t) | (0 - ((t) >> G_SHIFT)))) +#endif + +#define DIV_ONE_UN8(x) \ + (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT) + +/* + * The methods below use some tricks to be able to do two color + * components at the same time. + */ + +/* + * x_rb = (x_rb * a) / 255 + */ +#define UN8_rb_MUL_UN8(x, a, t) \ + do \ + { \ + t = ((x) & RB_MASK) * (a); \ + t += RB_ONE_HALF; \ + x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ + x &= RB_MASK; \ + } while (0) + +/* + * x_rb = min (x_rb + y_rb, 255) + */ +#ifndef UN8_rb_ADD_UN8_rb +#define UN8_rb_ADD_UN8_rb(x, y, t) \ + do \ + { \ + t = ((x) + (y)); \ + t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \ + x = (t & RB_MASK); \ + } while (0) +#endif + +/* + * x_rb = (x_rb * a_rb) / 255 + */ +#define UN8_rb_MUL_UN8_rb(x, a, t) \ + do \ + { \ + t = (x & MASK) * (a & MASK); \ + t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \ + t += RB_ONE_HALF; \ + t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ + x = t & RB_MASK; \ + } while (0) + +/* + * x_c = (x_c * a) / 255 + */ +#define UN8x4_MUL_UN8(x, a) \ + do \ + { \ + uint32_t r1__, r2__, t__; \ + \ + r1__ = (x); \ + UN8_rb_MUL_UN8 (r1__, (a), t__); \ + \ + r2__ = (x) >> G_SHIFT; \ + UN8_rb_MUL_UN8 (r2__, (a), t__); \ + \ + (x) = r1__ | (r2__ << G_SHIFT); \ + } while (0) + +/* + * x_c = (x_c * a) / 255 + y_c + */ +#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y) \ + do \ + { \ + uint32_t r1__, r2__, r3__, t__; \ + \ + r1__ = (x); \ + r2__ = (y) & RB_MASK; \ + UN8_rb_MUL_UN8 (r1__, (a), t__); \ + UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \ + \ + r2__ = (x) >> G_SHIFT; \ + r3__ = ((y) >> G_SHIFT) & RB_MASK; \ + UN8_rb_MUL_UN8 (r2__, (a), t__); \ + UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \ + \ + (x) = r1__ | (r2__ << G_SHIFT); \ + } while (0) + +/* + * x_c = (x_c * a + y_c * b) / 255 + */ +#define UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8(x, a, y, b) \ + do \ + { \ + uint32_t r1__, r2__, r3__, t__; \ + \ + r1__ = (x); \ + r2__ = (y); \ + UN8_rb_MUL_UN8 (r1__, (a), t__); \ + UN8_rb_MUL_UN8 (r2__, (b), t__); \ + UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \ + \ + r2__ = ((x) >> G_SHIFT); \ + r3__ = ((y) >> G_SHIFT); \ + UN8_rb_MUL_UN8 (r2__, (a), t__); \ + UN8_rb_MUL_UN8 (r3__, (b), t__); \ + UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \ + \ + (x) = r1__ | (r2__ << G_SHIFT); \ + } while (0) + +/* + * x_c = (x_c * a_c) / 255 + */ +#define UN8x4_MUL_UN8x4(x, a) \ + do \ + { \ + uint32_t r1__, r2__, r3__, t__; \ + \ + r1__ = (x); \ + r2__ = (a); \ + UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \ + \ + r2__ = (x) >> G_SHIFT; \ + r3__ = (a) >> G_SHIFT; \ + UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \ + \ + (x) = r1__ | (r2__ << G_SHIFT); \ + } while (0) + +/* + * x_c = (x_c * a_c) / 255 + y_c + */ +#define UN8x4_MUL_UN8x4_ADD_UN8x4(x, a, y) \ + do \ + { \ + uint32_t r1__, r2__, r3__, t__; \ + \ + r1__ = (x); \ + r2__ = (a); \ + UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \ + r2__ = (y) & RB_MASK; \ + UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \ + \ + r2__ = ((x) >> G_SHIFT); \ + r3__ = ((a) >> G_SHIFT); \ + UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \ + r3__ = ((y) >> G_SHIFT) & RB_MASK; \ + UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \ + \ + (x) = r1__ | (r2__ << G_SHIFT); \ + } while (0) + +/* + * x_c = (x_c * a_c + y_c * b) / 255 + */ +#define UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8(x, a, y, b) \ + do \ + { \ + uint32_t r1__, r2__, r3__, t__; \ + \ + r1__ = (x); \ + r2__ = (a); \ + UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \ + r2__ = (y); \ + UN8_rb_MUL_UN8 (r2__, (b), t__); \ + UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \ + \ + r2__ = (x) >> G_SHIFT; \ + r3__ = (a) >> G_SHIFT; \ + UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \ + r3__ = (y) >> G_SHIFT; \ + UN8_rb_MUL_UN8 (r3__, (b), t__); \ + UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \ + \ + x = r1__ | (r2__ << G_SHIFT); \ + } while (0) + +/* + x_c = min(x_c + y_c, 255) +*/ +#ifndef UN8x4_ADD_UN8x4 +#define UN8x4_ADD_UN8x4(x, y) \ + do \ + { \ + uint32_t r1__, r2__, r3__, t__; \ + \ + r1__ = (x) & RB_MASK; \ + r2__ = (y) & RB_MASK; \ + UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \ + \ + r2__ = ((x) >> G_SHIFT) & RB_MASK; \ + r3__ = ((y) >> G_SHIFT) & RB_MASK; \ + UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \ + \ + x = r1__ | (r2__ << G_SHIFT); \ + } while (0) +#endif diff --git a/src/pixman/pixman/pixman-compiler.h b/src/pixman/pixman/pixman-compiler.h new file mode 100644 index 0000000..2489adc --- /dev/null +++ b/src/pixman/pixman/pixman-compiler.h @@ -0,0 +1,232 @@ +/* Pixman uses some non-standard compiler features. This file ensures + * they exist + * + * The features are: + * + * FUNC must be defined to expand to the current function + * PIXMAN_EXPORT should be defined to whatever is required to + * export functions from a shared library + * limits limits for various types must be defined + * inline must be defined + * force_inline must be defined + */ +#if defined (__GNUC__) +# define FUNC ((const char*) (__PRETTY_FUNCTION__)) +#elif defined (__sun) || (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) +# define FUNC ((const char*) (__func__)) +#else +# define FUNC ((const char*) ("???")) +#endif + +#if defined (__GNUC__) +# define unlikely(expr) __builtin_expect ((expr), 0) +#else +# define unlikely(expr) (expr) +#endif + +#if defined (__GNUC__) +# define MAYBE_UNUSED __attribute__((unused)) +#else +# define MAYBE_UNUSED +#endif + +#ifndef INT16_MIN +# define INT16_MIN (-32767-1) +#endif + +#ifndef INT16_MAX +# define INT16_MAX (32767) +#endif + +#ifndef INT32_MIN +# define INT32_MIN (-2147483647-1) +#endif + +#ifndef INT32_MAX +# define INT32_MAX (2147483647) +#endif + +#ifndef UINT32_MIN +# define UINT32_MIN (0) +#endif + +#ifndef UINT32_MAX +# define UINT32_MAX (4294967295U) +#endif + +#ifndef INT64_MIN +# define INT64_MIN (-9223372036854775807-1) +#endif + +#ifndef INT64_MAX +# define INT64_MAX (9223372036854775807) +#endif + +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t)-1) +#endif + + +#ifndef M_PI +# define M_PI 3.14159265358979323846 +#endif + +#ifdef _MSC_VER +/* 'inline' is available only in C++ in MSVC */ +# define inline __inline +# define force_inline __forceinline +# define noinline __declspec(noinline) +#elif defined __GNUC__ || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) +# define inline __inline__ +# define force_inline __inline__ __attribute__ ((__always_inline__)) +# define noinline __attribute__((noinline)) +#else +# ifndef force_inline +# define force_inline inline +# endif +# ifndef noinline +# define noinline +# endif +#endif + +/* GCC visibility */ +#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(_WIN32) +# define PIXMAN_EXPORT __attribute__ ((visibility("default"))) +/* Sun Studio 8 visibility */ +#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550) +# define PIXMAN_EXPORT __global +#else +# define PIXMAN_EXPORT +#endif + +/* member offsets */ +#define CONTAINER_OF(type, member, data) \ + ((type *)(((uint8_t *)data) - offsetof (type, member))) + +/* TLS */ +#if defined(PIXMAN_NO_TLS) + +# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \ + static type name +# define PIXMAN_GET_THREAD_LOCAL(name) \ + (&name) + +#elif defined(TLS) + +# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \ + static TLS type name +# define PIXMAN_GET_THREAD_LOCAL(name) \ + (&name) + +#elif defined(__MINGW32__) + +# define _NO_W32_PSEUDO_MODIFIERS +# include <windows.h> + +# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \ + static volatile int tls_ ## name ## _initialized = 0; \ + static void *tls_ ## name ## _mutex = NULL; \ + static unsigned tls_ ## name ## _index; \ + \ + static type * \ + tls_ ## name ## _alloc (void) \ + { \ + type *value = calloc (1, sizeof (type)); \ + if (value) \ + TlsSetValue (tls_ ## name ## _index, value); \ + return value; \ + } \ + \ + static force_inline type * \ + tls_ ## name ## _get (void) \ + { \ + type *value; \ + if (!tls_ ## name ## _initialized) \ + { \ + if (!tls_ ## name ## _mutex) \ + { \ + void *mutex = CreateMutexA (NULL, 0, NULL); \ + if (InterlockedCompareExchangePointer ( \ + &tls_ ## name ## _mutex, mutex, NULL) != NULL) \ + { \ + CloseHandle (mutex); \ + } \ + } \ + WaitForSingleObject (tls_ ## name ## _mutex, 0xFFFFFFFF); \ + if (!tls_ ## name ## _initialized) \ + { \ + tls_ ## name ## _index = TlsAlloc (); \ + tls_ ## name ## _initialized = 1; \ + } \ + ReleaseMutex (tls_ ## name ## _mutex); \ + } \ + if (tls_ ## name ## _index == 0xFFFFFFFF) \ + return NULL; \ + value = TlsGetValue (tls_ ## name ## _index); \ + if (!value) \ + value = tls_ ## name ## _alloc (); \ + return value; \ + } + +# define PIXMAN_GET_THREAD_LOCAL(name) \ + tls_ ## name ## _get () + +#elif defined(_MSC_VER) + +# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \ + static __declspec(thread) type name +# define PIXMAN_GET_THREAD_LOCAL(name) \ + (&name) + +#elif defined(HAVE_PTHREADS) + +#include <pthread.h> + +# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \ + static pthread_once_t tls_ ## name ## _once_control = PTHREAD_ONCE_INIT; \ + static pthread_key_t tls_ ## name ## _key; \ + \ + static void \ + tls_ ## name ## _destroy_value (void *value) \ + { \ + free (value); \ + } \ + \ + static void \ + tls_ ## name ## _make_key (void) \ + { \ + pthread_key_create (&tls_ ## name ## _key, \ + tls_ ## name ## _destroy_value); \ + } \ + \ + static type * \ + tls_ ## name ## _alloc (void) \ + { \ + type *value = calloc (1, sizeof (type)); \ + if (value) \ + pthread_setspecific (tls_ ## name ## _key, value); \ + return value; \ + } \ + \ + static force_inline type * \ + tls_ ## name ## _get (void) \ + { \ + type *value = NULL; \ + if (pthread_once (&tls_ ## name ## _once_control, \ + tls_ ## name ## _make_key) == 0) \ + { \ + value = pthread_getspecific (tls_ ## name ## _key); \ + if (!value) \ + value = tls_ ## name ## _alloc (); \ + } \ + return value; \ + } + +# define PIXMAN_GET_THREAD_LOCAL(name) \ + tls_ ## name ## _get () + +#else + +# error "Unknown thread local support for this system. Pixman will not work with multiple threads. Define PIXMAN_NO_TLS to acknowledge and accept this limitation and compile pixman without thread-safety support." + +#endif diff --git a/src/pixman/pixman/pixman-conical-gradient.c b/src/pixman/pixman/pixman-conical-gradient.c new file mode 100644 index 0000000..8bb46ae --- /dev/null +++ b/src/pixman/pixman/pixman-conical-gradient.c @@ -0,0 +1,212 @@ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. + * 2005 Lars Knoll & Zack Rusin, Trolltech + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdlib.h> +#include <math.h> +#include "pixman-private.h" + +static force_inline double +coordinates_to_parameter (double x, double y, double angle) +{ + double t; + + t = atan2 (y, x) + angle; + + while (t < 0) + t += 2 * M_PI; + + while (t >= 2 * M_PI) + t -= 2 * M_PI; + + return 1 - t * (1 / (2 * M_PI)); /* Scale t to [0, 1] and + * make rotation CCW + */ +} + +static uint32_t * +conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) +{ + pixman_image_t *image = iter->image; + int x = iter->x; + int y = iter->y; + int width = iter->width; + uint32_t *buffer = iter->buffer; + + gradient_t *gradient = (gradient_t *)image; + conical_gradient_t *conical = (conical_gradient_t *)image; + uint32_t *end = buffer + width; + pixman_gradient_walker_t walker; + pixman_bool_t affine = TRUE; + double cx = 1.; + double cy = 0.; + double cz = 0.; + double rx = x + 0.5; + double ry = y + 0.5; + double rz = 1.; + + _pixman_gradient_walker_init (&walker, gradient, image->common.repeat); + + if (image->common.transform) + { + pixman_vector_t v; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (image->common.transform, &v)) + return iter->buffer; + + cx = image->common.transform->matrix[0][0] / 65536.; + cy = image->common.transform->matrix[1][0] / 65536.; + cz = image->common.transform->matrix[2][0] / 65536.; + + rx = v.vector[0] / 65536.; + ry = v.vector[1] / 65536.; + rz = v.vector[2] / 65536.; + + affine = + image->common.transform->matrix[2][0] == 0 && + v.vector[2] == pixman_fixed_1; + } + + if (affine) + { + rx -= conical->center.x / 65536.; + ry -= conical->center.y / 65536.; + + while (buffer < end) + { + if (!mask || *mask++) + { + double t = coordinates_to_parameter (rx, ry, conical->angle); + + *buffer = _pixman_gradient_walker_pixel ( + &walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t)); + } + + ++buffer; + + rx += cx; + ry += cy; + } + } + else + { + while (buffer < end) + { + double x, y; + + if (!mask || *mask++) + { + double t; + + if (rz != 0) + { + x = rx / rz; + y = ry / rz; + } + else + { + x = y = 0.; + } + + x -= conical->center.x / 65536.; + y -= conical->center.y / 65536.; + + t = coordinates_to_parameter (x, y, conical->angle); + + *buffer = _pixman_gradient_walker_pixel ( + &walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t)); + } + + ++buffer; + + rx += cx; + ry += cy; + rz += cz; + } + } + + iter->y++; + return iter->buffer; +} + +static uint32_t * +conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) +{ + uint32_t *buffer = conical_get_scanline_narrow (iter, NULL); + + pixman_expand_to_float ( + (argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width); + + return buffer; +} + +void +_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter) +{ + if (iter->iter_flags & ITER_NARROW) + iter->get_scanline = conical_get_scanline_narrow; + else + iter->get_scanline = conical_get_scanline_wide; +} + +PIXMAN_EXPORT pixman_image_t * +pixman_image_create_conical_gradient (const pixman_point_fixed_t * center, + pixman_fixed_t angle, + const pixman_gradient_stop_t *stops, + int n_stops) +{ + pixman_image_t *image = _pixman_image_allocate (); + conical_gradient_t *conical; + + if (!image) + return NULL; + + conical = &image->conical; + + if (!_pixman_init_gradient (&conical->common, stops, n_stops)) + { + free (image); + return NULL; + } + + angle = MOD (angle, pixman_int_to_fixed (360)); + + image->type = CONICAL; + + conical->center = *center; + conical->angle = (pixman_fixed_to_double (angle) / 180.0) * M_PI; + + return image; +} + diff --git a/src/pixman/pixman/pixman-edge-accessors.c b/src/pixman/pixman/pixman-edge-accessors.c new file mode 100644 index 0000000..ea3a31e --- /dev/null +++ b/src/pixman/pixman/pixman-edge-accessors.c @@ -0,0 +1,4 @@ + +#define PIXMAN_FB_ACCESSORS + +#include "pixman-edge.c" diff --git a/src/pixman/pixman/pixman-edge-imp.h b/src/pixman/pixman/pixman-edge-imp.h new file mode 100644 index 0000000..a4698ed --- /dev/null +++ b/src/pixman/pixman/pixman-edge-imp.h @@ -0,0 +1,182 @@ +/* + * Copyright © 2004 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef rasterize_span +#endif + +static void +RASTERIZE_EDGES (pixman_image_t *image, + pixman_edge_t *l, + pixman_edge_t *r, + pixman_fixed_t t, + pixman_fixed_t b) +{ + pixman_fixed_t y = t; + uint32_t *line; + uint32_t *buf = (image)->bits.bits; + int stride = (image)->bits.rowstride; + int width = (image)->bits.width; + + line = buf + pixman_fixed_to_int (y) * stride; + + for (;;) + { + pixman_fixed_t lx; + pixman_fixed_t rx; + int lxi; + int rxi; + + lx = l->x; + rx = r->x; +#if N_BITS == 1 + /* For the non-antialiased case, round the coordinates up, in effect + * sampling just slightly to the left of the pixel. This is so that + * when the sample point lies exactly on the line, we round towards + * north-west. + * + * (The AA case does a similar adjustment in RENDER_SAMPLES_X) + */ + lx += X_FRAC_FIRST(1) - pixman_fixed_e; + rx += X_FRAC_FIRST(1) - pixman_fixed_e; +#endif + /* clip X */ + if (lx < 0) + lx = 0; + if (pixman_fixed_to_int (rx) >= width) +#if N_BITS == 1 + rx = pixman_int_to_fixed (width); +#else + /* Use the last pixel of the scanline, covered 100%. + * We can't use the first pixel following the scanline, + * because accessing it could result in a buffer overrun. + */ + rx = pixman_int_to_fixed (width) - 1; +#endif + + /* Skip empty (or backwards) sections */ + if (rx > lx) + { + + /* Find pixel bounds for span */ + lxi = pixman_fixed_to_int (lx); + rxi = pixman_fixed_to_int (rx); + +#if N_BITS == 1 + { + +#define LEFT_MASK(x) \ + (((x) & 0x1f) ? \ + SCREEN_SHIFT_RIGHT (0xffffffff, (x) & 0x1f) : 0) +#define RIGHT_MASK(x) \ + (((32 - (x)) & 0x1f) ? \ + SCREEN_SHIFT_LEFT (0xffffffff, (32 - (x)) & 0x1f) : 0) + +#define MASK_BITS(x,w,l,n,r) { \ + n = (w); \ + r = RIGHT_MASK ((x) + n); \ + l = LEFT_MASK (x); \ + if (l) { \ + n -= 32 - ((x) & 0x1f); \ + if (n < 0) { \ + n = 0; \ + l &= r; \ + r = 0; \ + } \ + } \ + n >>= 5; \ + } + + uint32_t *a = line; + uint32_t startmask; + uint32_t endmask; + int nmiddle; + int width = rxi - lxi; + int x = lxi; + + a += x >> 5; + x &= 0x1f; + + MASK_BITS (x, width, startmask, nmiddle, endmask); + + if (startmask) { + WRITE(image, a, READ(image, a) | startmask); + a++; + } + while (nmiddle--) + WRITE(image, a++, 0xffffffff); + if (endmask) + WRITE(image, a, READ(image, a) | endmask); + } +#else + { + DEFINE_ALPHA(line,lxi); + int lxs; + int rxs; + + /* Sample coverage for edge pixels */ + lxs = RENDER_SAMPLES_X (lx, N_BITS); + rxs = RENDER_SAMPLES_X (rx, N_BITS); + + /* Add coverage across row */ + if (lxi == rxi) + { + ADD_ALPHA (rxs - lxs); + } + else + { + int xi; + + ADD_ALPHA (N_X_FRAC(N_BITS) - lxs); + STEP_ALPHA; + for (xi = lxi + 1; xi < rxi; xi++) + { + ADD_ALPHA (N_X_FRAC(N_BITS)); + STEP_ALPHA; + } + ADD_ALPHA (rxs); + } + } +#endif + } + + if (y == b) + break; + +#if N_BITS > 1 + if (pixman_fixed_frac (y) != Y_FRAC_LAST(N_BITS)) + { + RENDER_EDGE_STEP_SMALL (l); + RENDER_EDGE_STEP_SMALL (r); + y += STEP_Y_SMALL(N_BITS); + } + else +#endif + { + RENDER_EDGE_STEP_BIG (l); + RENDER_EDGE_STEP_BIG (r); + y += STEP_Y_BIG(N_BITS); + line += stride; + } + } +} + +#undef rasterize_span diff --git a/src/pixman/pixman/pixman-edge.c b/src/pixman/pixman/pixman-edge.c new file mode 100644 index 0000000..ad6dfc4 --- /dev/null +++ b/src/pixman/pixman/pixman-edge.c @@ -0,0 +1,385 @@ +/* + * Copyright © 2004 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <string.h> + +#include "pixman-private.h" +#include "pixman-accessor.h" + +/* + * Step across a small sample grid gap + */ +#define RENDER_EDGE_STEP_SMALL(edge) \ + { \ + edge->x += edge->stepx_small; \ + edge->e += edge->dx_small; \ + if (edge->e > 0) \ + { \ + edge->e -= edge->dy; \ + edge->x += edge->signdx; \ + } \ + } + +/* + * Step across a large sample grid gap + */ +#define RENDER_EDGE_STEP_BIG(edge) \ + { \ + edge->x += edge->stepx_big; \ + edge->e += edge->dx_big; \ + if (edge->e > 0) \ + { \ + edge->e -= edge->dy; \ + edge->x += edge->signdx; \ + } \ + } + +#ifdef PIXMAN_FB_ACCESSORS +#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_accessors +#else +#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_no_accessors +#endif + +/* + * 4 bit alpha + */ + +#define N_BITS 4 +#define RASTERIZE_EDGES rasterize_edges_4 + +#ifndef WORDS_BIGENDIAN +#define SHIFT_4(o) ((o) << 2) +#else +#define SHIFT_4(o) ((1 - (o)) << 2) +#endif + +#define GET_4(x, o) (((x) >> SHIFT_4 (o)) & 0xf) +#define PUT_4(x, o, v) \ + (((x) & ~(0xf << SHIFT_4 (o))) | (((v) & 0xf) << SHIFT_4 (o))) + +#define DEFINE_ALPHA(line, x) \ + uint8_t *__ap = (uint8_t *) line + ((x) >> 1); \ + int __ao = (x) & 1 + +#define STEP_ALPHA ((__ap += __ao), (__ao ^= 1)) + +#define ADD_ALPHA(a) \ + { \ + uint8_t __o = READ (image, __ap); \ + uint8_t __a = (a) + GET_4 (__o, __ao); \ + WRITE (image, __ap, PUT_4 (__o, __ao, __a | (0 - ((__a) >> 4)))); \ + } + +#include "pixman-edge-imp.h" + +#undef ADD_ALPHA +#undef STEP_ALPHA +#undef DEFINE_ALPHA +#undef RASTERIZE_EDGES +#undef N_BITS + + +/* + * 1 bit alpha + */ + +#define N_BITS 1 +#define RASTERIZE_EDGES rasterize_edges_1 + +#include "pixman-edge-imp.h" + +#undef RASTERIZE_EDGES +#undef N_BITS + +/* + * 8 bit alpha + */ + +static force_inline uint8_t +clip255 (int x) +{ + if (x > 255) + return 255; + + return x; +} + +#define ADD_SATURATE_8(buf, val, length) \ + do \ + { \ + int i__ = (length); \ + uint8_t *buf__ = (buf); \ + int val__ = (val); \ + \ + while (i__--) \ + { \ + WRITE (image, (buf__), clip255 (READ (image, (buf__)) + (val__))); \ + (buf__)++; \ + } \ + } while (0) + +/* + * We want to detect the case where we add the same value to a long + * span of pixels. The triangles on the end are filled in while we + * count how many sub-pixel scanlines contribute to the middle section. + * + * +--------------------------+ + * fill_height =| \ / + * +------------------+ + * |================| + * fill_start fill_end + */ +static void +rasterize_edges_8 (pixman_image_t *image, + pixman_edge_t * l, + pixman_edge_t * r, + pixman_fixed_t t, + pixman_fixed_t b) +{ + pixman_fixed_t y = t; + uint32_t *line; + int fill_start = -1, fill_end = -1; + int fill_size = 0; + uint32_t *buf = (image)->bits.bits; + int stride = (image)->bits.rowstride; + int width = (image)->bits.width; + + line = buf + pixman_fixed_to_int (y) * stride; + + for (;;) + { + uint8_t *ap = (uint8_t *) line; + pixman_fixed_t lx, rx; + int lxi, rxi; + + /* clip X */ + lx = l->x; + if (lx < 0) + lx = 0; + + rx = r->x; + + if (pixman_fixed_to_int (rx) >= width) + { + /* Use the last pixel of the scanline, covered 100%. + * We can't use the first pixel following the scanline, + * because accessing it could result in a buffer overrun. + */ + rx = pixman_int_to_fixed (width) - 1; + } + + /* Skip empty (or backwards) sections */ + if (rx > lx) + { + int lxs, rxs; + + /* Find pixel bounds for span. */ + lxi = pixman_fixed_to_int (lx); + rxi = pixman_fixed_to_int (rx); + + /* Sample coverage for edge pixels */ + lxs = RENDER_SAMPLES_X (lx, 8); + rxs = RENDER_SAMPLES_X (rx, 8); + + /* Add coverage across row */ + if (lxi == rxi) + { + WRITE (image, ap + lxi, + clip255 (READ (image, ap + lxi) + rxs - lxs)); + } + else + { + WRITE (image, ap + lxi, + clip255 (READ (image, ap + lxi) + N_X_FRAC (8) - lxs)); + + /* Move forward so that lxi/rxi is the pixel span */ + lxi++; + + /* Don't bother trying to optimize the fill unless + * the span is longer than 4 pixels. */ + if (rxi - lxi > 4) + { + if (fill_start < 0) + { + fill_start = lxi; + fill_end = rxi; + fill_size++; + } + else + { + if (lxi >= fill_end || rxi < fill_start) + { + /* We're beyond what we saved, just fill it */ + ADD_SATURATE_8 (ap + fill_start, + fill_size * N_X_FRAC (8), + fill_end - fill_start); + fill_start = lxi; + fill_end = rxi; + fill_size = 1; + } + else + { + /* Update fill_start */ + if (lxi > fill_start) + { + ADD_SATURATE_8 (ap + fill_start, + fill_size * N_X_FRAC (8), + lxi - fill_start); + fill_start = lxi; + } + else if (lxi < fill_start) + { + ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), + fill_start - lxi); + } + + /* Update fill_end */ + if (rxi < fill_end) + { + ADD_SATURATE_8 (ap + rxi, + fill_size * N_X_FRAC (8), + fill_end - rxi); + fill_end = rxi; + } + else if (fill_end < rxi) + { + ADD_SATURATE_8 (ap + fill_end, + N_X_FRAC (8), + rxi - fill_end); + } + fill_size++; + } + } + } + else + { + ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), rxi - lxi); + } + + WRITE (image, ap + rxi, clip255 (READ (image, ap + rxi) + rxs)); + } + } + + if (y == b) + { + /* We're done, make sure we clean up any remaining fill. */ + if (fill_start != fill_end) + { + if (fill_size == N_Y_FRAC (8)) + { + MEMSET_WRAPPED (image, ap + fill_start, + 0xff, fill_end - fill_start); + } + else + { + ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8), + fill_end - fill_start); + } + } + break; + } + + if (pixman_fixed_frac (y) != Y_FRAC_LAST (8)) + { + RENDER_EDGE_STEP_SMALL (l); + RENDER_EDGE_STEP_SMALL (r); + y += STEP_Y_SMALL (8); + } + else + { + RENDER_EDGE_STEP_BIG (l); + RENDER_EDGE_STEP_BIG (r); + y += STEP_Y_BIG (8); + if (fill_start != fill_end) + { + if (fill_size == N_Y_FRAC (8)) + { + MEMSET_WRAPPED (image, ap + fill_start, + 0xff, fill_end - fill_start); + } + else + { + ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8), + fill_end - fill_start); + } + + fill_start = fill_end = -1; + fill_size = 0; + } + + line += stride; + } + } +} + +#ifndef PIXMAN_FB_ACCESSORS +static +#endif +void +PIXMAN_RASTERIZE_EDGES (pixman_image_t *image, + pixman_edge_t * l, + pixman_edge_t * r, + pixman_fixed_t t, + pixman_fixed_t b) +{ + switch (PIXMAN_FORMAT_BPP (image->bits.format)) + { + case 1: + rasterize_edges_1 (image, l, r, t, b); + break; + + case 4: + rasterize_edges_4 (image, l, r, t, b); + break; + + case 8: + rasterize_edges_8 (image, l, r, t, b); + break; + + default: + break; + } +} + +#ifndef PIXMAN_FB_ACCESSORS + +PIXMAN_EXPORT void +pixman_rasterize_edges (pixman_image_t *image, + pixman_edge_t * l, + pixman_edge_t * r, + pixman_fixed_t t, + pixman_fixed_t b) +{ + return_if_fail (image->type == BITS); + return_if_fail (PIXMAN_FORMAT_TYPE (image->bits.format) == PIXMAN_TYPE_A); + + if (image->bits.read_func || image->bits.write_func) + pixman_rasterize_edges_accessors (image, l, r, t, b); + else + pixman_rasterize_edges_no_accessors (image, l, r, t, b); +} + +#endif diff --git a/src/pixman/pixman/pixman-fast-path.c b/src/pixman/pixman/pixman-fast-path.c new file mode 100644 index 0000000..c6e43de --- /dev/null +++ b/src/pixman/pixman/pixman-fast-path.c @@ -0,0 +1,3292 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Keith Packard, SuSE, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include <string.h> +#include <stdlib.h> +#include "pixman-private.h" +#include "pixman-combine32.h" +#include "pixman-inlines.h" + +static force_inline uint32_t +fetch_24 (uint8_t *a) +{ + if (((uintptr_t)a) & 1) + { +#ifdef WORDS_BIGENDIAN + return (*a << 16) | (*(uint16_t *)(a + 1)); +#else + return *a | (*(uint16_t *)(a + 1) << 8); +#endif + } + else + { +#ifdef WORDS_BIGENDIAN + return (*(uint16_t *)a << 8) | *(a + 2); +#else + return *(uint16_t *)a | (*(a + 2) << 16); +#endif + } +} + +static force_inline void +store_24 (uint8_t *a, + uint32_t v) +{ + if (((uintptr_t)a) & 1) + { +#ifdef WORDS_BIGENDIAN + *a = (uint8_t) (v >> 16); + *(uint16_t *)(a + 1) = (uint16_t) (v); +#else + *a = (uint8_t) (v); + *(uint16_t *)(a + 1) = (uint16_t) (v >> 8); +#endif + } + else + { +#ifdef WORDS_BIGENDIAN + *(uint16_t *)a = (uint16_t)(v >> 8); + *(a + 2) = (uint8_t)v; +#else + *(uint16_t *)a = (uint16_t)v; + *(a + 2) = (uint8_t)(v >> 16); +#endif + } +} + +static force_inline uint32_t +over (uint32_t src, + uint32_t dest) +{ + uint32_t a = ~src >> 24; + + UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src); + + return dest; +} + +static force_inline uint32_t +in (uint32_t x, + uint8_t y) +{ + uint16_t a = y; + + UN8x4_MUL_UN8 (x, a); + + return x; +} + +/* + * Naming convention: + * + * op_src_mask_dest + */ +static void +fast_composite_over_x888_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *src, *src_line; + uint32_t *dst, *dst_line; + uint8_t *mask, *mask_line; + int src_stride, mask_stride, dst_stride; + uint8_t m; + uint32_t s, d; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + w = width; + while (w--) + { + m = *mask++; + if (m) + { + s = *src | 0xff000000; + + if (m == 0xff) + { + *dst = s; + } + else + { + d = in (s, m); + *dst = over (d, *dst); + } + } + src++; + dst++; + } + } +} + +static void +fast_composite_in_n_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + uint16_t t; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + if (srca == 0xff) + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + + if (m == 0) + *dst = 0; + else if (m != 0xff) + *dst = MUL_UN8 (m, *dst, t); + + dst++; + } + } + } + else + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + m = MUL_UN8 (m, srca, t); + + if (m == 0) + *dst = 0; + else if (m != 0xff) + *dst = MUL_UN8 (m, *dst, t); + + dst++; + } + } + } +} + +static void +fast_composite_in_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint8_t s; + uint16_t t; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + + if (s == 0) + *dst = 0; + else if (s != 0xff) + *dst = MUL_UN8 (s, *dst, t); + + dst++; + } + } +} + +static void +fast_composite_over_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint32_t *dst_line, *dst, d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + if (m == 0xff) + { + if (srca == 0xff) + *dst = src; + else + *dst = over (src, *dst); + } + else if (m) + { + d = in (src, m); + *dst = over (d, *dst); + } + dst++; + } + } +} + +static void +fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, s; + uint32_t *dst_line, *dst, d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + ma = *mask++; + + if (ma) + { + d = *dst; + s = src; + + UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d); + + *dst = s; + } + + dst++; + } + } +} + +static void +fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca, s; + uint32_t *dst_line, *dst, d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + ma = *mask++; + if (ma == 0xffffffff) + { + if (srca == 0xff) + *dst = src; + else + *dst = over (src, *dst); + } + else if (ma) + { + d = *dst; + s = src; + + UN8x4_MUL_UN8x4 (s, ma); + UN8x4_MUL_UN8 (ma, srca); + ma = ~ma; + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); + + *dst = d; + } + + dst++; + } + } +} + +static void +fast_composite_over_n_8_0888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint8_t *dst_line, *dst; + uint32_t d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + if (m == 0xff) + { + if (srca == 0xff) + { + d = src; + } + else + { + d = fetch_24 (dst); + d = over (src, d); + } + store_24 (dst, d); + } + else if (m) + { + d = over (in (src, m), fetch_24 (dst)); + store_24 (dst, d); + } + dst += 3; + } + } +} + +static void +fast_composite_over_n_8_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint16_t *dst_line, *dst; + uint32_t d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + if (m == 0xff) + { + if (srca == 0xff) + { + d = src; + } + else + { + d = *dst; + d = over (src, convert_0565_to_0888 (d)); + } + *dst = convert_8888_to_0565 (d); + } + else if (m) + { + d = *dst; + d = over (in (src, m), convert_0565_to_0888 (d)); + *dst = convert_8888_to_0565 (d); + } + dst++; + } + } +} + +static void +fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca, s; + uint16_t src16; + uint16_t *dst_line, *dst; + uint32_t d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + src16 = convert_8888_to_0565 (src); + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + ma = *mask++; + if (ma == 0xffffffff) + { + if (srca == 0xff) + { + *dst = src16; + } + else + { + d = *dst; + d = over (src, convert_0565_to_0888 (d)); + *dst = convert_8888_to_0565 (d); + } + } + else if (ma) + { + d = *dst; + d = convert_0565_to_0888 (d); + + s = src; + + UN8x4_MUL_UN8x4 (s, ma); + UN8x4_MUL_UN8 (ma, srca); + ma = ~ma; + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); + + *dst = convert_8888_to_0565 (d); + } + dst++; + } + } +} + +static void +fast_composite_over_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + uint8_t a; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + a = s >> 24; + if (a == 0xff) + *dst = s; + else if (s) + *dst = over (s, *dst); + dst++; + } + } +} + +static void +fast_composite_src_x888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + *dst++ = (*src++) | 0xff000000; + } +} + +#if 0 +static void +fast_composite_over_8888_0888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint32_t d; + uint32_t *src_line, *src, s; + uint8_t a; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + a = s >> 24; + if (a) + { + if (a == 0xff) + d = s; + else + d = over (s, fetch_24 (dst)); + + store_24 (dst, d); + } + dst += 3; + } + } +} +#endif + +static void +fast_composite_over_8888_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst; + uint32_t d; + uint32_t *src_line, *src, s; + uint8_t a; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + a = s >> 24; + if (s) + { + if (a == 0xff) + { + d = s; + } + else + { + d = *dst; + d = over (s, convert_0565_to_0888 (d)); + } + *dst = convert_8888_to_0565 (d); + } + dst++; + } + } +} + +static void +fast_composite_add_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint8_t s, d; + uint16_t t; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + if (s) + { + if (s != 0xff) + { + d = *dst; + t = d + s; + s = t | (0 - (t >> 8)); + } + *dst = s; + } + dst++; + } + } +} + +static void +fast_composite_add_0565_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst; + uint32_t d; + uint16_t *src_line, *src; + uint32_t s; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + if (s) + { + d = *dst; + s = convert_0565_to_8888 (s); + if (d) + { + d = convert_0565_to_8888 (d); + UN8x4_ADD_UN8x4 (s, d); + } + *dst = convert_8888_to_0565 (s); + } + dst++; + } + } +} + +static void +fast_composite_add_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint32_t s, d; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + if (s) + { + if (s != 0xffffffff) + { + d = *dst; + if (d) + UN8x4_ADD_UN8x4 (s, d); + } + *dst = s; + } + dst++; + } + } +} + +static void +fast_composite_add_n_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t src; + uint8_t sa; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + sa = (src >> 24); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + uint16_t tmp; + uint16_t a; + uint32_t m, d; + uint32_t r; + + a = *mask++; + d = *dst; + + m = MUL_UN8 (sa, a, tmp); + r = ADD_UN8 (m, d, tmp); + + *dst++ = r; + } + } +} + +#ifdef WORDS_BIGENDIAN +#define CREATE_BITMASK(n) (0x80000000 >> (n)) +#define UPDATE_BITMASK(n) ((n) >> 1) +#else +#define CREATE_BITMASK(n) (1 << (n)) +#define UPDATE_BITMASK(n) ((n) << 1) +#endif + +#define TEST_BIT(p, n) \ + (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31)) +#define SET_BIT(p, n) \ + do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0); + +static void +fast_composite_add_1_1 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t, + src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t, + dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + /* + * TODO: improve performance by processing uint32_t data instead + * of individual bits + */ + if (TEST_BIT (src, src_x + w)) + SET_BIT (dst, dest_x + w); + } + } +} + +static void +fast_composite_over_n_1_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint32_t *dst, *dst_line; + uint32_t *mask, *mask_line; + int mask_stride, dst_stride; + uint32_t bitcache, bitmask; + int32_t w; + + if (width <= 0) + return; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, + dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, + mask_stride, mask_line, 1); + mask_line += mask_x >> 5; + + if (srca == 0xff) + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = src; + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } + else + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = over (src, *dst); + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } +} + +static void +fast_composite_over_n_1_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint16_t *dst, *dst_line; + uint32_t *mask, *mask_line; + int mask_stride, dst_stride; + uint32_t bitcache, bitmask; + int32_t w; + uint32_t d; + uint16_t src565; + + if (width <= 0) + return; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, + dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, + mask_stride, mask_line, 1); + mask_line += mask_x >> 5; + + if (srca == 0xff) + { + src565 = convert_8888_to_0565 (src); + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = src565; + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } + else + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + { + d = over (src, convert_0565_to_0888 (*dst)); + *dst = convert_8888_to_0565 (d); + } + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } +} + +/* + * Simple bitblt + */ + +static void +fast_composite_solid_fill (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (dest_image->bits.format == PIXMAN_a1) + { + src = src >> 31; + } + else if (dest_image->bits.format == PIXMAN_a8) + { + src = src >> 24; + } + else if (dest_image->bits.format == PIXMAN_r5g6b5 || + dest_image->bits.format == PIXMAN_b5g6r5) + { + src = convert_8888_to_0565 (src); + } + + pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, + PIXMAN_FORMAT_BPP (dest_image->bits.format), + dest_x, dest_y, + width, height, + src); +} + +static void +fast_composite_src_memcpy (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8; + uint32_t n_bytes = width * bpp; + int dst_stride, src_stride; + uint8_t *dst; + uint8_t *src; + + src_stride = src_image->bits.rowstride * 4; + dst_stride = dest_image->bits.rowstride * 4; + + src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp; + dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp; + + while (height--) + { + memcpy (dst, src, n_bytes); + + dst += dst_stride; + src += src_stride; + } +} + +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER) +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE) +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD) +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL) +FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER) +FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD) +FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL) +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER) +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE) +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD) +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL) +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER) +FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE) +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD) +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL) +FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL) +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER) +FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE) +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD) +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL) + +#define REPEAT_MIN_WIDTH 32 + +static void +fast_composite_tiled_repeat (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + pixman_composite_func_t func; + pixman_format_code_t mask_format; + uint32_t src_flags, mask_flags; + int32_t sx, sy; + int32_t width_remain; + int32_t num_pixels; + int32_t src_width; + int32_t i, j; + pixman_image_t extended_src_image; + uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; + pixman_bool_t need_src_extension; + uint32_t *src_line; + int32_t src_stride; + int32_t src_bpp; + pixman_composite_info_t info2 = *info; + + src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) | + FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; + + if (mask_image) + { + mask_format = mask_image->common.extended_format_code; + mask_flags = info->mask_flags; + } + else + { + mask_format = PIXMAN_null; + mask_flags = FAST_PATH_IS_OPAQUE; + } + + _pixman_implementation_lookup_composite ( + imp->toplevel, info->op, + src_image->common.extended_format_code, src_flags, + mask_format, mask_flags, + dest_image->common.extended_format_code, info->dest_flags, + &imp, &func); + + src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); + + if (src_image->bits.width < REPEAT_MIN_WIDTH && + (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) && + !src_image->bits.indexed) + { + sx = src_x; + sx = MOD (sx, src_image->bits.width); + sx += width; + src_width = 0; + + while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) + src_width += src_image->bits.width; + + src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); + + /* Initialize/validate stack-allocated temporary image */ + _pixman_bits_image_init (&extended_src_image, src_image->bits.format, + src_width, 1, &extended_src[0], src_stride, + FALSE); + _pixman_image_validate (&extended_src_image); + + info2.src_image = &extended_src_image; + need_src_extension = TRUE; + } + else + { + src_width = src_image->bits.width; + need_src_extension = FALSE; + } + + sx = src_x; + sy = src_y; + + while (--height >= 0) + { + sx = MOD (sx, src_width); + sy = MOD (sy, src_image->bits.height); + + if (need_src_extension) + { + if (src_bpp == 32) + { + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + extended_src[i] = src_line[j]; + } + } + else if (src_bpp == 16) + { + uint16_t *src_line_16; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, + src_line_16, 1); + src_line = (uint32_t*)src_line_16; + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; + } + } + else if (src_bpp == 8) + { + uint8_t *src_line_8; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, + src_line_8, 1); + src_line = (uint32_t*)src_line_8; + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; + } + } + + info2.src_y = 0; + } + else + { + info2.src_y = sy; + } + + width_remain = width; + + while (width_remain > 0) + { + num_pixels = src_width - sx; + + if (num_pixels > width_remain) + num_pixels = width_remain; + + info2.src_x = sx; + info2.width = num_pixels; + info2.height = 1; + + func (imp, &info2); + + width_remain -= num_pixels; + info2.mask_x += num_pixels; + info2.dest_x += num_pixels; + sx = 0; + } + + sx = src_x; + sy++; + info2.mask_x = info->mask_x; + info2.mask_y++; + info2.dest_x = info->dest_x; + info2.dest_y++; + } + + if (need_src_extension) + _pixman_image_fini (&extended_src_image); +} + +/* Use more unrolling for src_0565_0565 because it is typically CPU bound */ +static force_inline void +scaled_nearest_scanline_565_565_SRC (uint16_t * dst, + const uint16_t * src, + int32_t w, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t fully_transparent_src) +{ + uint16_t tmp1, tmp2, tmp3, tmp4; + while ((w -= 4) >= 0) + { + tmp1 = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + tmp2 = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + tmp3 = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + tmp4 = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + *dst++ = tmp1; + *dst++ = tmp2; + *dst++ = tmp3; + *dst++ = tmp4; + } + if (w & 2) + { + tmp1 = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + tmp2 = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + *dst++ = tmp1; + *dst++ = tmp2; + } + if (w & 1) + *dst = *(src + pixman_fixed_to_int (vx)); +} + +FAST_NEAREST_MAINLOOP (565_565_cover_SRC, + scaled_nearest_scanline_565_565_SRC, + uint16_t, uint16_t, COVER) +FAST_NEAREST_MAINLOOP (565_565_none_SRC, + scaled_nearest_scanline_565_565_SRC, + uint16_t, uint16_t, NONE) +FAST_NEAREST_MAINLOOP (565_565_pad_SRC, + scaled_nearest_scanline_565_565_SRC, + uint16_t, uint16_t, PAD) + +static force_inline uint32_t +fetch_nearest (pixman_repeat_t src_repeat, + pixman_format_code_t format, + uint32_t *src, int x, int src_width) +{ + if (repeat (src_repeat, &x, src_width)) + { + if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8) + return *(src + x) | 0xff000000; + else + return *(src + x); + } + else + { + return 0; + } +} + +static force_inline void +combine_over (uint32_t s, uint32_t *dst) +{ + if (s) + { + uint8_t ia = 0xff - (s >> 24); + + if (ia) + UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s); + else + *dst = s; + } +} + +static force_inline void +combine_src (uint32_t s, uint32_t *dst) +{ + *dst = s; +} + +static void +fast_composite_scaled_nearest (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line; + uint32_t *src_line; + int dst_stride, src_stride; + int src_width, src_height; + pixman_repeat_t src_repeat; + pixman_fixed_t unit_x, unit_y; + pixman_format_code_t src_format; + pixman_vector_t v; + pixman_fixed_t vy; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + /* pass in 0 instead of src_x and src_y because src_x and src_y need to be + * transformed from destination space to source space + */ + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1); + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (src_image->common.transform, &v)) + return; + + unit_x = src_image->common.transform->matrix[0][0]; + unit_y = src_image->common.transform->matrix[1][1]; + + /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ + v.vector[0] -= pixman_fixed_e; + v.vector[1] -= pixman_fixed_e; + + src_height = src_image->bits.height; + src_width = src_image->bits.width; + src_repeat = src_image->common.repeat; + src_format = src_image->bits.format; + + vy = v.vector[1]; + while (height--) + { + pixman_fixed_t vx = v.vector[0]; + int y = pixman_fixed_to_int (vy); + uint32_t *dst = dst_line; + + dst_line += dst_stride; + + /* adjust the y location by a unit vector in the y direction + * this is equivalent to transforming y+1 of the destination point to source space */ + vy += unit_y; + + if (!repeat (src_repeat, &y, src_height)) + { + if (op == PIXMAN_OP_SRC) + memset (dst, 0, sizeof (*dst) * width); + } + else + { + int w = width; + + uint32_t *src = src_line + y * src_stride; + + while (w >= 2) + { + uint32_t s1, s2; + int x1, x2; + + x1 = pixman_fixed_to_int (vx); + vx += unit_x; + + x2 = pixman_fixed_to_int (vx); + vx += unit_x; + + w -= 2; + + s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width); + s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width); + + if (op == PIXMAN_OP_OVER) + { + combine_over (s1, dst++); + combine_over (s2, dst++); + } + else + { + combine_src (s1, dst++); + combine_src (s2, dst++); + } + } + + while (w--) + { + uint32_t s; + int x; + + x = pixman_fixed_to_int (vx); + vx += unit_x; + + s = fetch_nearest (src_repeat, src_format, src, x, src_width); + + if (op == PIXMAN_OP_OVER) + combine_over (s, dst++); + else + combine_src (s, dst++); + } + } + } +} + +#define CACHE_LINE_SIZE 64 + +#define FAST_SIMPLE_ROTATE(suffix, pix_type) \ + \ +static void \ +blt_rotated_90_trivial_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int w, \ + int h) \ +{ \ + int x, y; \ + for (y = 0; y < h; y++) \ + { \ + const pix_type *s = src + (h - y - 1); \ + pix_type *d = dst + dst_stride * y; \ + for (x = 0; x < w; x++) \ + { \ + *d++ = *s; \ + s += src_stride; \ + } \ + } \ +} \ + \ +static void \ +blt_rotated_270_trivial_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int w, \ + int h) \ +{ \ + int x, y; \ + for (y = 0; y < h; y++) \ + { \ + const pix_type *s = src + src_stride * (w - 1) + y; \ + pix_type *d = dst + dst_stride * y; \ + for (x = 0; x < w; x++) \ + { \ + *d++ = *s; \ + s -= src_stride; \ + } \ + } \ +} \ + \ +static void \ +blt_rotated_90_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int W, \ + int H) \ +{ \ + int x; \ + int leading_pixels = 0, trailing_pixels = 0; \ + const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ + \ + /* \ + * split processing into handling destination as TILE_SIZExH cache line \ + * aligned vertical stripes (optimistically assuming that destination \ + * stride is a multiple of cache line, if not - it will be just a bit \ + * slower) \ + */ \ + \ + if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ + { \ + leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (leading_pixels > W) \ + leading_pixels = W; \ + \ + /* unaligned leading part NxH (where N < TILE_SIZE) */ \ + blt_rotated_90_trivial_##suffix ( \ + dst, \ + dst_stride, \ + src, \ + src_stride, \ + leading_pixels, \ + H); \ + \ + dst += leading_pixels; \ + src += leading_pixels * src_stride; \ + W -= leading_pixels; \ + } \ + \ + if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ + { \ + trailing_pixels = (((uintptr_t)(dst + W) & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (trailing_pixels > W) \ + trailing_pixels = W; \ + W -= trailing_pixels; \ + } \ + \ + for (x = 0; x < W; x += TILE_SIZE) \ + { \ + /* aligned middle part TILE_SIZExH */ \ + blt_rotated_90_trivial_##suffix ( \ + dst + x, \ + dst_stride, \ + src + src_stride * x, \ + src_stride, \ + TILE_SIZE, \ + H); \ + } \ + \ + if (trailing_pixels) \ + { \ + /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ + blt_rotated_90_trivial_##suffix ( \ + dst + W, \ + dst_stride, \ + src + W * src_stride, \ + src_stride, \ + trailing_pixels, \ + H); \ + } \ +} \ + \ +static void \ +blt_rotated_270_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int W, \ + int H) \ +{ \ + int x; \ + int leading_pixels = 0, trailing_pixels = 0; \ + const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ + \ + /* \ + * split processing into handling destination as TILE_SIZExH cache line \ + * aligned vertical stripes (optimistically assuming that destination \ + * stride is a multiple of cache line, if not - it will be just a bit \ + * slower) \ + */ \ + \ + if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ + { \ + leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (leading_pixels > W) \ + leading_pixels = W; \ + \ + /* unaligned leading part NxH (where N < TILE_SIZE) */ \ + blt_rotated_270_trivial_##suffix ( \ + dst, \ + dst_stride, \ + src + src_stride * (W - leading_pixels), \ + src_stride, \ + leading_pixels, \ + H); \ + \ + dst += leading_pixels; \ + W -= leading_pixels; \ + } \ + \ + if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ + { \ + trailing_pixels = (((uintptr_t)(dst + W) & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (trailing_pixels > W) \ + trailing_pixels = W; \ + W -= trailing_pixels; \ + src += trailing_pixels * src_stride; \ + } \ + \ + for (x = 0; x < W; x += TILE_SIZE) \ + { \ + /* aligned middle part TILE_SIZExH */ \ + blt_rotated_270_trivial_##suffix ( \ + dst + x, \ + dst_stride, \ + src + src_stride * (W - x - TILE_SIZE), \ + src_stride, \ + TILE_SIZE, \ + H); \ + } \ + \ + if (trailing_pixels) \ + { \ + /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ + blt_rotated_270_trivial_##suffix ( \ + dst + W, \ + dst_stride, \ + src - trailing_pixels * src_stride, \ + src_stride, \ + trailing_pixels, \ + H); \ + } \ +} \ + \ +static void \ +fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + pix_type *dst_line; \ + pix_type *src_line; \ + int dst_stride, src_stride; \ + int src_x_t, src_y_t; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \ + dst_stride, dst_line, 1); \ + src_x_t = -src_y + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[0][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e) - height;\ + src_y_t = src_x + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[1][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ + src_stride, src_line, 1); \ + blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \ + width, height); \ +} \ + \ +static void \ +fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + pix_type *dst_line; \ + pix_type *src_line; \ + int dst_stride, src_stride; \ + int src_x_t, src_y_t; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \ + dst_stride, dst_line, 1); \ + src_x_t = src_y + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[0][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e); \ + src_y_t = -src_x + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[1][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e) - width; \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ + src_stride, src_line, 1); \ + blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \ + width, height); \ +} + +FAST_SIMPLE_ROTATE (8, uint8_t) +FAST_SIMPLE_ROTATE (565, uint16_t) +FAST_SIMPLE_ROTATE (8888, uint32_t) + +static const pixman_fast_path_t c_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565), + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1), + PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8), + PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8), + + SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888), + + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888), + + SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565), + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565), + + SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565), + + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888), + + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888), + + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565), + +#define NEAREST_FAST_PATH(op,s,d) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest, \ + } + + NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8), + NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8), + + NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8), + NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8), + + NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8), + NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8), + + NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8), + NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8), + +#define SIMPLE_ROTATE_FLAGS(angle) \ + (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \ + FAST_PATH_NEAREST_FILTER | \ + FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | \ + FAST_PATH_STANDARD_FLAGS) + +#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_rotate_90_##suffix, \ + }, \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_rotate_270_##suffix, \ + } + + SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888), + SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888), + SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888), + SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565), + SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8), + + /* Simple repeat fast path entry. */ + { PIXMAN_OP_any, + PIXMAN_any, + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE | + FAST_PATH_NORMAL_REPEAT), + PIXMAN_any, 0, + PIXMAN_any, FAST_PATH_STD_DEST_FLAGS, + fast_composite_tiled_repeat + }, + + { PIXMAN_OP_NONE }, +}; + +#ifdef WORDS_BIGENDIAN +#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n))) +#else +#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs)) +#endif + +static force_inline void +pixman_fill1_line (uint32_t *dst, int offs, int width, int v) +{ + if (offs) + { + int leading_pixels = 32 - offs; + if (leading_pixels >= width) + { + if (v) + *dst |= A1_FILL_MASK (width, offs); + else + *dst &= ~A1_FILL_MASK (width, offs); + return; + } + else + { + if (v) + *dst++ |= A1_FILL_MASK (leading_pixels, offs); + else + *dst++ &= ~A1_FILL_MASK (leading_pixels, offs); + width -= leading_pixels; + } + } + while (width >= 32) + { + if (v) + *dst++ = 0xFFFFFFFF; + else + *dst++ = 0; + width -= 32; + } + if (width > 0) + { + if (v) + *dst |= A1_FILL_MASK (width, 0); + else + *dst &= ~A1_FILL_MASK (width, 0); + } +} + +static void +pixman_fill1 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t filler) +{ + uint32_t *dst = bits + y * stride + (x >> 5); + int offs = x & 31; + + if (filler & 1) + { + while (height--) + { + pixman_fill1_line (dst, offs, width, 1); + dst += stride; + } + } + else + { + while (height--) + { + pixman_fill1_line (dst, offs, width, 0); + dst += stride; + } + } +} + +static void +pixman_fill8 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t filler) +{ + int byte_stride = stride * (int) sizeof (uint32_t); + uint8_t *dst = (uint8_t *) bits; + uint8_t v = filler & 0xff; + int i; + + dst = dst + y * byte_stride + x; + + while (height--) + { + for (i = 0; i < width; ++i) + dst[i] = v; + + dst += byte_stride; + } +} + +static void +pixman_fill16 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t filler) +{ + int short_stride = + (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t); + uint16_t *dst = (uint16_t *)bits; + uint16_t v = filler & 0xffff; + int i; + + dst = dst + y * short_stride + x; + + while (height--) + { + for (i = 0; i < width; ++i) + dst[i] = v; + + dst += short_stride; + } +} + +static void +pixman_fill32 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t filler) +{ + int i; + + bits = bits + y * stride + x; + + while (height--) + { + for (i = 0; i < width; ++i) + bits[i] = filler; + + bits += stride; + } +} + +static pixman_bool_t +fast_path_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t filler) +{ + switch (bpp) + { + case 1: + pixman_fill1 (bits, stride, x, y, width, height, filler); + break; + + case 8: + pixman_fill8 (bits, stride, x, y, width, height, filler); + break; + + case 16: + pixman_fill16 (bits, stride, x, y, width, height, filler); + break; + + case 32: + pixman_fill32 (bits, stride, x, y, width, height, filler); + break; + + default: + return FALSE; + } + + return TRUE; +} + +/*****************************************************************************/ + +static uint32_t * +fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) +{ + int32_t w = iter->width; + uint32_t *dst = iter->buffer; + const uint16_t *src = (const uint16_t *)iter->bits; + + iter->bits += iter->stride; + + /* Align the source buffer at 4 bytes boundary */ + if (w > 0 && ((uintptr_t)src & 3)) + { + *dst++ = convert_0565_to_8888 (*src++); + w--; + } + /* Process two pixels per iteration */ + while ((w -= 2) >= 0) + { + uint32_t sr, sb, sg, t0, t1; + uint32_t s = *(const uint32_t *)src; + src += 2; + sr = (s >> 8) & 0x00F800F8; + sb = (s << 3) & 0x00F800F8; + sg = (s >> 3) & 0x00FC00FC; + sr |= sr >> 5; + sb |= sb >> 5; + sg |= sg >> 6; + t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) | + (sb & 0xFF) | 0xFF000000; + t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) | + (sb >> 16) | 0xFF000000; +#ifdef WORDS_BIGENDIAN + *dst++ = t1; + *dst++ = t0; +#else + *dst++ = t0; + *dst++ = t1; +#endif + } + if (w & 1) + { + *dst = convert_0565_to_8888 (*src); + } + + return iter->buffer; +} + +static uint32_t * +fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask) +{ + iter->bits += iter->stride; + return iter->buffer; +} + +/* Helper function for a workaround, which tries to ensure that 0x1F001F + * constant is always allocated in a register on RISC architectures. + */ +static force_inline uint32_t +convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F) +{ + uint32_t a, b; + a = (s >> 3) & x1F001F; + b = s & 0xFC00; + a |= a >> 5; + a |= b >> 5; + return a; +} + +static void +fast_write_back_r5g6b5 (pixman_iter_t *iter) +{ + int32_t w = iter->width; + uint16_t *dst = (uint16_t *)(iter->bits - iter->stride); + const uint32_t *src = iter->buffer; + /* Workaround to ensure that x1F001F variable is allocated in a register */ + static volatile uint32_t volatile_x1F001F = 0x1F001F; + uint32_t x1F001F = volatile_x1F001F; + + while ((w -= 4) >= 0) + { + uint32_t s1 = *src++; + uint32_t s2 = *src++; + uint32_t s3 = *src++; + uint32_t s4 = *src++; + *dst++ = convert_8888_to_0565_workaround (s1, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s2, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s3, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s4, x1F001F); + } + if (w & 2) + { + *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); + *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); + } + if (w & 1) + { + *dst = convert_8888_to_0565_workaround (*src, x1F001F); + } +} + +typedef struct +{ + int y; + uint64_t * buffer; +} line_t; + +typedef struct +{ + line_t lines[2]; + pixman_fixed_t y; + pixman_fixed_t x; + uint64_t data[1]; +} bilinear_info_t; + +static void +fetch_horizontal (bits_image_t *image, line_t *line, + int y, pixman_fixed_t x, pixman_fixed_t ux, int n) +{ + uint32_t *bits = image->bits + y * image->rowstride; + int i; + + for (i = 0; i < n; ++i) + { + int x0 = pixman_fixed_to_int (x); + int x1 = x0 + 1; + int32_t dist_x; + + uint32_t left = *(bits + x0); + uint32_t right = *(bits + x1); + + dist_x = pixman_fixed_to_bilinear_weight (x); + dist_x <<= (8 - BILINEAR_INTERPOLATION_BITS); + +#if SIZEOF_LONG <= 4 + { + uint32_t lag, rag, ag; + uint32_t lrb, rrb, rb; + + lag = (left & 0xff00ff00) >> 8; + rag = (right & 0xff00ff00) >> 8; + ag = (lag << 8) + dist_x * (rag - lag); + + lrb = (left & 0x00ff00ff); + rrb = (right & 0x00ff00ff); + rb = (lrb << 8) + dist_x * (rrb - lrb); + + *((uint32_t *)(line->buffer + i)) = ag; + *((uint32_t *)(line->buffer + i) + 1) = rb; + } +#else + { + uint64_t lagrb, ragrb; + uint32_t lag, rag; + uint32_t lrb, rrb; + + lag = (left & 0xff00ff00); + lrb = (left & 0x00ff00ff); + rag = (right & 0xff00ff00); + rrb = (right & 0x00ff00ff); + lagrb = (((uint64_t)lag) << 24) | lrb; + ragrb = (((uint64_t)rag) << 24) | rrb; + + line->buffer[i] = (lagrb << 8) + dist_x * (ragrb - lagrb); + } +#endif + + x += ux; + } + + line->y = y; +} + +static uint32_t * +fast_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask) +{ + pixman_fixed_t fx, ux; + bilinear_info_t *info = iter->data; + line_t *line0, *line1; + int y0, y1; + int32_t dist_y; + int i; + + fx = info->x; + ux = iter->image->common.transform->matrix[0][0]; + + y0 = pixman_fixed_to_int (info->y); + y1 = y0 + 1; + dist_y = pixman_fixed_to_bilinear_weight (info->y); + dist_y <<= (8 - BILINEAR_INTERPOLATION_BITS); + + line0 = &info->lines[y0 & 0x01]; + line1 = &info->lines[y1 & 0x01]; + + if (line0->y != y0) + { + fetch_horizontal ( + &iter->image->bits, line0, y0, fx, ux, iter->width); + } + + if (line1->y != y1) + { + fetch_horizontal ( + &iter->image->bits, line1, y1, fx, ux, iter->width); + } + + for (i = 0; i < iter->width; ++i) + { +#if SIZEOF_LONG <= 4 + uint32_t ta, tr, tg, tb; + uint32_t ba, br, bg, bb; + uint32_t tag, trb; + uint32_t bag, brb; + uint32_t a, r, g, b; + + tag = *((uint32_t *)(line0->buffer + i)); + trb = *((uint32_t *)(line0->buffer + i) + 1); + bag = *((uint32_t *)(line1->buffer + i)); + brb = *((uint32_t *)(line1->buffer + i) + 1); + + ta = tag >> 16; + ba = bag >> 16; + a = (ta << 8) + dist_y * (ba - ta); + + tr = trb >> 16; + br = brb >> 16; + r = (tr << 8) + dist_y * (br - tr); + + tg = tag & 0xffff; + bg = bag & 0xffff; + g = (tg << 8) + dist_y * (bg - tg); + + tb = trb & 0xffff; + bb = brb & 0xffff; + b = (tb << 8) + dist_y * (bb - tb); + + a = (a << 8) & 0xff000000; + r = (r << 0) & 0x00ff0000; + g = (g >> 8) & 0x0000ff00; + b = (b >> 16) & 0x000000ff; +#else + uint64_t top = line0->buffer[i]; + uint64_t bot = line1->buffer[i]; + uint64_t tar = (top & 0xffff0000ffff0000ULL) >> 16; + uint64_t bar = (bot & 0xffff0000ffff0000ULL) >> 16; + uint64_t tgb = (top & 0x0000ffff0000ffffULL); + uint64_t bgb = (bot & 0x0000ffff0000ffffULL); + uint64_t ar, gb; + uint32_t a, r, g, b; + + ar = (tar << 8) + dist_y * (bar - tar); + gb = (tgb << 8) + dist_y * (bgb - tgb); + + a = ((ar >> 24) & 0xff000000); + r = ((ar >> 0) & 0x00ff0000); + g = ((gb >> 40) & 0x0000ff00); + b = ((gb >> 16) & 0x000000ff); +#endif + + iter->buffer[i] = a | r | g | b; + } + + info->y += iter->image->common.transform->matrix[1][1]; + + return iter->buffer; +} + +static void +bilinear_cover_iter_fini (pixman_iter_t *iter) +{ + free (iter->data); +} + +static void +fast_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info) +{ + int width = iter->width; + bilinear_info_t *info; + pixman_vector_t v; + + /* Reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (iter->image->common.transform, &v)) + goto fail; + + info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t)); + if (!info) + goto fail; + + info->x = v.vector[0] - pixman_fixed_1 / 2; + info->y = v.vector[1] - pixman_fixed_1 / 2; + + /* It is safe to set the y coordinates to -1 initially + * because COVER_CLIP_BILINEAR ensures that we will only + * be asked to fetch lines in the [0, height) interval + */ + info->lines[0].y = -1; + info->lines[0].buffer = &(info->data[0]); + info->lines[1].y = -1; + info->lines[1].buffer = &(info->data[width]); + + iter->get_scanline = fast_fetch_bilinear_cover; + iter->fini = bilinear_cover_iter_fini; + + iter->data = info; + return; + +fail: + /* Something went wrong, either a bad matrix or OOM; in such cases, + * we don't guarantee any particular rendering. + */ + _pixman_log_error ( + FUNC, "Allocation failure or bad matrix, skipping rendering\n"); + + iter->get_scanline = _pixman_iter_get_scanline_noop; + iter->fini = NULL; +} + +static uint32_t * +bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter, + const uint32_t *mask) +{ + + pixman_image_t * ima = iter->image; + int offset = iter->x; + int line = iter->y++; + int width = iter->width; + uint32_t * buffer = iter->buffer; + + bits_image_t *bits = &ima->bits; + pixman_fixed_t x_top, x_bottom, x; + pixman_fixed_t ux_top, ux_bottom, ux; + pixman_vector_t v; + uint32_t top_mask, bottom_mask; + uint32_t *top_row; + uint32_t *bottom_row; + uint32_t *end; + uint32_t zero[2] = { 0, 0 }; + uint32_t one = 1; + int y, y1, y2; + int disty; + int mask_inc; + int w; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (bits->common.transform, &v)) + return iter->buffer; + + ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0]; + x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2; + + y = v.vector[1] - pixman_fixed_1/2; + disty = pixman_fixed_to_bilinear_weight (y); + + /* Load the pointers to the first and second lines from the source + * image that bilinear code must read. + * + * The main trick in this code is about the check if any line are + * outside of the image; + * + * When I realize that a line (any one) is outside, I change + * the pointer to a dummy area with zeros. Once I change this, I + * must be sure the pointer will not change, so I set the + * variables to each pointer increments inside the loop. + */ + y1 = pixman_fixed_to_int (y); + y2 = y1 + 1; + + if (y1 < 0 || y1 >= bits->height) + { + top_row = zero; + x_top = 0; + ux_top = 0; + } + else + { + top_row = bits->bits + y1 * bits->rowstride; + x_top = x; + ux_top = ux; + } + + if (y2 < 0 || y2 >= bits->height) + { + bottom_row = zero; + x_bottom = 0; + ux_bottom = 0; + } + else + { + bottom_row = bits->bits + y2 * bits->rowstride; + x_bottom = x; + ux_bottom = ux; + } + + /* Instead of checking whether the operation uses the mast in + * each loop iteration, verify this only once and prepare the + * variables to make the code smaller inside the loop. + */ + if (!mask) + { + mask_inc = 0; + mask = &one; + } + else + { + /* If have a mask, prepare the variables to check it */ + mask_inc = 1; + } + + /* If both are zero, then the whole thing is zero */ + if (top_row == zero && bottom_row == zero) + { + memset (buffer, 0, width * sizeof (uint32_t)); + return iter->buffer; + } + else if (bits->format == PIXMAN_x8r8g8b8) + { + if (top_row == zero) + { + top_mask = 0; + bottom_mask = 0xff000000; + } + else if (bottom_row == zero) + { + top_mask = 0xff000000; + bottom_mask = 0; + } + else + { + top_mask = 0xff000000; + bottom_mask = 0xff000000; + } + } + else + { + top_mask = 0; + bottom_mask = 0; + } + + end = buffer + width; + + /* Zero fill to the left of the image */ + while (buffer < end && x < pixman_fixed_minus_1) + { + *buffer++ = 0; + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; + } + + /* Left edge + */ + while (buffer < end && x < 0) + { + uint32_t tr, br; + int32_t distx; + + tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask; + br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; + + distx = pixman_fixed_to_bilinear_weight (x); + + *buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty); + + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; + } + + /* Main part */ + w = pixman_int_to_fixed (bits->width - 1); + + while (buffer < end && x < w) + { + if (*mask) + { + uint32_t tl, tr, bl, br; + int32_t distx; + + tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; + tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask; + bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; + br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; + + distx = pixman_fixed_to_bilinear_weight (x); + + *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty); + } + + buffer++; + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; + } + + /* Right Edge */ + w = pixman_int_to_fixed (bits->width); + while (buffer < end && x < w) + { + if (*mask) + { + uint32_t tl, bl; + int32_t distx; + + tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; + bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; + + distx = pixman_fixed_to_bilinear_weight (x); + + *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty); + } + + buffer++; + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; + } + + /* Zero fill to the left of the image */ + while (buffer < end) + *buffer++ = 0; + + return iter->buffer; +} + +typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x); + +static force_inline void +bits_image_fetch_separable_convolution_affine (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask, + + convert_pixel_t convert_pixel, + pixman_format_code_t format, + pixman_repeat_t repeat_mode) +{ + bits_image_t *bits = &image->bits; + pixman_fixed_t *params = image->common.filter_params; + int cwidth = pixman_fixed_to_int (params[0]); + int cheight = pixman_fixed_to_int (params[1]); + int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1; + int y_off = ((cheight << 16) - pixman_fixed_1) >> 1; + int x_phase_bits = pixman_fixed_to_int (params[2]); + int y_phase_bits = pixman_fixed_to_int (params[3]); + int x_phase_shift = 16 - x_phase_bits; + int y_phase_shift = 16 - y_phase_bits; + pixman_fixed_t vx, vy; + pixman_fixed_t ux, uy; + pixman_vector_t v; + int k; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + + vx = v.vector[0]; + vy = v.vector[1]; + + for (k = 0; k < width; ++k) + { + pixman_fixed_t *y_params; + int satot, srtot, sgtot, sbtot; + pixman_fixed_t x, y; + int32_t x1, x2, y1, y2; + int32_t px, py; + int i, j; + + if (mask && !mask[k]) + goto next; + + /* Round x and y to the middle of the closest phase before continuing. This + * ensures that the convolution matrix is aligned right, since it was + * positioned relative to a particular phase (and not relative to whatever + * exact fraction we happen to get here). + */ + x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1); + y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1); + + px = (x & 0xffff) >> x_phase_shift; + py = (y & 0xffff) >> y_phase_shift; + + x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); + y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); + x2 = x1 + cwidth; + y2 = y1 + cheight; + + satot = srtot = sgtot = sbtot = 0; + + y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight; + + for (i = y1; i < y2; ++i) + { + pixman_fixed_t fy = *y_params++; + + if (fy) + { + pixman_fixed_t *x_params = params + 4 + px * cwidth; + + for (j = x1; j < x2; ++j) + { + pixman_fixed_t fx = *x_params++; + int rx = j; + int ry = i; + + if (fx) + { + pixman_fixed_t f; + uint32_t pixel, mask; + uint8_t *row; + + mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &rx, bits->width); + repeat (repeat_mode, &ry, bits->height); + + row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry; + pixel = convert_pixel (row, rx) | mask; + } + else + { + if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height) + { + pixel = 0; + } + else + { + row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry; + pixel = convert_pixel (row, rx) | mask; + } + } + + f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16; + srtot += (int)RED_8 (pixel) * f; + sgtot += (int)GREEN_8 (pixel) * f; + sbtot += (int)BLUE_8 (pixel) * f; + satot += (int)ALPHA_8 (pixel) * f; + } + } + } + } + + satot = (satot + 0x8000) >> 16; + srtot = (srtot + 0x8000) >> 16; + sgtot = (sgtot + 0x8000) >> 16; + sbtot = (sbtot + 0x8000) >> 16; + + satot = CLIP (satot, 0, 0xff); + srtot = CLIP (srtot, 0, 0xff); + sgtot = CLIP (sgtot, 0, 0xff); + sbtot = CLIP (sbtot, 0, 0xff); + + buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0); + + next: + vx += ux; + vy += uy; + } +} + +static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + +static force_inline void +bits_image_fetch_bilinear_affine (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask, + + convert_pixel_t convert_pixel, + pixman_format_code_t format, + pixman_repeat_t repeat_mode) +{ + pixman_fixed_t x, y; + pixman_fixed_t ux, uy; + pixman_vector_t v; + bits_image_t *bits = &image->bits; + int i; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + + x = v.vector[0]; + y = v.vector[1]; + + for (i = 0; i < width; ++i) + { + int x1, y1, x2, y2; + uint32_t tl, tr, bl, br; + int32_t distx, disty; + int width = image->bits.width; + int height = image->bits.height; + const uint8_t *row1; + const uint8_t *row2; + + if (mask && !mask[i]) + goto next; + + x1 = x - pixman_fixed_1 / 2; + y1 = y - pixman_fixed_1 / 2; + + distx = pixman_fixed_to_bilinear_weight (x1); + disty = pixman_fixed_to_bilinear_weight (y1); + + y1 = pixman_fixed_to_int (y1); + y2 = y1 + 1; + x1 = pixman_fixed_to_int (x1); + x2 = x1 + 1; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + uint32_t mask; + + mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; + + repeat (repeat_mode, &x1, width); + repeat (repeat_mode, &y1, height); + repeat (repeat_mode, &x2, width); + repeat (repeat_mode, &y2, height); + + row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; + row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; + + tl = convert_pixel (row1, x1) | mask; + tr = convert_pixel (row1, x2) | mask; + bl = convert_pixel (row2, x1) | mask; + br = convert_pixel (row2, x2) | mask; + } + else + { + uint32_t mask1, mask2; + int bpp; + + /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value, + * which means if you use it in expressions, those + * expressions become unsigned themselves. Since + * the variables below can be negative in some cases, + * that will lead to crashes on 64 bit architectures. + * + * So this line makes sure bpp is signed + */ + bpp = PIXMAN_FORMAT_BPP (format); + + if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0) + { + buffer[i] = 0; + goto next; + } + + if (y2 == 0) + { + row1 = zero; + mask1 = 0; + } + else + { + row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; + row1 += bpp / 8 * x1; + + mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; + } + + if (y1 == height - 1) + { + row2 = zero; + mask2 = 0; + } + else + { + row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; + row2 += bpp / 8 * x1; + + mask2 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; + } + + if (x2 == 0) + { + tl = 0; + bl = 0; + } + else + { + tl = convert_pixel (row1, 0) | mask1; + bl = convert_pixel (row2, 0) | mask2; + } + + if (x1 == width - 1) + { + tr = 0; + br = 0; + } + else + { + tr = convert_pixel (row1, 1) | mask1; + br = convert_pixel (row2, 1) | mask2; + } + } + + buffer[i] = bilinear_interpolation ( + tl, tr, bl, br, distx, disty); + + next: + x += ux; + y += uy; + } +} + +static force_inline void +bits_image_fetch_nearest_affine (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask, + + convert_pixel_t convert_pixel, + pixman_format_code_t format, + pixman_repeat_t repeat_mode) +{ + pixman_fixed_t x, y; + pixman_fixed_t ux, uy; + pixman_vector_t v; + bits_image_t *bits = &image->bits; + int i; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + + x = v.vector[0]; + y = v.vector[1]; + + for (i = 0; i < width; ++i) + { + int width, height, x0, y0; + const uint8_t *row; + + if (mask && !mask[i]) + goto next; + + width = image->bits.width; + height = image->bits.height; + x0 = pixman_fixed_to_int (x - pixman_fixed_e); + y0 = pixman_fixed_to_int (y - pixman_fixed_e); + + if (repeat_mode == PIXMAN_REPEAT_NONE && + (y0 < 0 || y0 >= height || x0 < 0 || x0 >= width)) + { + buffer[i] = 0; + } + else + { + uint32_t mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &x0, width); + repeat (repeat_mode, &y0, height); + } + + row = (uint8_t *)bits->bits + bits->rowstride * 4 * y0; + + buffer[i] = convert_pixel (row, x0) | mask; + } + + next: + x += ux; + y += uy; + } +} + +static force_inline uint32_t +convert_a8r8g8b8 (const uint8_t *row, int x) +{ + return *(((uint32_t *)row) + x); +} + +static force_inline uint32_t +convert_x8r8g8b8 (const uint8_t *row, int x) +{ + return *(((uint32_t *)row) + x); +} + +static force_inline uint32_t +convert_a8 (const uint8_t *row, int x) +{ + return *(row + x) << 24; +} + +static force_inline uint32_t +convert_r5g6b5 (const uint8_t *row, int x) +{ + return convert_0565_to_0888 (*((uint16_t *)row + x)); +} + +#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode) \ + static uint32_t * \ + bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t *iter, \ + const uint32_t * mask) \ + { \ + bits_image_fetch_separable_convolution_affine ( \ + iter->image, \ + iter->x, iter->y++, \ + iter->width, \ + iter->buffer, mask, \ + convert_ ## format, \ + PIXMAN_ ## format, \ + repeat_mode); \ + \ + return iter->buffer; \ + } + +#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \ + static uint32_t * \ + bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t *iter, \ + const uint32_t * mask) \ + { \ + bits_image_fetch_bilinear_affine (iter->image, \ + iter->x, iter->y++, \ + iter->width, \ + iter->buffer, mask, \ + convert_ ## format, \ + PIXMAN_ ## format, \ + repeat_mode); \ + return iter->buffer; \ + } + +#define MAKE_NEAREST_FETCHER(name, format, repeat_mode) \ + static uint32_t * \ + bits_image_fetch_nearest_affine_ ## name (pixman_iter_t *iter, \ + const uint32_t * mask) \ + { \ + bits_image_fetch_nearest_affine (iter->image, \ + iter->x, iter->y++, \ + iter->width, \ + iter->buffer, mask, \ + convert_ ## format, \ + PIXMAN_ ## format, \ + repeat_mode); \ + return iter->buffer; \ + } + +#define MAKE_FETCHERS(name, format, repeat_mode) \ + MAKE_NEAREST_FETCHER (name, format, repeat_mode) \ + MAKE_BILINEAR_FETCHER (name, format, repeat_mode) \ + MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode) + +MAKE_FETCHERS (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL) +MAKE_FETCHERS (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL) +MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_a8, a8, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_a8, a8, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_a8, a8, PIXMAN_REPEAT_NORMAL) +MAKE_FETCHERS (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL) + +#define IMAGE_FLAGS \ + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + +static const pixman_iter_info_t fast_iters[] = +{ + { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC, + _pixman_iter_init_bits_stride, fast_fetch_r5g6b5, NULL }, + + { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, + ITER_NARROW | ITER_DEST, + _pixman_iter_init_bits_stride, + fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, + + { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, + ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA, + _pixman_iter_init_bits_stride, + fast_dest_fetch_noop, fast_write_back_r5g6b5 }, + + { PIXMAN_a8r8g8b8, + (FAST_PATH_STANDARD_FLAGS | + FAST_PATH_SCALE_TRANSFORM | + FAST_PATH_BILINEAR_FILTER | + FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR), + ITER_NARROW | ITER_SRC, + fast_bilinear_cover_iter_init, + NULL, NULL + }, + +#define FAST_BILINEAR_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_X_UNIT_POSITIVE | \ + FAST_PATH_Y_UNIT_ZERO | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_BILINEAR_FILTER) + + { PIXMAN_a8r8g8b8, + FAST_BILINEAR_FLAGS, + ITER_NARROW | ITER_SRC, + NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL + }, + + { PIXMAN_x8r8g8b8, + FAST_BILINEAR_FLAGS, + ITER_NARROW | ITER_SRC, + NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL + }, + +#define GENERAL_BILINEAR_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_BILINEAR_FILTER) + +#define GENERAL_NEAREST_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_NEAREST_FILTER) + +#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_SEPARABLE_CONVOLUTION_FILTER) + +#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ + { PIXMAN_ ## format, \ + GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ + ITER_NARROW | ITER_SRC, \ + NULL, bits_image_fetch_separable_convolution_affine_ ## name, NULL \ + }, + +#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ + { PIXMAN_ ## format, \ + GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ + ITER_NARROW | ITER_SRC, \ + NULL, bits_image_fetch_bilinear_affine_ ## name, NULL, \ + }, + +#define NEAREST_AFFINE_FAST_PATH(name, format, repeat) \ + { PIXMAN_ ## format, \ + GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ + ITER_NARROW | ITER_SRC, \ + NULL, bits_image_fetch_nearest_affine_ ## name, NULL \ + }, + +#define AFFINE_FAST_PATHS(name, format, repeat) \ + SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ + BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ + NEAREST_AFFINE_FAST_PATH(name, format, repeat) + + AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD) + AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE) + AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT) + AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL) + AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD) + AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE) + AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT) + AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL) + AFFINE_FAST_PATHS (pad_a8, a8, PAD) + AFFINE_FAST_PATHS (none_a8, a8, NONE) + AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT) + AFFINE_FAST_PATHS (normal_a8, a8, NORMAL) + AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD) + AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE) + AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT) + AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL) + + { PIXMAN_null }, +}; + +pixman_implementation_t * +_pixman_implementation_create_fast_path (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); + + imp->fill = fast_path_fill; + imp->iter_info = fast_iters; + + return imp; +} diff --git a/src/pixman/pixman/pixman-filter.c b/src/pixman/pixman/pixman-filter.c new file mode 100644 index 0000000..b2bf53f --- /dev/null +++ b/src/pixman/pixman/pixman-filter.c @@ -0,0 +1,350 @@ +/* + * Copyright 2012, Red Hat, Inc. + * Copyright 2012, Soren Sandmann + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Soren Sandmann <soren.sandmann@gmail.com> + */ +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <assert.h> +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include "pixman-private.h" + +typedef double (* kernel_func_t) (double x); + +typedef struct +{ + pixman_kernel_t kernel; + kernel_func_t func; + double width; +} filter_info_t; + +static double +impulse_kernel (double x) +{ + return (x == 0.0)? 1.0 : 0.0; +} + +static double +box_kernel (double x) +{ + return 1; +} + +static double +linear_kernel (double x) +{ + return 1 - fabs (x); +} + +static double +gaussian_kernel (double x) +{ +#define SQRT2 (1.4142135623730950488016887242096980785696718753769480) +#define SIGMA (SQRT2 / 2.0) + + return exp (- x * x / (2 * SIGMA * SIGMA)) / (SIGMA * sqrt (2.0 * M_PI)); +} + +static double +sinc (double x) +{ + if (x == 0.0) + return 1.0; + else + return sin (M_PI * x) / (M_PI * x); +} + +static double +lanczos (double x, int n) +{ + return sinc (x) * sinc (x * (1.0 / n)); +} + +static double +lanczos2_kernel (double x) +{ + return lanczos (x, 2); +} + +static double +lanczos3_kernel (double x) +{ + return lanczos (x, 3); +} + +static double +nice_kernel (double x) +{ + return lanczos3_kernel (x * 0.75); +} + +static double +general_cubic (double x, double B, double C) +{ + double ax = fabs(x); + + if (ax < 1) + { + return ((12 - 9 * B - 6 * C) * ax * ax * ax + + (-18 + 12 * B + 6 * C) * ax * ax + (6 - 2 * B)) / 6; + } + else if (ax >= 1 && ax < 2) + { + return ((-B - 6 * C) * ax * ax * ax + + (6 * B + 30 * C) * ax * ax + (-12 * B - 48 * C) * + ax + (8 * B + 24 * C)) / 6; + } + else + { + return 0; + } +} + +static double +cubic_kernel (double x) +{ + /* This is the Mitchell-Netravali filter. + * + * (0.0, 0.5) would give us the Catmull-Rom spline, + * but that one seems to be indistinguishable from Lanczos2. + */ + return general_cubic (x, 1/3.0, 1/3.0); +} + +static const filter_info_t filters[] = +{ + { PIXMAN_KERNEL_IMPULSE, impulse_kernel, 0.0 }, + { PIXMAN_KERNEL_BOX, box_kernel, 1.0 }, + { PIXMAN_KERNEL_LINEAR, linear_kernel, 2.0 }, + { PIXMAN_KERNEL_CUBIC, cubic_kernel, 4.0 }, + { PIXMAN_KERNEL_GAUSSIAN, gaussian_kernel, 6 * SIGMA }, + { PIXMAN_KERNEL_LANCZOS2, lanczos2_kernel, 4.0 }, + { PIXMAN_KERNEL_LANCZOS3, lanczos3_kernel, 6.0 }, + { PIXMAN_KERNEL_LANCZOS3_STRETCHED, nice_kernel, 8.0 }, +}; + +/* This function scales @kernel2 by @scale, then + * aligns @x1 in @kernel1 with @x2 in @kernel2 and + * and integrates the product of the kernels across @width. + * + * This function assumes that the intervals are within + * the kernels in question. E.g., the caller must not + * try to integrate a linear kernel ouside of [-1:1] + */ +static double +integral (pixman_kernel_t kernel1, double x1, + pixman_kernel_t kernel2, double scale, double x2, + double width) +{ + /* If the integration interval crosses zero, break it into + * two separate integrals. This ensures that filters such + * as LINEAR that are not differentiable at 0 will still + * integrate properly. + */ + if (x1 < 0 && x1 + width > 0) + { + return + integral (kernel1, x1, kernel2, scale, x2, - x1) + + integral (kernel1, 0, kernel2, scale, x2 - x1, width + x1); + } + else if (x2 < 0 && x2 + width > 0) + { + return + integral (kernel1, x1, kernel2, scale, x2, - x2) + + integral (kernel1, x1 - x2, kernel2, scale, 0, width + x2); + } + else if (kernel1 == PIXMAN_KERNEL_IMPULSE) + { + assert (width == 0.0); + return filters[kernel2].func (x2 * scale); + } + else if (kernel2 == PIXMAN_KERNEL_IMPULSE) + { + assert (width == 0.0); + return filters[kernel1].func (x1); + } + else + { + /* Integration via Simpson's rule */ +#define N_SEGMENTS 128 +#define SAMPLE(a1, a2) \ + (filters[kernel1].func ((a1)) * filters[kernel2].func ((a2) * scale)) + + double s = 0.0; + double h = width / (double)N_SEGMENTS; + int i; + + s = SAMPLE (x1, x2); + + for (i = 1; i < N_SEGMENTS; i += 2) + { + double a1 = x1 + h * i; + double a2 = x2 + h * i; + + s += 2 * SAMPLE (a1, a2); + + if (i >= 2 && i < N_SEGMENTS - 1) + s += 4 * SAMPLE (a1, a2); + } + + s += SAMPLE (x1 + width, x2 + width); + + return h * s * (1.0 / 3.0); + } +} + +static pixman_fixed_t * +create_1d_filter (int *width, + pixman_kernel_t reconstruct, + pixman_kernel_t sample, + double scale, + int n_phases) +{ + pixman_fixed_t *params, *p; + double step; + double size; + int i; + + size = scale * filters[sample].width + filters[reconstruct].width; + *width = ceil (size); + + p = params = malloc (*width * n_phases * sizeof (pixman_fixed_t)); + if (!params) + return NULL; + + step = 1.0 / n_phases; + + for (i = 0; i < n_phases; ++i) + { + double frac = step / 2.0 + i * step; + pixman_fixed_t new_total; + int x, x1, x2; + double total; + + /* Sample convolution of reconstruction and sampling + * filter. See rounding.txt regarding the rounding + * and sample positions. + */ + + x1 = ceil (frac - *width / 2.0 - 0.5); + x2 = x1 + *width; + + total = 0; + for (x = x1; x < x2; ++x) + { + double pos = x + 0.5 - frac; + double rlow = - filters[reconstruct].width / 2.0; + double rhigh = rlow + filters[reconstruct].width; + double slow = pos - scale * filters[sample].width / 2.0; + double shigh = slow + scale * filters[sample].width; + double c = 0.0; + double ilow, ihigh; + + if (rhigh >= slow && rlow <= shigh) + { + ilow = MAX (slow, rlow); + ihigh = MIN (shigh, rhigh); + + c = integral (reconstruct, ilow, + sample, 1.0 / scale, ilow - pos, + ihigh - ilow); + } + + total += c; + *p++ = (pixman_fixed_t)(c * 65536.0 + 0.5); + } + + /* Normalize */ + p -= *width; + total = 1 / total; + new_total = 0; + for (x = x1; x < x2; ++x) + { + pixman_fixed_t t = (*p) * total + 0.5; + + new_total += t; + *p++ = t; + } + + if (new_total != pixman_fixed_1) + *(p - *width / 2) += (pixman_fixed_1 - new_total); + } + + return params; +} + +/* Create the parameter list for a SEPARABLE_CONVOLUTION filter + * with the given kernels and scale parameters + */ +PIXMAN_EXPORT pixman_fixed_t * +pixman_filter_create_separable_convolution (int *n_values, + pixman_fixed_t scale_x, + pixman_fixed_t scale_y, + pixman_kernel_t reconstruct_x, + pixman_kernel_t reconstruct_y, + pixman_kernel_t sample_x, + pixman_kernel_t sample_y, + int subsample_bits_x, + int subsample_bits_y) +{ + double sx = fabs (pixman_fixed_to_double (scale_x)); + double sy = fabs (pixman_fixed_to_double (scale_y)); + pixman_fixed_t *horz = NULL, *vert = NULL, *params = NULL; + int subsample_x, subsample_y; + int width, height; + + subsample_x = (1 << subsample_bits_x); + subsample_y = (1 << subsample_bits_y); + + horz = create_1d_filter (&width, reconstruct_x, sample_x, sx, subsample_x); + vert = create_1d_filter (&height, reconstruct_y, sample_y, sy, subsample_y); + + if (!horz || !vert) + goto out; + + *n_values = 4 + width * subsample_x + height * subsample_y; + + params = malloc (*n_values * sizeof (pixman_fixed_t)); + if (!params) + goto out; + + params[0] = pixman_int_to_fixed (width); + params[1] = pixman_int_to_fixed (height); + params[2] = pixman_int_to_fixed (subsample_bits_x); + params[3] = pixman_int_to_fixed (subsample_bits_y); + + memcpy (params + 4, horz, + width * subsample_x * sizeof (pixman_fixed_t)); + memcpy (params + 4 + width * subsample_x, vert, + height * subsample_y * sizeof (pixman_fixed_t)); + +out: + free (horz); + free (vert); + + return params; +} diff --git a/src/pixman/pixman/pixman-general.c b/src/pixman/pixman/pixman-general.c new file mode 100644 index 0000000..a653fa7 --- /dev/null +++ b/src/pixman/pixman/pixman-general.c @@ -0,0 +1,248 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. + * 2005 Lars Knoll & Zack Rusin, Trolltech + * 2008 Aaron Plattner, NVIDIA Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Red Hat not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. Red Hat makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "pixman-private.h" + +static void +general_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *info) +{ + pixman_image_t *image = iter->image; + + switch (image->type) + { + case BITS: + if ((iter->iter_flags & ITER_SRC) == ITER_SRC) + _pixman_bits_image_src_iter_init (image, iter); + else + _pixman_bits_image_dest_iter_init (image, iter); + break; + + case LINEAR: + _pixman_linear_gradient_iter_init (image, iter); + break; + + case RADIAL: + _pixman_radial_gradient_iter_init (image, iter); + break; + + case CONICAL: + _pixman_conical_gradient_iter_init (image, iter); + break; + + case SOLID: + _pixman_log_error (FUNC, "Solid image not handled by noop"); + break; + + default: + _pixman_log_error (FUNC, "Pixman bug: unknown image type\n"); + break; + } +} + +static const pixman_iter_info_t general_iters[] = +{ + { PIXMAN_any, 0, 0, general_iter_init, NULL, NULL }, + { PIXMAN_null }, +}; + +typedef struct op_info_t op_info_t; +struct op_info_t +{ + uint8_t src, dst; +}; + +#define ITER_IGNORE_BOTH \ + (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_LOCALIZED_ALPHA) + +static const op_info_t op_flags[PIXMAN_N_OPERATORS] = +{ + /* Src Dst */ + { ITER_IGNORE_BOTH, ITER_IGNORE_BOTH }, /* CLEAR */ + { ITER_LOCALIZED_ALPHA, ITER_IGNORE_BOTH }, /* SRC */ + { ITER_IGNORE_BOTH, ITER_LOCALIZED_ALPHA }, /* DST */ + { 0, ITER_LOCALIZED_ALPHA }, /* OVER */ + { ITER_LOCALIZED_ALPHA, 0 }, /* OVER_REVERSE */ + { ITER_LOCALIZED_ALPHA, ITER_IGNORE_RGB }, /* IN */ + { ITER_IGNORE_RGB, ITER_LOCALIZED_ALPHA }, /* IN_REVERSE */ + { ITER_LOCALIZED_ALPHA, ITER_IGNORE_RGB }, /* OUT */ + { ITER_IGNORE_RGB, ITER_LOCALIZED_ALPHA }, /* OUT_REVERSE */ + { 0, 0 }, /* ATOP */ + { 0, 0 }, /* ATOP_REVERSE */ + { 0, 0 }, /* XOR */ + { ITER_LOCALIZED_ALPHA, ITER_LOCALIZED_ALPHA }, /* ADD */ + { 0, 0 }, /* SATURATE */ +}; + +#define SCANLINE_BUFFER_LENGTH 8192 + +static void +general_composite_rect (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t stack_scanline_buffer[3 * SCANLINE_BUFFER_LENGTH]; + uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer; + uint8_t *src_buffer, *mask_buffer, *dest_buffer; + pixman_iter_t src_iter, mask_iter, dest_iter; + pixman_combine_32_func_t compose; + pixman_bool_t component_alpha; + iter_flags_t width_flag, src_iter_flags; + int Bpp; + int i; + + if ((src_image->common.flags & FAST_PATH_NARROW_FORMAT) && + (!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) && + (dest_image->common.flags & FAST_PATH_NARROW_FORMAT)) + { + width_flag = ITER_NARROW; + Bpp = 4; + } + else + { + width_flag = ITER_WIDE; + Bpp = 16; + } + +#define ALIGN(addr) \ + ((uint8_t *)((((uintptr_t)(addr)) + 15) & (~15))) + + src_buffer = ALIGN (scanline_buffer); + mask_buffer = ALIGN (src_buffer + width * Bpp); + dest_buffer = ALIGN (mask_buffer + width * Bpp); + + if (ALIGN (dest_buffer + width * Bpp) > + scanline_buffer + sizeof (stack_scanline_buffer)) + { + scanline_buffer = pixman_malloc_ab_plus_c (width, Bpp * 3, 32 * 3); + + if (!scanline_buffer) + return; + + src_buffer = ALIGN (scanline_buffer); + mask_buffer = ALIGN (src_buffer + width * Bpp); + dest_buffer = ALIGN (mask_buffer + width * Bpp); + } + + if (width_flag == ITER_WIDE) + { + /* To make sure there aren't any NANs in the buffers */ + memset (src_buffer, 0, width * Bpp); + memset (mask_buffer, 0, width * Bpp); + memset (dest_buffer, 0, width * Bpp); + } + + /* src iter */ + src_iter_flags = width_flag | op_flags[op].src | ITER_SRC; + + _pixman_implementation_iter_init (imp->toplevel, &src_iter, src_image, + src_x, src_y, width, height, + src_buffer, src_iter_flags, + info->src_flags); + + /* mask iter */ + if ((src_iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == + (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) + { + /* If it doesn't matter what the source is, then it doesn't matter + * what the mask is + */ + mask_image = NULL; + } + + component_alpha = + mask_image && + mask_image->common.type == BITS && + mask_image->common.component_alpha && + PIXMAN_FORMAT_RGB (mask_image->bits.format); + + _pixman_implementation_iter_init ( + imp->toplevel, &mask_iter, + mask_image, mask_x, mask_y, width, height, mask_buffer, + ITER_SRC | width_flag | (component_alpha? 0 : ITER_IGNORE_RGB), + info->mask_flags); + + /* dest iter */ + _pixman_implementation_iter_init ( + imp->toplevel, &dest_iter, dest_image, dest_x, dest_y, width, height, + dest_buffer, ITER_DEST | width_flag | op_flags[op].dst, info->dest_flags); + + compose = _pixman_implementation_lookup_combiner ( + imp->toplevel, op, component_alpha, width_flag != ITER_WIDE); + + for (i = 0; i < height; ++i) + { + uint32_t *s, *m, *d; + + m = mask_iter.get_scanline (&mask_iter, NULL); + s = src_iter.get_scanline (&src_iter, m); + d = dest_iter.get_scanline (&dest_iter, NULL); + + compose (imp->toplevel, op, d, s, m, width); + + dest_iter.write_back (&dest_iter); + } + + if (src_iter.fini) + src_iter.fini (&src_iter); + if (mask_iter.fini) + mask_iter.fini (&mask_iter); + if (dest_iter.fini) + dest_iter.fini (&dest_iter); + + if (scanline_buffer != (uint8_t *) stack_scanline_buffer) + free (scanline_buffer); +} + +static const pixman_fast_path_t general_fast_path[] = +{ + { PIXMAN_OP_any, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, general_composite_rect }, + { PIXMAN_OP_NONE } +}; + +pixman_implementation_t * +_pixman_implementation_create_general (void) +{ + pixman_implementation_t *imp = _pixman_implementation_create (NULL, general_fast_path); + + _pixman_setup_combiner_functions_32 (imp); + _pixman_setup_combiner_functions_float (imp); + + imp->iter_info = general_iters; + + return imp; +} + diff --git a/src/pixman/pixman/pixman-glyph.c b/src/pixman/pixman/pixman-glyph.c new file mode 100644 index 0000000..96a349a --- /dev/null +++ b/src/pixman/pixman/pixman-glyph.c @@ -0,0 +1,676 @@ +/* + * Copyright 2010, 2012, Soren Sandmann <sandmann@cs.au.dk> + * Copyright 2010, 2011, 2012, Red Hat, Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Soren Sandmann <sandmann@cs.au.dk> + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include "pixman-private.h" + +#include <stdlib.h> + +typedef struct glyph_metrics_t glyph_metrics_t; +typedef struct glyph_t glyph_t; + +#define TOMBSTONE ((glyph_t *)0x1) + +/* XXX: These numbers are arbitrary---we've never done any measurements. + */ +#define N_GLYPHS_HIGH_WATER (16384) +#define N_GLYPHS_LOW_WATER (8192) +#define HASH_SIZE (2 * N_GLYPHS_HIGH_WATER) +#define HASH_MASK (HASH_SIZE - 1) + +struct glyph_t +{ + void * font_key; + void * glyph_key; + int origin_x; + int origin_y; + pixman_image_t * image; + pixman_link_t mru_link; +}; + +struct pixman_glyph_cache_t +{ + int n_glyphs; + int n_tombstones; + int freeze_count; + pixman_list_t mru; + glyph_t * glyphs[HASH_SIZE]; +}; + +static void +free_glyph (glyph_t *glyph) +{ + pixman_list_unlink (&glyph->mru_link); + pixman_image_unref (glyph->image); + free (glyph); +} + +static unsigned int +hash (const void *font_key, const void *glyph_key) +{ + size_t key = (size_t)font_key + (size_t)glyph_key; + + /* This hash function is based on one found on Thomas Wang's + * web page at + * + * http://www.concentric.net/~Ttwang/tech/inthash.htm + * + */ + key = (key << 15) - key - 1; + key = key ^ (key >> 12); + key = key + (key << 2); + key = key ^ (key >> 4); + key = key + (key << 3) + (key << 11); + key = key ^ (key >> 16); + + return key; +} + +static glyph_t * +lookup_glyph (pixman_glyph_cache_t *cache, + void *font_key, + void *glyph_key) +{ + unsigned idx; + glyph_t *g; + + idx = hash (font_key, glyph_key); + while ((g = cache->glyphs[idx++ & HASH_MASK])) + { + if (g != TOMBSTONE && + g->font_key == font_key && + g->glyph_key == glyph_key) + { + return g; + } + } + + return NULL; +} + +static void +insert_glyph (pixman_glyph_cache_t *cache, + glyph_t *glyph) +{ + unsigned idx; + glyph_t **loc; + + idx = hash (glyph->font_key, glyph->glyph_key); + + /* Note: we assume that there is room in the table. If there isn't, + * this will be an infinite loop. + */ + do + { + loc = &cache->glyphs[idx++ & HASH_MASK]; + } while (*loc && *loc != TOMBSTONE); + + if (*loc == TOMBSTONE) + cache->n_tombstones--; + cache->n_glyphs++; + + *loc = glyph; +} + +static void +remove_glyph (pixman_glyph_cache_t *cache, + glyph_t *glyph) +{ + unsigned idx; + + idx = hash (glyph->font_key, glyph->glyph_key); + while (cache->glyphs[idx & HASH_MASK] != glyph) + idx++; + + cache->glyphs[idx & HASH_MASK] = TOMBSTONE; + cache->n_tombstones++; + cache->n_glyphs--; + + /* Eliminate tombstones if possible */ + if (cache->glyphs[(idx + 1) & HASH_MASK] == NULL) + { + while (cache->glyphs[idx & HASH_MASK] == TOMBSTONE) + { + cache->glyphs[idx & HASH_MASK] = NULL; + cache->n_tombstones--; + idx--; + } + } +} + +static void +clear_table (pixman_glyph_cache_t *cache) +{ + int i; + + for (i = 0; i < HASH_SIZE; ++i) + { + glyph_t *glyph = cache->glyphs[i]; + + if (glyph && glyph != TOMBSTONE) + free_glyph (glyph); + + cache->glyphs[i] = NULL; + } + + cache->n_glyphs = 0; + cache->n_tombstones = 0; +} + +PIXMAN_EXPORT pixman_glyph_cache_t * +pixman_glyph_cache_create (void) +{ + pixman_glyph_cache_t *cache; + + if (!(cache = malloc (sizeof *cache))) + return NULL; + + memset (cache->glyphs, 0, sizeof (cache->glyphs)); + cache->n_glyphs = 0; + cache->n_tombstones = 0; + cache->freeze_count = 0; + + pixman_list_init (&cache->mru); + + return cache; +} + +PIXMAN_EXPORT void +pixman_glyph_cache_destroy (pixman_glyph_cache_t *cache) +{ + return_if_fail (cache->freeze_count == 0); + + clear_table (cache); + + free (cache); +} + +PIXMAN_EXPORT void +pixman_glyph_cache_freeze (pixman_glyph_cache_t *cache) +{ + cache->freeze_count++; +} + +PIXMAN_EXPORT void +pixman_glyph_cache_thaw (pixman_glyph_cache_t *cache) +{ + if (--cache->freeze_count == 0 && + cache->n_glyphs + cache->n_tombstones > N_GLYPHS_HIGH_WATER) + { + if (cache->n_tombstones > N_GLYPHS_HIGH_WATER) + { + /* More than half the entries are + * tombstones. Just dump the whole table. + */ + clear_table (cache); + } + + while (cache->n_glyphs > N_GLYPHS_LOW_WATER) + { + glyph_t *glyph = CONTAINER_OF (glyph_t, mru_link, cache->mru.tail); + + remove_glyph (cache, glyph); + free_glyph (glyph); + } + } +} + +PIXMAN_EXPORT const void * +pixman_glyph_cache_lookup (pixman_glyph_cache_t *cache, + void *font_key, + void *glyph_key) +{ + return lookup_glyph (cache, font_key, glyph_key); +} + +PIXMAN_EXPORT const void * +pixman_glyph_cache_insert (pixman_glyph_cache_t *cache, + void *font_key, + void *glyph_key, + int origin_x, + int origin_y, + pixman_image_t *image) +{ + glyph_t *glyph; + int32_t width, height; + + return_val_if_fail (cache->freeze_count > 0, NULL); + return_val_if_fail (image->type == BITS, NULL); + + width = image->bits.width; + height = image->bits.height; + + if (cache->n_glyphs >= HASH_SIZE) + return NULL; + + if (!(glyph = malloc (sizeof *glyph))) + return NULL; + + glyph->font_key = font_key; + glyph->glyph_key = glyph_key; + glyph->origin_x = origin_x; + glyph->origin_y = origin_y; + + if (!(glyph->image = pixman_image_create_bits ( + image->bits.format, width, height, NULL, -1))) + { + free (glyph); + return NULL; + } + + pixman_image_composite32 (PIXMAN_OP_SRC, + image, NULL, glyph->image, 0, 0, 0, 0, 0, 0, + width, height); + + if (PIXMAN_FORMAT_A (glyph->image->bits.format) != 0 && + PIXMAN_FORMAT_RGB (glyph->image->bits.format) != 0) + { + pixman_image_set_component_alpha (glyph->image, TRUE); + } + + pixman_list_prepend (&cache->mru, &glyph->mru_link); + + _pixman_image_validate (glyph->image); + insert_glyph (cache, glyph); + + return glyph; +} + +PIXMAN_EXPORT void +pixman_glyph_cache_remove (pixman_glyph_cache_t *cache, + void *font_key, + void *glyph_key) +{ + glyph_t *glyph; + + if ((glyph = lookup_glyph (cache, font_key, glyph_key))) + { + remove_glyph (cache, glyph); + + free_glyph (glyph); + } +} + +PIXMAN_EXPORT void +pixman_glyph_get_extents (pixman_glyph_cache_t *cache, + int n_glyphs, + pixman_glyph_t *glyphs, + pixman_box32_t *extents) +{ + int i; + + extents->x1 = extents->y1 = INT32_MAX; + extents->x2 = extents->y2 = INT32_MIN; + + for (i = 0; i < n_glyphs; ++i) + { + glyph_t *glyph = (glyph_t *)glyphs[i].glyph; + int x1, y1, x2, y2; + + x1 = glyphs[i].x - glyph->origin_x; + y1 = glyphs[i].y - glyph->origin_y; + x2 = glyphs[i].x - glyph->origin_x + glyph->image->bits.width; + y2 = glyphs[i].y - glyph->origin_y + glyph->image->bits.height; + + if (x1 < extents->x1) + extents->x1 = x1; + if (y1 < extents->y1) + extents->y1 = y1; + if (x2 > extents->x2) + extents->x2 = x2; + if (y2 > extents->y2) + extents->y2 = y2; + } +} + +/* This function returns a format that is suitable for use as a mask for the + * set of glyphs in question. + */ +PIXMAN_EXPORT pixman_format_code_t +pixman_glyph_get_mask_format (pixman_glyph_cache_t *cache, + int n_glyphs, + const pixman_glyph_t *glyphs) +{ + pixman_format_code_t format = PIXMAN_a1; + int i; + + for (i = 0; i < n_glyphs; ++i) + { + const glyph_t *glyph = glyphs[i].glyph; + pixman_format_code_t glyph_format = glyph->image->bits.format; + + if (PIXMAN_FORMAT_TYPE (glyph_format) == PIXMAN_TYPE_A) + { + if (PIXMAN_FORMAT_A (glyph_format) > PIXMAN_FORMAT_A (format)) + format = glyph_format; + } + else + { + return PIXMAN_a8r8g8b8; + } + } + + return format; +} + +static pixman_bool_t +box32_intersect (pixman_box32_t *dest, + const pixman_box32_t *box1, + const pixman_box32_t *box2) +{ + dest->x1 = MAX (box1->x1, box2->x1); + dest->y1 = MAX (box1->y1, box2->y1); + dest->x2 = MIN (box1->x2, box2->x2); + dest->y2 = MIN (box1->y2, box2->y2); + + return dest->x2 > dest->x1 && dest->y2 > dest->y1; +} + +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) +__attribute__((__force_align_arg_pointer__)) +#endif +PIXMAN_EXPORT void +pixman_composite_glyphs_no_mask (pixman_op_t op, + pixman_image_t *src, + pixman_image_t *dest, + int32_t src_x, + int32_t src_y, + int32_t dest_x, + int32_t dest_y, + pixman_glyph_cache_t *cache, + int n_glyphs, + const pixman_glyph_t *glyphs) +{ + pixman_region32_t region; + pixman_format_code_t glyph_format = PIXMAN_null; + uint32_t glyph_flags = 0; + pixman_format_code_t dest_format; + uint32_t dest_flags; + pixman_composite_func_t func = NULL; + pixman_implementation_t *implementation = NULL; + pixman_composite_info_t info; + int i; + + _pixman_image_validate (src); + _pixman_image_validate (dest); + + dest_format = dest->common.extended_format_code; + dest_flags = dest->common.flags; + + pixman_region32_init (®ion); + if (!_pixman_compute_composite_region32 ( + ®ion, + src, NULL, dest, + src_x - dest_x, src_y - dest_y, 0, 0, 0, 0, + dest->bits.width, dest->bits.height)) + { + goto out; + } + + info.op = op; + info.src_image = src; + info.dest_image = dest; + info.src_flags = src->common.flags; + info.dest_flags = dest->common.flags; + + for (i = 0; i < n_glyphs; ++i) + { + glyph_t *glyph = (glyph_t *)glyphs[i].glyph; + pixman_image_t *glyph_img = glyph->image; + pixman_box32_t glyph_box; + pixman_box32_t *pbox; + uint32_t extra = FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; + pixman_box32_t composite_box; + int n; + + glyph_box.x1 = dest_x + glyphs[i].x - glyph->origin_x; + glyph_box.y1 = dest_y + glyphs[i].y - glyph->origin_y; + glyph_box.x2 = glyph_box.x1 + glyph->image->bits.width; + glyph_box.y2 = glyph_box.y1 + glyph->image->bits.height; + + pbox = pixman_region32_rectangles (®ion, &n); + + info.mask_image = glyph_img; + + while (n--) + { + if (box32_intersect (&composite_box, pbox, &glyph_box)) + { + if (glyph_img->common.extended_format_code != glyph_format || + glyph_img->common.flags != glyph_flags) + { + glyph_format = glyph_img->common.extended_format_code; + glyph_flags = glyph_img->common.flags; + + _pixman_implementation_lookup_composite ( + get_implementation(), op, + src->common.extended_format_code, src->common.flags, + glyph_format, glyph_flags | extra, + dest_format, dest_flags, + &implementation, &func); + } + + info.src_x = src_x + composite_box.x1 - dest_x; + info.src_y = src_y + composite_box.y1 - dest_y; + info.mask_x = composite_box.x1 - (dest_x + glyphs[i].x - glyph->origin_x); + info.mask_y = composite_box.y1 - (dest_y + glyphs[i].y - glyph->origin_y); + info.dest_x = composite_box.x1; + info.dest_y = composite_box.y1; + info.width = composite_box.x2 - composite_box.x1; + info.height = composite_box.y2 - composite_box.y1; + + info.mask_flags = glyph_flags; + + func (implementation, &info); + } + + pbox++; + } + pixman_list_move_to_front (&cache->mru, &glyph->mru_link); + } + +out: + pixman_region32_fini (®ion); +} + +static void +add_glyphs (pixman_glyph_cache_t *cache, + pixman_image_t *dest, + int off_x, int off_y, + int n_glyphs, const pixman_glyph_t *glyphs) +{ + pixman_format_code_t glyph_format = PIXMAN_null; + uint32_t glyph_flags = 0; + pixman_composite_func_t func = NULL; + pixman_implementation_t *implementation = NULL; + pixman_format_code_t dest_format; + uint32_t dest_flags; + pixman_box32_t dest_box; + pixman_composite_info_t info; + pixman_image_t *white_img = NULL; + pixman_bool_t white_src = FALSE; + int i; + + _pixman_image_validate (dest); + + dest_format = dest->common.extended_format_code; + dest_flags = dest->common.flags; + + info.op = PIXMAN_OP_ADD; + info.dest_image = dest; + info.src_x = 0; + info.src_y = 0; + info.dest_flags = dest_flags; + + dest_box.x1 = 0; + dest_box.y1 = 0; + dest_box.x2 = dest->bits.width; + dest_box.y2 = dest->bits.height; + + for (i = 0; i < n_glyphs; ++i) + { + glyph_t *glyph = (glyph_t *)glyphs[i].glyph; + pixman_image_t *glyph_img = glyph->image; + pixman_box32_t glyph_box; + pixman_box32_t composite_box; + + if (glyph_img->common.extended_format_code != glyph_format || + glyph_img->common.flags != glyph_flags) + { + pixman_format_code_t src_format, mask_format; + + glyph_format = glyph_img->common.extended_format_code; + glyph_flags = glyph_img->common.flags; + + if (glyph_format == dest->bits.format) + { + src_format = glyph_format; + mask_format = PIXMAN_null; + info.src_flags = glyph_flags | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; + info.mask_flags = FAST_PATH_IS_OPAQUE; + info.mask_image = NULL; + white_src = FALSE; + } + else + { + if (!white_img) + { + static const pixman_color_t white = { 0xffff, 0xffff, 0xffff, 0xffff }; + + if (!(white_img = pixman_image_create_solid_fill (&white))) + goto out; + + _pixman_image_validate (white_img); + } + + src_format = PIXMAN_solid; + mask_format = glyph_format; + info.src_flags = white_img->common.flags; + info.mask_flags = glyph_flags | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; + info.src_image = white_img; + white_src = TRUE; + } + + _pixman_implementation_lookup_composite ( + get_implementation(), PIXMAN_OP_ADD, + src_format, info.src_flags, + mask_format, info.mask_flags, + dest_format, dest_flags, + &implementation, &func); + } + + glyph_box.x1 = glyphs[i].x - glyph->origin_x + off_x; + glyph_box.y1 = glyphs[i].y - glyph->origin_y + off_y; + glyph_box.x2 = glyph_box.x1 + glyph->image->bits.width; + glyph_box.y2 = glyph_box.y1 + glyph->image->bits.height; + + if (box32_intersect (&composite_box, &glyph_box, &dest_box)) + { + int src_x = composite_box.x1 - glyph_box.x1; + int src_y = composite_box.y1 - glyph_box.y1; + + if (white_src) + info.mask_image = glyph_img; + else + info.src_image = glyph_img; + + info.mask_x = info.src_x = src_x; + info.mask_y = info.src_y = src_y; + info.dest_x = composite_box.x1; + info.dest_y = composite_box.y1; + info.width = composite_box.x2 - composite_box.x1; + info.height = composite_box.y2 - composite_box.y1; + + func (implementation, &info); + + pixman_list_move_to_front (&cache->mru, &glyph->mru_link); + } + } + +out: + if (white_img) + pixman_image_unref (white_img); +} + +/* Conceptually, for each glyph, (white IN glyph) is PIXMAN_OP_ADDed to an + * infinitely big mask image at the position such that the glyph origin point + * is positioned at the (glyphs[i].x, glyphs[i].y) point. + * + * Then (mask_x, mask_y) in the infinite mask and (src_x, src_y) in the source + * image are both aligned with (dest_x, dest_y) in the destination image. Then + * these three images are composited within the + * + * (dest_x, dest_y, dst_x + width, dst_y + height) + * + * rectangle. + * + * TODO: + * - Trim the mask to the destination clip/image? + * - Trim composite region based on sources, when the op ignores 0s. + */ +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) +__attribute__((__force_align_arg_pointer__)) +#endif +PIXMAN_EXPORT void +pixman_composite_glyphs (pixman_op_t op, + pixman_image_t *src, + pixman_image_t *dest, + pixman_format_code_t mask_format, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height, + pixman_glyph_cache_t *cache, + int n_glyphs, + const pixman_glyph_t *glyphs) +{ + pixman_image_t *mask; + + if (!(mask = pixman_image_create_bits (mask_format, width, height, NULL, -1))) + return; + + if (PIXMAN_FORMAT_A (mask_format) != 0 && + PIXMAN_FORMAT_RGB (mask_format) != 0) + { + pixman_image_set_component_alpha (mask, TRUE); + } + + add_glyphs (cache, mask, - mask_x, - mask_y, n_glyphs, glyphs); + + pixman_image_composite32 (op, src, mask, dest, + src_x, src_y, + 0, 0, + dest_x, dest_y, + width, height); + + pixman_image_unref (mask); +} diff --git a/src/pixman/pixman/pixman-gradient-walker.c b/src/pixman/pixman/pixman-gradient-walker.c new file mode 100644 index 0000000..5944a55 --- /dev/null +++ b/src/pixman/pixman/pixman-gradient-walker.c @@ -0,0 +1,202 @@ +/* + * + * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. + * 2005 Lars Knoll & Zack Rusin, Trolltech + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include "pixman-private.h" + +void +_pixman_gradient_walker_init (pixman_gradient_walker_t *walker, + gradient_t * gradient, + pixman_repeat_t repeat) +{ + walker->num_stops = gradient->n_stops; + walker->stops = gradient->stops; + walker->left_x = 0; + walker->right_x = 0x10000; + walker->a_s = 0.0f; + walker->a_b = 0.0f; + walker->r_s = 0.0f; + walker->r_b = 0.0f; + walker->g_s = 0.0f; + walker->g_b = 0.0f; + walker->b_s = 0.0f; + walker->b_b = 0.0f; + walker->repeat = repeat; + + walker->need_reset = TRUE; +} + +static void +gradient_walker_reset (pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t pos) +{ + int32_t x, left_x, right_x; + pixman_color_t *left_c, *right_c; + int n, count = walker->num_stops; + pixman_gradient_stop_t *stops = walker->stops; + float la, lr, lg, lb; + float ra, rr, rg, rb; + float lx, rx; + + if (walker->repeat == PIXMAN_REPEAT_NORMAL) + { + x = (int32_t)pos & 0xffff; + } + else if (walker->repeat == PIXMAN_REPEAT_REFLECT) + { + x = (int32_t)pos & 0xffff; + if ((int32_t)pos & 0x10000) + x = 0x10000 - x; + } + else + { + x = pos; + } + + for (n = 0; n < count; n++) + { + if (x < stops[n].x) + break; + } + + left_x = stops[n - 1].x; + left_c = &stops[n - 1].color; + + right_x = stops[n].x; + right_c = &stops[n].color; + + if (walker->repeat == PIXMAN_REPEAT_NORMAL) + { + left_x += (pos - x); + right_x += (pos - x); + } + else if (walker->repeat == PIXMAN_REPEAT_REFLECT) + { + if ((int32_t)pos & 0x10000) + { + pixman_color_t *tmp_c; + int32_t tmp_x; + + tmp_x = 0x10000 - right_x; + right_x = 0x10000 - left_x; + left_x = tmp_x; + + tmp_c = right_c; + right_c = left_c; + left_c = tmp_c; + + x = 0x10000 - x; + } + left_x += (pos - x); + right_x += (pos - x); + } + else if (walker->repeat == PIXMAN_REPEAT_NONE) + { + if (n == 0) + right_c = left_c; + else if (n == count) + left_c = right_c; + } + + /* The alpha channel is scaled to be in the [0, 255] interval, + * and the red/green/blue channels are scaled to be in [0, 1]. + * This ensures that after premultiplication all channels will + * be in the [0, 255] interval. + */ + la = (left_c->alpha * (1.0f/257.0f)); + lr = (left_c->red * (1.0f/257.0f)); + lg = (left_c->green * (1.0f/257.0f)); + lb = (left_c->blue * (1.0f/257.0f)); + + ra = (right_c->alpha * (1.0f/257.0f)); + rr = (right_c->red * (1.0f/257.0f)); + rg = (right_c->green * (1.0f/257.0f)); + rb = (right_c->blue * (1.0f/257.0f)); + + lx = left_x * (1.0f/65536.0f); + rx = right_x * (1.0f/65536.0f); + + if (FLOAT_IS_ZERO (rx - lx) || left_x == INT32_MIN || right_x == INT32_MAX) + { + walker->a_s = walker->r_s = walker->g_s = walker->b_s = 0.0f; + walker->a_b = (la + ra) / 2.0f; + walker->r_b = (lr + rr) / 510.0f; + walker->g_b = (lg + rg) / 510.0f; + walker->b_b = (lb + rb) / 510.0f; + } + else + { + float w_rec = 1.0f / (rx - lx); + + walker->a_b = (la * rx - ra * lx) * w_rec; + walker->r_b = (lr * rx - rr * lx) * w_rec * (1.0f/255.0f); + walker->g_b = (lg * rx - rg * lx) * w_rec * (1.0f/255.0f); + walker->b_b = (lb * rx - rb * lx) * w_rec * (1.0f/255.0f); + + walker->a_s = (ra - la) * w_rec; + walker->r_s = (rr - lr) * w_rec * (1.0f/255.0f); + walker->g_s = (rg - lg) * w_rec * (1.0f/255.0f); + walker->b_s = (rb - lb) * w_rec * (1.0f/255.0f); + } + + walker->left_x = left_x; + walker->right_x = right_x; + + walker->need_reset = FALSE; +} + +uint32_t +_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t x) +{ + float a, r, g, b; + uint8_t a8, r8, g8, b8; + uint32_t v; + float y; + + if (walker->need_reset || x < walker->left_x || x >= walker->right_x) + gradient_walker_reset (walker, x); + + y = x * (1.0f / 65536.0f); + + a = walker->a_s * y + walker->a_b; + r = a * (walker->r_s * y + walker->r_b); + g = a * (walker->g_s * y + walker->g_b); + b = a * (walker->b_s * y + walker->b_b); + + a8 = a + 0.5f; + r8 = r + 0.5f; + g8 = g + 0.5f; + b8 = b + 0.5f; + + v = ((a8 << 24) & 0xff000000) | + ((r8 << 16) & 0x00ff0000) | + ((g8 << 8) & 0x0000ff00) | + ((b8 >> 0) & 0x000000ff); + + return v; +} diff --git a/src/pixman/pixman/pixman-image.c b/src/pixman/pixman/pixman-image.c new file mode 100644 index 0000000..1ff1a49 --- /dev/null +++ b/src/pixman/pixman/pixman-image.c @@ -0,0 +1,945 @@ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include "pixman-private.h" + +static const pixman_color_t transparent_black = { 0, 0, 0, 0 }; + +static void +gradient_property_changed (pixman_image_t *image) +{ + gradient_t *gradient = &image->gradient; + int n = gradient->n_stops; + pixman_gradient_stop_t *stops = gradient->stops; + pixman_gradient_stop_t *begin = &(gradient->stops[-1]); + pixman_gradient_stop_t *end = &(gradient->stops[n]); + + switch (gradient->common.repeat) + { + default: + case PIXMAN_REPEAT_NONE: + begin->x = INT32_MIN; + begin->color = transparent_black; + end->x = INT32_MAX; + end->color = transparent_black; + break; + + case PIXMAN_REPEAT_NORMAL: + begin->x = stops[n - 1].x - pixman_fixed_1; + begin->color = stops[n - 1].color; + end->x = stops[0].x + pixman_fixed_1; + end->color = stops[0].color; + break; + + case PIXMAN_REPEAT_REFLECT: + begin->x = - stops[0].x; + begin->color = stops[0].color; + end->x = pixman_int_to_fixed (2) - stops[n - 1].x; + end->color = stops[n - 1].color; + break; + + case PIXMAN_REPEAT_PAD: + begin->x = INT32_MIN; + begin->color = stops[0].color; + end->x = INT32_MAX; + end->color = stops[n - 1].color; + break; + } +} + +pixman_bool_t +_pixman_init_gradient (gradient_t * gradient, + const pixman_gradient_stop_t *stops, + int n_stops) +{ + return_val_if_fail (n_stops > 0, FALSE); + + /* We allocate two extra stops, one before the beginning of the stop list, + * and one after the end. These stops are initialized to whatever color + * would be used for positions outside the range of the stop list. + * + * This saves a bit of computation in the gradient walker. + * + * The pointer we store in the gradient_t struct still points to the + * first user-supplied struct, so when freeing, we will have to + * subtract one. + */ + gradient->stops = + pixman_malloc_ab (n_stops + 2, sizeof (pixman_gradient_stop_t)); + if (!gradient->stops) + return FALSE; + + gradient->stops += 1; + memcpy (gradient->stops, stops, n_stops * sizeof (pixman_gradient_stop_t)); + gradient->n_stops = n_stops; + + gradient->common.property_changed = gradient_property_changed; + + return TRUE; +} + +void +_pixman_image_init (pixman_image_t *image) +{ + image_common_t *common = &image->common; + + pixman_region32_init (&common->clip_region); + + common->alpha_count = 0; + common->have_clip_region = FALSE; + common->clip_sources = FALSE; + common->transform = NULL; + common->repeat = PIXMAN_REPEAT_NONE; + common->filter = PIXMAN_FILTER_NEAREST; + common->filter_params = NULL; + common->n_filter_params = 0; + common->alpha_map = NULL; + common->component_alpha = FALSE; + common->ref_count = 1; + common->property_changed = NULL; + common->client_clip = FALSE; + common->destroy_func = NULL; + common->destroy_data = NULL; + common->dirty = TRUE; +} + +pixman_bool_t +_pixman_image_fini (pixman_image_t *image) +{ + image_common_t *common = (image_common_t *)image; + + common->ref_count--; + + if (common->ref_count == 0) + { + if (image->common.destroy_func) + image->common.destroy_func (image, image->common.destroy_data); + + pixman_region32_fini (&common->clip_region); + + free (common->transform); + free (common->filter_params); + + if (common->alpha_map) + pixman_image_unref ((pixman_image_t *)common->alpha_map); + + if (image->type == LINEAR || + image->type == RADIAL || + image->type == CONICAL) + { + if (image->gradient.stops) + { + /* See _pixman_init_gradient() for an explanation of the - 1 */ + free (image->gradient.stops - 1); + } + + /* This will trigger if someone adds a property_changed + * method to the linear/radial/conical gradient overwriting + * the general one. + */ + assert ( + image->common.property_changed == gradient_property_changed); + } + + if (image->type == BITS && image->bits.free_me) + free (image->bits.free_me); + + return TRUE; + } + + return FALSE; +} + +pixman_image_t * +_pixman_image_allocate (void) +{ + pixman_image_t *image = malloc (sizeof (pixman_image_t)); + + if (image) + _pixman_image_init (image); + + return image; +} + +static void +image_property_changed (pixman_image_t *image) +{ + image->common.dirty = TRUE; +} + +/* Ref Counting */ +PIXMAN_EXPORT pixman_image_t * +pixman_image_ref (pixman_image_t *image) +{ + image->common.ref_count++; + + return image; +} + +/* returns TRUE when the image is freed */ +PIXMAN_EXPORT pixman_bool_t +pixman_image_unref (pixman_image_t *image) +{ + if (_pixman_image_fini (image)) + { + free (image); + return TRUE; + } + + return FALSE; +} + +PIXMAN_EXPORT void +pixman_image_set_destroy_function (pixman_image_t * image, + pixman_image_destroy_func_t func, + void * data) +{ + image->common.destroy_func = func; + image->common.destroy_data = data; +} + +PIXMAN_EXPORT void * +pixman_image_get_destroy_data (pixman_image_t *image) +{ + return image->common.destroy_data; +} + +void +_pixman_image_reset_clip_region (pixman_image_t *image) +{ + image->common.have_clip_region = FALSE; +} + +/* Executive Summary: This function is a no-op that only exists + * for historical reasons. + * + * There used to be a bug in the X server where it would rely on + * out-of-bounds accesses when it was asked to composite with a + * window as the source. It would create a pixman image pointing + * to some bogus position in memory, but then set a clip region + * to the position where the actual bits were. + * + * Due to a bug in old versions of pixman, where it would not clip + * against the image bounds when a clip region was set, this would + * actually work. So when the pixman bug was fixed, a workaround was + * added to allow certain out-of-bound accesses. This function disabled + * those workarounds. + * + * Since 0.21.2, pixman doesn't do these workarounds anymore, so now + * this function is a no-op. + */ +PIXMAN_EXPORT void +pixman_disable_out_of_bounds_workaround (void) +{ +} + +static void +compute_image_info (pixman_image_t *image) +{ + pixman_format_code_t code; + uint32_t flags = 0; + + /* Transform */ + if (!image->common.transform) + { + flags |= (FAST_PATH_ID_TRANSFORM | + FAST_PATH_X_UNIT_POSITIVE | + FAST_PATH_Y_UNIT_ZERO | + FAST_PATH_AFFINE_TRANSFORM); + } + else + { + flags |= FAST_PATH_HAS_TRANSFORM; + + if (image->common.transform->matrix[2][0] == 0 && + image->common.transform->matrix[2][1] == 0 && + image->common.transform->matrix[2][2] == pixman_fixed_1) + { + flags |= FAST_PATH_AFFINE_TRANSFORM; + + if (image->common.transform->matrix[0][1] == 0 && + image->common.transform->matrix[1][0] == 0) + { + if (image->common.transform->matrix[0][0] == -pixman_fixed_1 && + image->common.transform->matrix[1][1] == -pixman_fixed_1) + { + flags |= FAST_PATH_ROTATE_180_TRANSFORM; + } + flags |= FAST_PATH_SCALE_TRANSFORM; + } + else if (image->common.transform->matrix[0][0] == 0 && + image->common.transform->matrix[1][1] == 0) + { + pixman_fixed_t m01 = image->common.transform->matrix[0][1]; + pixman_fixed_t m10 = image->common.transform->matrix[1][0]; + + if (m01 == -pixman_fixed_1 && m10 == pixman_fixed_1) + flags |= FAST_PATH_ROTATE_90_TRANSFORM; + else if (m01 == pixman_fixed_1 && m10 == -pixman_fixed_1) + flags |= FAST_PATH_ROTATE_270_TRANSFORM; + } + } + + if (image->common.transform->matrix[0][0] > 0) + flags |= FAST_PATH_X_UNIT_POSITIVE; + + if (image->common.transform->matrix[1][0] == 0) + flags |= FAST_PATH_Y_UNIT_ZERO; + } + + /* Filter */ + switch (image->common.filter) + { + case PIXMAN_FILTER_NEAREST: + case PIXMAN_FILTER_FAST: + flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER); + break; + + case PIXMAN_FILTER_BILINEAR: + case PIXMAN_FILTER_GOOD: + case PIXMAN_FILTER_BEST: + flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER); + + /* Here we have a chance to optimize BILINEAR filter to NEAREST if + * they are equivalent for the currently used transformation matrix. + */ + if (flags & FAST_PATH_ID_TRANSFORM) + { + flags |= FAST_PATH_NEAREST_FILTER; + } + else if ( + /* affine and integer translation components in matrix ... */ + ((flags & FAST_PATH_AFFINE_TRANSFORM) && + !pixman_fixed_frac (image->common.transform->matrix[0][2] | + image->common.transform->matrix[1][2])) && + ( + /* ... combined with a simple rotation */ + (flags & (FAST_PATH_ROTATE_90_TRANSFORM | + FAST_PATH_ROTATE_180_TRANSFORM | + FAST_PATH_ROTATE_270_TRANSFORM)) || + /* ... or combined with a simple non-rotated translation */ + (image->common.transform->matrix[0][0] == pixman_fixed_1 && + image->common.transform->matrix[1][1] == pixman_fixed_1 && + image->common.transform->matrix[0][1] == 0 && + image->common.transform->matrix[1][0] == 0) + ) + ) + { + /* FIXME: there are some affine-test failures, showing that + * handling of BILINEAR and NEAREST filter is not quite + * equivalent when getting close to 32K for the translation + * components of the matrix. That's likely some bug, but for + * now just skip BILINEAR->NEAREST optimization in this case. + */ + pixman_fixed_t magic_limit = pixman_int_to_fixed (30000); + if (image->common.transform->matrix[0][2] <= magic_limit && + image->common.transform->matrix[1][2] <= magic_limit && + image->common.transform->matrix[0][2] >= -magic_limit && + image->common.transform->matrix[1][2] >= -magic_limit) + { + flags |= FAST_PATH_NEAREST_FILTER; + } + } + break; + + case PIXMAN_FILTER_CONVOLUTION: + break; + + case PIXMAN_FILTER_SEPARABLE_CONVOLUTION: + flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER; + break; + + default: + flags |= FAST_PATH_NO_CONVOLUTION_FILTER; + break; + } + + /* Repeat mode */ + switch (image->common.repeat) + { + case PIXMAN_REPEAT_NONE: + flags |= + FAST_PATH_NO_REFLECT_REPEAT | + FAST_PATH_NO_PAD_REPEAT | + FAST_PATH_NO_NORMAL_REPEAT; + break; + + case PIXMAN_REPEAT_REFLECT: + flags |= + FAST_PATH_NO_PAD_REPEAT | + FAST_PATH_NO_NONE_REPEAT | + FAST_PATH_NO_NORMAL_REPEAT; + break; + + case PIXMAN_REPEAT_PAD: + flags |= + FAST_PATH_NO_REFLECT_REPEAT | + FAST_PATH_NO_NONE_REPEAT | + FAST_PATH_NO_NORMAL_REPEAT; + break; + + default: + flags |= + FAST_PATH_NO_REFLECT_REPEAT | + FAST_PATH_NO_PAD_REPEAT | + FAST_PATH_NO_NONE_REPEAT; + break; + } + + /* Component alpha */ + if (image->common.component_alpha) + flags |= FAST_PATH_COMPONENT_ALPHA; + else + flags |= FAST_PATH_UNIFIED_ALPHA; + + flags |= (FAST_PATH_NO_ACCESSORS | FAST_PATH_NARROW_FORMAT); + + /* Type specific checks */ + switch (image->type) + { + case SOLID: + code = PIXMAN_solid; + + if (image->solid.color.alpha == 0xffff) + flags |= FAST_PATH_IS_OPAQUE; + break; + + case BITS: + if (image->bits.width == 1 && + image->bits.height == 1 && + image->common.repeat != PIXMAN_REPEAT_NONE) + { + code = PIXMAN_solid; + } + else + { + code = image->bits.format; + flags |= FAST_PATH_BITS_IMAGE; + } + + if (!PIXMAN_FORMAT_A (image->bits.format) && + PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_GRAY && + PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_COLOR) + { + flags |= FAST_PATH_SAMPLES_OPAQUE; + + if (image->common.repeat != PIXMAN_REPEAT_NONE) + flags |= FAST_PATH_IS_OPAQUE; + } + + if (image->bits.read_func || image->bits.write_func) + flags &= ~FAST_PATH_NO_ACCESSORS; + + if (PIXMAN_FORMAT_IS_WIDE (image->bits.format)) + flags &= ~FAST_PATH_NARROW_FORMAT; + break; + + case RADIAL: + code = PIXMAN_unknown; + + /* + * As explained in pixman-radial-gradient.c, every point of + * the plane has a valid associated radius (and thus will be + * colored) if and only if a is negative (i.e. one of the two + * circles contains the other one). + */ + + if (image->radial.a >= 0) + break; + + /* Fall through */ + + case CONICAL: + case LINEAR: + code = PIXMAN_unknown; + + if (image->common.repeat != PIXMAN_REPEAT_NONE) + { + int i; + + flags |= FAST_PATH_IS_OPAQUE; + for (i = 0; i < image->gradient.n_stops; ++i) + { + if (image->gradient.stops[i].color.alpha != 0xffff) + { + flags &= ~FAST_PATH_IS_OPAQUE; + break; + } + } + } + break; + + default: + code = PIXMAN_unknown; + break; + } + + /* Alpha maps are only supported for BITS images, so it's always + * safe to ignore their presense for non-BITS images + */ + if (!image->common.alpha_map || image->type != BITS) + { + flags |= FAST_PATH_NO_ALPHA_MAP; + } + else + { + if (PIXMAN_FORMAT_IS_WIDE (image->common.alpha_map->format)) + flags &= ~FAST_PATH_NARROW_FORMAT; + } + + /* Both alpha maps and convolution filters can introduce + * non-opaqueness in otherwise opaque images. Also + * an image with component alpha turned on is only opaque + * if all channels are opaque, so we simply turn it off + * unconditionally for those images. + */ + if (image->common.alpha_map || + image->common.filter == PIXMAN_FILTER_CONVOLUTION || + image->common.filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION || + image->common.component_alpha) + { + flags &= ~(FAST_PATH_IS_OPAQUE | FAST_PATH_SAMPLES_OPAQUE); + } + + image->common.flags = flags; + image->common.extended_format_code = code; +} + +void +_pixman_image_validate (pixman_image_t *image) +{ + if (image->common.dirty) + { + compute_image_info (image); + + /* It is important that property_changed is + * called *after* compute_image_info() because + * property_changed() can make use of the flags + * to set up accessors etc. + */ + if (image->common.property_changed) + image->common.property_changed (image); + + image->common.dirty = FALSE; + } + + if (image->common.alpha_map) + _pixman_image_validate ((pixman_image_t *)image->common.alpha_map); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_image_set_clip_region32 (pixman_image_t * image, + pixman_region32_t *region) +{ + image_common_t *common = (image_common_t *)image; + pixman_bool_t result; + + if (region) + { + if ((result = pixman_region32_copy (&common->clip_region, region))) + image->common.have_clip_region = TRUE; + } + else + { + _pixman_image_reset_clip_region (image); + + result = TRUE; + } + + image_property_changed (image); + + return result; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_image_set_clip_region (pixman_image_t * image, + pixman_region16_t *region) +{ + image_common_t *common = (image_common_t *)image; + pixman_bool_t result; + + if (region) + { + if ((result = pixman_region32_copy_from_region16 (&common->clip_region, region))) + image->common.have_clip_region = TRUE; + } + else + { + _pixman_image_reset_clip_region (image); + + result = TRUE; + } + + image_property_changed (image); + + return result; +} + +PIXMAN_EXPORT void +pixman_image_set_has_client_clip (pixman_image_t *image, + pixman_bool_t client_clip) +{ + image->common.client_clip = client_clip; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_image_set_transform (pixman_image_t * image, + const pixman_transform_t *transform) +{ + static const pixman_transform_t id = + { + { { pixman_fixed_1, 0, 0 }, + { 0, pixman_fixed_1, 0 }, + { 0, 0, pixman_fixed_1 } } + }; + + image_common_t *common = (image_common_t *)image; + pixman_bool_t result; + + if (common->transform == transform) + return TRUE; + + if (!transform || memcmp (&id, transform, sizeof (pixman_transform_t)) == 0) + { + free (common->transform); + common->transform = NULL; + result = TRUE; + + goto out; + } + + if (common->transform && + memcmp (common->transform, transform, sizeof (pixman_transform_t)) == 0) + { + return TRUE; + } + + if (common->transform == NULL) + common->transform = malloc (sizeof (pixman_transform_t)); + + if (common->transform == NULL) + { + result = FALSE; + + goto out; + } + + memcpy (common->transform, transform, sizeof(pixman_transform_t)); + + result = TRUE; + +out: + image_property_changed (image); + + return result; +} + +PIXMAN_EXPORT void +pixman_image_set_repeat (pixman_image_t *image, + pixman_repeat_t repeat) +{ + if (image->common.repeat == repeat) + return; + + image->common.repeat = repeat; + + image_property_changed (image); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_image_set_filter (pixman_image_t * image, + pixman_filter_t filter, + const pixman_fixed_t *params, + int n_params) +{ + image_common_t *common = (image_common_t *)image; + pixman_fixed_t *new_params; + + if (params == common->filter_params && filter == common->filter) + return TRUE; + + if (filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION) + { + int width = pixman_fixed_to_int (params[0]); + int height = pixman_fixed_to_int (params[1]); + int x_phase_bits = pixman_fixed_to_int (params[2]); + int y_phase_bits = pixman_fixed_to_int (params[3]); + int n_x_phases = (1 << x_phase_bits); + int n_y_phases = (1 << y_phase_bits); + + return_val_if_fail ( + n_params == 4 + n_x_phases * width + n_y_phases * height, FALSE); + } + + new_params = NULL; + if (params) + { + new_params = pixman_malloc_ab (n_params, sizeof (pixman_fixed_t)); + if (!new_params) + return FALSE; + + memcpy (new_params, + params, n_params * sizeof (pixman_fixed_t)); + } + + common->filter = filter; + + if (common->filter_params) + free (common->filter_params); + + common->filter_params = new_params; + common->n_filter_params = n_params; + + image_property_changed (image); + return TRUE; +} + +PIXMAN_EXPORT void +pixman_image_set_source_clipping (pixman_image_t *image, + pixman_bool_t clip_sources) +{ + if (image->common.clip_sources == clip_sources) + return; + + image->common.clip_sources = clip_sources; + + image_property_changed (image); +} + +/* Unlike all the other property setters, this function does not + * copy the content of indexed. Doing this copying is simply + * way, way too expensive. + */ +PIXMAN_EXPORT void +pixman_image_set_indexed (pixman_image_t * image, + const pixman_indexed_t *indexed) +{ + bits_image_t *bits = (bits_image_t *)image; + + if (bits->indexed == indexed) + return; + + bits->indexed = indexed; + + image_property_changed (image); +} + +PIXMAN_EXPORT void +pixman_image_set_alpha_map (pixman_image_t *image, + pixman_image_t *alpha_map, + int16_t x, + int16_t y) +{ + image_common_t *common = (image_common_t *)image; + + return_if_fail (!alpha_map || alpha_map->type == BITS); + + if (alpha_map && common->alpha_count > 0) + { + /* If this image is being used as an alpha map itself, + * then you can't give it an alpha map of its own. + */ + return; + } + + if (alpha_map && alpha_map->common.alpha_map) + { + /* If the image has an alpha map of its own, + * then it can't be used as an alpha map itself + */ + return; + } + + if (common->alpha_map != (bits_image_t *)alpha_map) + { + if (common->alpha_map) + { + common->alpha_map->common.alpha_count--; + + pixman_image_unref ((pixman_image_t *)common->alpha_map); + } + + if (alpha_map) + { + common->alpha_map = (bits_image_t *)pixman_image_ref (alpha_map); + + common->alpha_map->common.alpha_count++; + } + else + { + common->alpha_map = NULL; + } + } + + common->alpha_origin_x = x; + common->alpha_origin_y = y; + + image_property_changed (image); +} + +PIXMAN_EXPORT void +pixman_image_set_component_alpha (pixman_image_t *image, + pixman_bool_t component_alpha) +{ + if (image->common.component_alpha == component_alpha) + return; + + image->common.component_alpha = component_alpha; + + image_property_changed (image); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_image_get_component_alpha (pixman_image_t *image) +{ + return image->common.component_alpha; +} + +PIXMAN_EXPORT void +pixman_image_set_accessors (pixman_image_t * image, + pixman_read_memory_func_t read_func, + pixman_write_memory_func_t write_func) +{ + return_if_fail (image != NULL); + + if (image->type == BITS) + { + image->bits.read_func = read_func; + image->bits.write_func = write_func; + + image_property_changed (image); + } +} + +PIXMAN_EXPORT uint32_t * +pixman_image_get_data (pixman_image_t *image) +{ + if (image->type == BITS) + return image->bits.bits; + + return NULL; +} + +PIXMAN_EXPORT int +pixman_image_get_width (pixman_image_t *image) +{ + if (image->type == BITS) + return image->bits.width; + + return 0; +} + +PIXMAN_EXPORT int +pixman_image_get_height (pixman_image_t *image) +{ + if (image->type == BITS) + return image->bits.height; + + return 0; +} + +PIXMAN_EXPORT int +pixman_image_get_stride (pixman_image_t *image) +{ + if (image->type == BITS) + return image->bits.rowstride * (int) sizeof (uint32_t); + + return 0; +} + +PIXMAN_EXPORT int +pixman_image_get_depth (pixman_image_t *image) +{ + if (image->type == BITS) + return PIXMAN_FORMAT_DEPTH (image->bits.format); + + return 0; +} + +PIXMAN_EXPORT pixman_format_code_t +pixman_image_get_format (pixman_image_t *image) +{ + if (image->type == BITS) + return image->bits.format; + + return PIXMAN_null; +} + +uint32_t +_pixman_image_get_solid (pixman_implementation_t *imp, + pixman_image_t * image, + pixman_format_code_t format) +{ + uint32_t result; + + if (image->type == SOLID) + { + result = image->solid.color_32; + } + else if (image->type == BITS) + { + if (image->bits.format == PIXMAN_a8r8g8b8) + result = image->bits.bits[0]; + else if (image->bits.format == PIXMAN_x8r8g8b8) + result = image->bits.bits[0] | 0xff000000; + else if (image->bits.format == PIXMAN_a8) + result = (*(uint8_t *)image->bits.bits) << 24; + else + goto otherwise; + } + else + { + pixman_iter_t iter; + + otherwise: + _pixman_implementation_iter_init ( + imp, &iter, image, 0, 0, 1, 1, + (uint8_t *)&result, + ITER_NARROW | ITER_SRC, image->common.flags); + + result = *iter.get_scanline (&iter, NULL); + + if (iter.fini) + iter.fini (&iter); + } + + /* If necessary, convert RGB <--> BGR. */ + if (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB + && PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB_SRGB) + { + result = (((result & 0xff000000) >> 0) | + ((result & 0x00ff0000) >> 16) | + ((result & 0x0000ff00) >> 0) | + ((result & 0x000000ff) << 16)); + } + + return result; +} diff --git a/src/pixman/pixman/pixman-implementation.c b/src/pixman/pixman/pixman-implementation.c new file mode 100644 index 0000000..5884054 --- /dev/null +++ b/src/pixman/pixman/pixman-implementation.c @@ -0,0 +1,401 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Red Hat not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. Red Hat makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include <stdlib.h> +#include "pixman-private.h" + +pixman_implementation_t * +_pixman_implementation_create (pixman_implementation_t *fallback, + const pixman_fast_path_t *fast_paths) +{ + pixman_implementation_t *imp; + + assert (fast_paths); + + if ((imp = malloc (sizeof (pixman_implementation_t)))) + { + pixman_implementation_t *d; + + memset (imp, 0, sizeof *imp); + + imp->fallback = fallback; + imp->fast_paths = fast_paths; + + /* Make sure the whole fallback chain has the right toplevel */ + for (d = imp; d != NULL; d = d->fallback) + d->toplevel = imp; + } + + return imp; +} + +#define N_CACHED_FAST_PATHS 8 + +typedef struct +{ + struct + { + pixman_implementation_t * imp; + pixman_fast_path_t fast_path; + } cache [N_CACHED_FAST_PATHS]; +} cache_t; + +PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache); + +static void +dummy_composite_rect (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ +} + +void +_pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, + pixman_op_t op, + pixman_format_code_t src_format, + uint32_t src_flags, + pixman_format_code_t mask_format, + uint32_t mask_flags, + pixman_format_code_t dest_format, + uint32_t dest_flags, + pixman_implementation_t **out_imp, + pixman_composite_func_t *out_func) +{ + pixman_implementation_t *imp; + cache_t *cache; + int i; + + /* Check cache for fast paths */ + cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache); + + for (i = 0; i < N_CACHED_FAST_PATHS; ++i) + { + const pixman_fast_path_t *info = &(cache->cache[i].fast_path); + + /* Note that we check for equality here, not whether + * the cached fast path matches. This is to prevent + * us from selecting an overly general fast path + * when a more specific one would work. + */ + if (info->op == op && + info->src_format == src_format && + info->mask_format == mask_format && + info->dest_format == dest_format && + info->src_flags == src_flags && + info->mask_flags == mask_flags && + info->dest_flags == dest_flags && + info->func) + { + *out_imp = cache->cache[i].imp; + *out_func = cache->cache[i].fast_path.func; + + goto update_cache; + } + } + + for (imp = toplevel; imp != NULL; imp = imp->fallback) + { + const pixman_fast_path_t *info = imp->fast_paths; + + while (info->op != PIXMAN_OP_NONE) + { + if ((info->op == op || info->op == PIXMAN_OP_any) && + /* Formats */ + ((info->src_format == src_format) || + (info->src_format == PIXMAN_any)) && + ((info->mask_format == mask_format) || + (info->mask_format == PIXMAN_any)) && + ((info->dest_format == dest_format) || + (info->dest_format == PIXMAN_any)) && + /* Flags */ + (info->src_flags & src_flags) == info->src_flags && + (info->mask_flags & mask_flags) == info->mask_flags && + (info->dest_flags & dest_flags) == info->dest_flags) + { + *out_imp = imp; + *out_func = info->func; + + /* Set i to the last spot in the cache so that the + * move-to-front code below will work + */ + i = N_CACHED_FAST_PATHS - 1; + + goto update_cache; + } + + ++info; + } + } + + /* We should never reach this point */ + _pixman_log_error ( + FUNC, + "No composite function found\n" + "\n" + "The most likely cause of this is that this system has issues with\n" + "thread local storage\n"); + + *out_imp = NULL; + *out_func = dummy_composite_rect; + return; + +update_cache: + if (i) + { + while (i--) + cache->cache[i + 1] = cache->cache[i]; + + cache->cache[0].imp = *out_imp; + cache->cache[0].fast_path.op = op; + cache->cache[0].fast_path.src_format = src_format; + cache->cache[0].fast_path.src_flags = src_flags; + cache->cache[0].fast_path.mask_format = mask_format; + cache->cache[0].fast_path.mask_flags = mask_flags; + cache->cache[0].fast_path.dest_format = dest_format; + cache->cache[0].fast_path.dest_flags = dest_flags; + cache->cache[0].fast_path.func = *out_func; + } +} + +static void +dummy_combine (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ +} + +pixman_combine_32_func_t +_pixman_implementation_lookup_combiner (pixman_implementation_t *imp, + pixman_op_t op, + pixman_bool_t component_alpha, + pixman_bool_t narrow) +{ + while (imp) + { + pixman_combine_32_func_t f = NULL; + + switch ((narrow << 1) | component_alpha) + { + case 0: /* not narrow, not component alpha */ + f = (pixman_combine_32_func_t)imp->combine_float[op]; + break; + + case 1: /* not narrow, component_alpha */ + f = (pixman_combine_32_func_t)imp->combine_float_ca[op]; + break; + + case 2: /* narrow, not component alpha */ + f = imp->combine_32[op]; + break; + + case 3: /* narrow, component_alpha */ + f = imp->combine_32_ca[op]; + break; + } + + if (f) + return f; + + imp = imp->fallback; + } + + /* We should never reach this point */ + _pixman_log_error (FUNC, "No known combine function\n"); + return dummy_combine; +} + +pixman_bool_t +_pixman_implementation_blt (pixman_implementation_t * imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height) +{ + while (imp) + { + if (imp->blt && + (*imp->blt) (imp, src_bits, dst_bits, src_stride, dst_stride, + src_bpp, dst_bpp, src_x, src_y, dest_x, dest_y, + width, height)) + { + return TRUE; + } + + imp = imp->fallback; + } + + return FALSE; +} + +pixman_bool_t +_pixman_implementation_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t filler) +{ + while (imp) + { + if (imp->fill && + ((*imp->fill) (imp, bits, stride, bpp, x, y, width, height, filler))) + { + return TRUE; + } + + imp = imp->fallback; + } + + return FALSE; +} + +static uint32_t * +get_scanline_null (pixman_iter_t *iter, const uint32_t *mask) +{ + return NULL; +} + +void +_pixman_implementation_iter_init (pixman_implementation_t *imp, + pixman_iter_t *iter, + pixman_image_t *image, + int x, + int y, + int width, + int height, + uint8_t *buffer, + iter_flags_t iter_flags, + uint32_t image_flags) +{ + pixman_format_code_t format; + + iter->image = image; + iter->buffer = (uint32_t *)buffer; + iter->x = x; + iter->y = y; + iter->width = width; + iter->height = height; + iter->iter_flags = iter_flags; + iter->image_flags = image_flags; + iter->fini = NULL; + + if (!iter->image) + { + iter->get_scanline = get_scanline_null; + return; + } + + format = iter->image->common.extended_format_code; + + while (imp) + { + if (imp->iter_info) + { + const pixman_iter_info_t *info; + + for (info = imp->iter_info; info->format != PIXMAN_null; ++info) + { + if ((info->format == PIXMAN_any || info->format == format) && + (info->image_flags & image_flags) == info->image_flags && + (info->iter_flags & iter_flags) == info->iter_flags) + { + iter->get_scanline = info->get_scanline; + iter->write_back = info->write_back; + + if (info->initializer) + info->initializer (iter, info); + return; + } + } + } + + imp = imp->fallback; + } +} + +pixman_bool_t +_pixman_disabled (const char *name) +{ + const char *env; + + if ((env = getenv ("PIXMAN_DISABLE"))) + { + do + { + const char *end; + int len; + + if ((end = strchr (env, ' '))) + len = end - env; + else + len = strlen (env); + + if (strlen (name) == len && strncmp (name, env, len) == 0) + { + printf ("pixman: Disabled %s implementation\n", name); + return TRUE; + } + + env += len; + } + while (*env++); + } + + return FALSE; +} + +pixman_implementation_t * +_pixman_choose_implementation (void) +{ + pixman_implementation_t *imp; + + imp = _pixman_implementation_create_general(); + + if (!_pixman_disabled ("fast")) + imp = _pixman_implementation_create_fast_path (imp); + + imp = _pixman_x86_get_implementations (imp); + imp = _pixman_arm_get_implementations (imp); + imp = _pixman_ppc_get_implementations (imp); + imp = _pixman_mips_get_implementations (imp); + + imp = _pixman_implementation_create_noop (imp); + + return imp; +} diff --git a/src/pixman/pixman/pixman-inlines.h b/src/pixman/pixman/pixman-inlines.h new file mode 100644 index 0000000..dd1c2f1 --- /dev/null +++ b/src/pixman/pixman/pixman-inlines.h @@ -0,0 +1,1339 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Keith Packard, SuSE, Inc. + */ + +#ifndef PIXMAN_FAST_PATH_H__ +#define PIXMAN_FAST_PATH_H__ + +#include "pixman-private.h" + +#define PIXMAN_REPEAT_COVER -1 + +/* Flags describing input parameters to fast path macro template. + * Turning on some flag values may indicate that + * "some property X is available so template can use this" or + * "some property X should be handled by template". + * + * FLAG_HAVE_SOLID_MASK + * Input mask is solid so template should handle this. + * + * FLAG_HAVE_NON_SOLID_MASK + * Input mask is bits mask so template should handle this. + * + * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually + * exclusive. (It's not allowed to turn both flags on) + */ +#define FLAG_NONE (0) +#define FLAG_HAVE_SOLID_MASK (1 << 1) +#define FLAG_HAVE_NON_SOLID_MASK (1 << 2) + +/* To avoid too short repeated scanline function calls, extend source + * scanlines having width less than below constant value. + */ +#define REPEAT_NORMAL_MIN_WIDTH 64 + +static force_inline pixman_bool_t +repeat (pixman_repeat_t repeat, int *c, int size) +{ + if (repeat == PIXMAN_REPEAT_NONE) + { + if (*c < 0 || *c >= size) + return FALSE; + } + else if (repeat == PIXMAN_REPEAT_NORMAL) + { + while (*c >= size) + *c -= size; + while (*c < 0) + *c += size; + } + else if (repeat == PIXMAN_REPEAT_PAD) + { + *c = CLIP (*c, 0, size - 1); + } + else /* REFLECT */ + { + *c = MOD (*c, size * 2); + if (*c >= size) + *c = size * 2 - *c - 1; + } + return TRUE; +} + +static force_inline int +pixman_fixed_to_bilinear_weight (pixman_fixed_t x) +{ + return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) & + ((1 << BILINEAR_INTERPOLATION_BITS) - 1); +} + +#if BILINEAR_INTERPOLATION_BITS <= 4 +/* Inspired by Filter_32_opaque from Skia */ +static force_inline uint32_t +bilinear_interpolation (uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + int distxy, distxiy, distixy, distixiy; + uint32_t lo, hi; + + distx <<= (4 - BILINEAR_INTERPOLATION_BITS); + disty <<= (4 - BILINEAR_INTERPOLATION_BITS); + + distxy = distx * disty; + distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */ + distixy = (disty << 4) - distxy; /* disty * (16 - distx) */ + distixiy = + 16 * 16 - (disty << 4) - + (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */ + + lo = (tl & 0xff00ff) * distixiy; + hi = ((tl >> 8) & 0xff00ff) * distixiy; + + lo += (tr & 0xff00ff) * distxiy; + hi += ((tr >> 8) & 0xff00ff) * distxiy; + + lo += (bl & 0xff00ff) * distixy; + hi += ((bl >> 8) & 0xff00ff) * distixy; + + lo += (br & 0xff00ff) * distxy; + hi += ((br >> 8) & 0xff00ff) * distxy; + + return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff); +} + +#else +#if SIZEOF_LONG > 4 + +static force_inline uint32_t +bilinear_interpolation (uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + uint64_t distxy, distxiy, distixy, distixiy; + uint64_t tl64, tr64, bl64, br64; + uint64_t f, r; + + distx <<= (8 - BILINEAR_INTERPOLATION_BITS); + disty <<= (8 - BILINEAR_INTERPOLATION_BITS); + + distxy = distx * disty; + distxiy = distx * (256 - disty); + distixy = (256 - distx) * disty; + distixiy = (256 - distx) * (256 - disty); + + /* Alpha and Blue */ + tl64 = tl & 0xff0000ff; + tr64 = tr & 0xff0000ff; + bl64 = bl & 0xff0000ff; + br64 = br & 0xff0000ff; + + f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; + r = f & 0x0000ff0000ff0000ull; + + /* Red and Green */ + tl64 = tl; + tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull); + + tr64 = tr; + tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull); + + bl64 = bl; + bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull); + + br64 = br; + br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull); + + f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; + r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull); + + return (uint32_t)(r >> 16); +} + +#else + +static force_inline uint32_t +bilinear_interpolation (uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + int distxy, distxiy, distixy, distixiy; + uint32_t f, r; + + distx <<= (8 - BILINEAR_INTERPOLATION_BITS); + disty <<= (8 - BILINEAR_INTERPOLATION_BITS); + + distxy = distx * disty; + distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */ + distixy = (disty << 8) - distxy; /* disty * (256 - distx) */ + distixiy = + 256 * 256 - (disty << 8) - + (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */ + + /* Blue */ + r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; + + /* Green */ + f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy + + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; + r |= f & 0xff000000; + + tl >>= 16; + tr >>= 16; + bl >>= 16; + br >>= 16; + r >>= 16; + + /* Red */ + f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; + r |= f & 0x00ff0000; + + /* Alpha */ + f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy + + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; + r |= f & 0xff000000; + + return r; +} + +#endif +#endif // BILINEAR_INTERPOLATION_BITS <= 4 + +/* + * For each scanline fetched from source image with PAD repeat: + * - calculate how many pixels need to be padded on the left side + * - calculate how many pixels need to be padded on the right side + * - update width to only count pixels which are fetched from the image + * All this information is returned via 'width', 'left_pad', 'right_pad' + * arguments. The code is assuming that 'unit_x' is positive. + * + * Note: 64-bit math is used in order to avoid potential overflows, which + * is probably excessive in many cases. This particular function + * may need its own correctness test and performance tuning. + */ +static force_inline void +pad_repeat_get_scanline_bounds (int32_t source_image_width, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + int32_t * width, + int32_t * left_pad, + int32_t * right_pad) +{ + int64_t max_vx = (int64_t) source_image_width << 16; + int64_t tmp; + if (vx < 0) + { + tmp = ((int64_t) unit_x - 1 - vx) / unit_x; + if (tmp > *width) + { + *left_pad = *width; + *width = 0; + } + else + { + *left_pad = (int32_t) tmp; + *width -= (int32_t) tmp; + } + } + else + { + *left_pad = 0; + } + tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; + if (tmp < 0) + { + *right_pad = *width; + *width = 0; + } + else if (tmp >= *width) + { + *right_pad = 0; + } + else + { + *right_pad = *width - (int32_t) tmp; + *width = (int32_t) tmp; + } +} + +/* A macroified version of specialized nearest scalers for some + * common 8888 and 565 formats. It supports SRC and OVER ops. + * + * There are two repeat versions, one that handles repeat normal, + * and one without repeat handling that only works if the src region + * used is completely covered by the pre-repeated source samples. + * + * The loops are unrolled to process two pixels per iteration for better + * performance on most CPU architectures (superscalar processors + * can issue several operations simultaneously, other processors can hide + * instructions latencies by pipelining operations). Unrolling more + * does not make much sense because the compiler will start running out + * of spare registers soon. + */ + +#define GET_8888_ALPHA(s) ((s) >> 24) + /* This is not actually used since we don't have an OVER with + 565 source, but it is needed to build. */ +#define GET_0565_ALPHA(s) 0xff +#define GET_x888_ALPHA(s) 0xff + +#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \ + src_type_t, dst_type_t, OP, repeat_mode) \ +static force_inline void \ +scanline_func_name (dst_type_t *dst, \ + const src_type_t *src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t src_width_fixed, \ + pixman_bool_t fully_transparent_src) \ +{ \ + uint32_t d; \ + src_type_t s1, s2; \ + uint8_t a1, a2; \ + int x1, x2; \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \ + return; \ + \ + if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \ + abort(); \ + \ + while ((w -= 2) >= 0) \ + { \ + x1 = pixman_fixed_to_int (vx); \ + vx += unit_x; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* This works because we know that unit_x is positive */ \ + while (vx >= 0) \ + vx -= src_width_fixed; \ + } \ + s1 = *(src + x1); \ + \ + x2 = pixman_fixed_to_int (vx); \ + vx += unit_x; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* This works because we know that unit_x is positive */ \ + while (vx >= 0) \ + vx -= src_width_fixed; \ + } \ + s2 = *(src + x2); \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ + { \ + a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ + a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \ + \ + if (a1 == 0xff) \ + { \ + *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ + } \ + else if (s1) \ + { \ + d = convert_ ## DST_FORMAT ## _to_8888 (*dst); \ + s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \ + a1 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ + *dst = convert_8888_to_ ## DST_FORMAT (d); \ + } \ + dst++; \ + \ + if (a2 == 0xff) \ + { \ + *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \ + } \ + else if (s2) \ + { \ + d = convert_## DST_FORMAT ## _to_8888 (*dst); \ + s2 = convert_## SRC_FORMAT ## _to_8888 (s2); \ + a2 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \ + *dst = convert_8888_to_ ## DST_FORMAT (d); \ + } \ + dst++; \ + } \ + else /* PIXMAN_OP_SRC */ \ + { \ + *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ + *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \ + } \ + } \ + \ + if (w & 1) \ + { \ + x1 = pixman_fixed_to_int (vx); \ + s1 = *(src + x1); \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ + { \ + a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ + \ + if (a1 == 0xff) \ + { \ + *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ + } \ + else if (s1) \ + { \ + d = convert_## DST_FORMAT ## _to_8888 (*dst); \ + s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \ + a1 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ + *dst = convert_8888_to_ ## DST_FORMAT (d); \ + } \ + dst++; \ + } \ + else /* PIXMAN_OP_SRC */ \ + { \ + *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ + } \ + } \ +} + +#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, have_mask, mask_is_solid) \ +static void \ +fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type_t *dst_line; \ + mask_type_t *mask_line; \ + src_type_t *src_first_line; \ + int y; \ + pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width); \ + pixman_fixed_t max_vy; \ + pixman_vector_t v; \ + pixman_fixed_t vx, vy; \ + pixman_fixed_t unit_x, unit_y; \ + int32_t left_pad, right_pad; \ + \ + src_type_t *src; \ + dst_type_t *dst; \ + mask_type_t solid_mask; \ + const mask_type_t *mask = &solid_mask; \ + int src_stride, mask_stride, dst_stride; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \ + if (have_mask) \ + { \ + if (mask_is_solid) \ + solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \ + else \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ + mask_stride, mask_line, 1); \ + } \ + /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ + * transformed from destination space to source space */ \ + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ + \ + /* reference point is the center of the pixel */ \ + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ + v.vector[2] = pixman_fixed_1; \ + \ + if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ + return; \ + \ + unit_x = src_image->common.transform->matrix[0][0]; \ + unit_y = src_image->common.transform->matrix[1][1]; \ + \ + /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \ + v.vector[0] -= pixman_fixed_e; \ + v.vector[1] -= pixman_fixed_e; \ + \ + vx = v.vector[0]; \ + vy = v.vector[1]; \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + max_vy = pixman_int_to_fixed (src_image->bits.height); \ + \ + /* Clamp repeating positions inside the actual samples */ \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ + repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ + } \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ + PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \ + &width, &left_pad, &right_pad); \ + vx += left_pad * unit_x; \ + } \ + \ + while (--height >= 0) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + if (have_mask && !mask_is_solid) \ + { \ + mask = mask_line; \ + mask_line += mask_stride; \ + } \ + \ + y = pixman_fixed_to_int (vy); \ + vy += unit_y; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ + { \ + repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \ + src = src_first_line + src_stride * y; \ + if (left_pad > 0) \ + { \ + scanline_func (mask, dst, \ + src + src_image->bits.width - src_image->bits.width + 1, \ + left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \ + } \ + if (width > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ + dst + left_pad, src + src_image->bits.width, width, \ + vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \ + } \ + if (right_pad > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ + dst + left_pad + width, src + src_image->bits.width, \ + right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \ + } \ + } \ + else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + static const src_type_t zero[1] = { 0 }; \ + if (y < 0 || y >= src_image->bits.height) \ + { \ + scanline_func (mask, dst, zero + 1, left_pad + width + right_pad, \ + -pixman_fixed_e, 0, src_width_fixed, TRUE); \ + continue; \ + } \ + src = src_first_line + src_stride * y; \ + if (left_pad > 0) \ + { \ + scanline_func (mask, dst, zero + 1, left_pad, \ + -pixman_fixed_e, 0, src_width_fixed, TRUE); \ + } \ + if (width > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ + dst + left_pad, src + src_image->bits.width, width, \ + vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \ + } \ + if (right_pad > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ + dst + left_pad + width, zero + 1, right_pad, \ + -pixman_fixed_e, 0, src_width_fixed, TRUE); \ + } \ + } \ + else \ + { \ + src = src_first_line + src_stride * y; \ + scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed, \ + unit_x, src_width_fixed, FALSE); \ + } \ + } \ +} + +/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ +#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, have_mask, mask_is_solid) \ + FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, have_mask, mask_is_solid) + +#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \ + repeat_mode) \ + static force_inline void \ + scanline_func##scale_func_name##_wrapper ( \ + const uint8_t *mask, \ + dst_type_t *dst, \ + const src_type_t *src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t fully_transparent_src) \ + { \ + scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \ + } \ + FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \ + src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE) + +#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \ + repeat_mode) \ + FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \ + dst_type_t, repeat_mode) + +#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \ + src_type_t, dst_type_t, OP, repeat_mode) \ + FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ + SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \ + OP, repeat_mode) \ + FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \ + scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ + src_type_t, dst_type_t, repeat_mode) + + +#define SCALED_NEAREST_FLAGS \ + (FAST_PATH_SCALE_TRANSFORM | \ + FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NEAREST_FILTER | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_NARROW_FORMAT) + +#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ + } + +/* Prefer the use of 'cover' variant, because it is faster */ +#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func) + +/*****************************************************************************/ + +/* + * Identify 5 zones in each scanline for bilinear scaling. Depending on + * whether 2 pixels to be interpolated are fetched from the image itself, + * from the padding area around it or from both image and padding area. + */ +static force_inline void +bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + int32_t * left_pad, + int32_t * left_tz, + int32_t * width, + int32_t * right_tz, + int32_t * right_pad) +{ + int width1 = *width, left_pad1, right_pad1; + int width2 = *width, left_pad2, right_pad2; + + pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x, + &width1, &left_pad1, &right_pad1); + pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1, + unit_x, &width2, &left_pad2, &right_pad2); + + *left_pad = left_pad2; + *left_tz = left_pad1 - left_pad2; + *right_tz = right_pad2 - right_pad1; + *right_pad = right_pad1; + *width -= *left_pad + *left_tz + *right_tz + *right_pad; +} + +/* + * Main loop template for single pass bilinear scaling. It needs to be + * provided with 'scanline_func' which should do the compositing operation. + * The needed function has the following prototype: + * + * scanline_func (dst_type_t * dst, + * const mask_type_ * mask, + * const src_type_t * src_top, + * const src_type_t * src_bottom, + * int32_t width, + * int weight_top, + * int weight_bottom, + * pixman_fixed_t vx, + * pixman_fixed_t unit_x, + * pixman_fixed_t max_vx, + * pixman_bool_t zero_src) + * + * Where: + * dst - destination scanline buffer for storing results + * mask - mask buffer (or single value for solid mask) + * src_top, src_bottom - two source scanlines + * width - number of pixels to process + * weight_top - weight of the top row for interpolation + * weight_bottom - weight of the bottom row for interpolation + * vx - initial position for fetching the first pair of + * pixels from the source buffer + * unit_x - position increment needed to move to the next pair + * of pixels + * max_vx - image size as a fixed point value, can be used for + * implementing NORMAL repeat (when it is supported) + * zero_src - boolean hint variable, which is set to TRUE when + * all source pixels are fetched from zero padding + * zone for NONE repeat + * + * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to + * BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that + * for NONE repeat when handling fuzzy antialiased top or bottom image + * edges. Also both top and bottom weight variables are guaranteed to + * have value, which is less than BILINEAR_INTERPOLATION_RANGE. + * For example, the weights can fit into unsigned byte or be used + * with 8-bit SIMD multiplication instructions for 8-bit interpolation + * precision. + */ +#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, flags) \ +static void \ +fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type_t *dst_line; \ + mask_type_t *mask_line; \ + src_type_t *src_first_line; \ + int y1, y2; \ + pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \ + pixman_vector_t v; \ + pixman_fixed_t vx, vy; \ + pixman_fixed_t unit_x, unit_y; \ + int32_t left_pad, left_tz, right_tz, right_pad; \ + \ + dst_type_t *dst; \ + mask_type_t solid_mask; \ + const mask_type_t *mask = &solid_mask; \ + int src_stride, mask_stride, dst_stride; \ + \ + int src_width; \ + pixman_fixed_t src_width_fixed; \ + int max_x; \ + pixman_bool_t need_src_extension; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \ + if (flags & FLAG_HAVE_SOLID_MASK) \ + { \ + solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \ + mask_stride = 0; \ + } \ + else if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + { \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ + mask_stride, mask_line, 1); \ + } \ + \ + /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ + * transformed from destination space to source space */ \ + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ + \ + /* reference point is the center of the pixel */ \ + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ + v.vector[2] = pixman_fixed_1; \ + \ + if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ + return; \ + \ + unit_x = src_image->common.transform->matrix[0][0]; \ + unit_y = src_image->common.transform->matrix[1][1]; \ + \ + v.vector[0] -= pixman_fixed_1 / 2; \ + v.vector[1] -= pixman_fixed_1 / 2; \ + \ + vy = v.vector[1]; \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ + PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \ + &left_pad, &left_tz, &width, &right_tz, &right_pad); \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ + { \ + /* PAD repeat does not need special handling for 'transition zones' and */ \ + /* they can be combined with 'padding zones' safely */ \ + left_pad += left_tz; \ + right_pad += right_tz; \ + left_tz = right_tz = 0; \ + } \ + v.vector[0] += left_pad * unit_x; \ + } \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + vx = v.vector[0]; \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \ + max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1; \ + \ + if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \ + { \ + src_width = 0; \ + \ + while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \ + src_width += src_image->bits.width; \ + \ + need_src_extension = TRUE; \ + } \ + else \ + { \ + src_width = src_image->bits.width; \ + need_src_extension = FALSE; \ + } \ + \ + src_width_fixed = pixman_int_to_fixed (src_width); \ + } \ + \ + while (--height >= 0) \ + { \ + int weight1, weight2; \ + dst = dst_line; \ + dst_line += dst_stride; \ + vx = v.vector[0]; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + { \ + mask = mask_line; \ + mask_line += mask_stride; \ + } \ + \ + y1 = pixman_fixed_to_int (vy); \ + weight2 = pixman_fixed_to_bilinear_weight (vy); \ + if (weight2) \ + { \ + /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */ \ + y2 = y1 + 1; \ + weight1 = BILINEAR_INTERPOLATION_RANGE - weight2; \ + } \ + else \ + { \ + /* set both top and bottom row to the same scanline and tweak weights */ \ + y2 = y1; \ + weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2; \ + } \ + vy += unit_y; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ + { \ + src_type_t *src1, *src2; \ + src_type_t buf1[2]; \ + src_type_t buf2[2]; \ + repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \ + repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \ + src1 = src_first_line + src_stride * y1; \ + src2 = src_first_line + src_stride * y2; \ + \ + if (left_pad > 0) \ + { \ + buf1[0] = buf1[1] = src1[0]; \ + buf2[0] = buf2[1] = src2[0]; \ + scanline_func (dst, mask, \ + buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \ + dst += left_pad; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += left_pad; \ + } \ + if (width > 0) \ + { \ + scanline_func (dst, mask, \ + src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ + dst += width; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += width; \ + } \ + if (right_pad > 0) \ + { \ + buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \ + buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \ + scanline_func (dst, mask, \ + buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \ + } \ + } \ + else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + src_type_t *src1, *src2; \ + src_type_t buf1[2]; \ + src_type_t buf2[2]; \ + /* handle top/bottom zero padding by just setting weights to 0 if needed */ \ + if (y1 < 0) \ + { \ + weight1 = 0; \ + y1 = 0; \ + } \ + if (y1 >= src_image->bits.height) \ + { \ + weight1 = 0; \ + y1 = src_image->bits.height - 1; \ + } \ + if (y2 < 0) \ + { \ + weight2 = 0; \ + y2 = 0; \ + } \ + if (y2 >= src_image->bits.height) \ + { \ + weight2 = 0; \ + y2 = src_image->bits.height - 1; \ + } \ + src1 = src_first_line + src_stride * y1; \ + src2 = src_first_line + src_stride * y2; \ + \ + if (left_pad > 0) \ + { \ + buf1[0] = buf1[1] = 0; \ + buf2[0] = buf2[1] = 0; \ + scanline_func (dst, mask, \ + buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \ + dst += left_pad; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += left_pad; \ + } \ + if (left_tz > 0) \ + { \ + buf1[0] = 0; \ + buf1[1] = src1[0]; \ + buf2[0] = 0; \ + buf2[1] = src2[0]; \ + scanline_func (dst, mask, \ + buf1, buf2, left_tz, weight1, weight2, \ + pixman_fixed_frac (vx), unit_x, 0, FALSE); \ + dst += left_tz; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += left_tz; \ + vx += left_tz * unit_x; \ + } \ + if (width > 0) \ + { \ + scanline_func (dst, mask, \ + src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ + dst += width; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += width; \ + vx += width * unit_x; \ + } \ + if (right_tz > 0) \ + { \ + buf1[0] = src1[src_image->bits.width - 1]; \ + buf1[1] = 0; \ + buf2[0] = src2[src_image->bits.width - 1]; \ + buf2[1] = 0; \ + scanline_func (dst, mask, \ + buf1, buf2, right_tz, weight1, weight2, \ + pixman_fixed_frac (vx), unit_x, 0, FALSE); \ + dst += right_tz; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += right_tz; \ + } \ + if (right_pad > 0) \ + { \ + buf1[0] = buf1[1] = 0; \ + buf2[0] = buf2[1] = 0; \ + scanline_func (dst, mask, \ + buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \ + } \ + } \ + else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + int32_t num_pixels; \ + int32_t width_remain; \ + src_type_t * src_line_top; \ + src_type_t * src_line_bottom; \ + src_type_t buf1[2]; \ + src_type_t buf2[2]; \ + src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \ + src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \ + int i, j; \ + \ + repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \ + repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \ + src_line_top = src_first_line + src_stride * y1; \ + src_line_bottom = src_first_line + src_stride * y2; \ + \ + if (need_src_extension) \ + { \ + for (i=0; i<src_width;) \ + { \ + for (j=0; j<src_image->bits.width; j++, i++) \ + { \ + extended_src_line0[i] = src_line_top[j]; \ + extended_src_line1[i] = src_line_bottom[j]; \ + } \ + } \ + \ + src_line_top = &extended_src_line0[0]; \ + src_line_bottom = &extended_src_line1[0]; \ + } \ + \ + /* Top & Bottom wrap around buffer */ \ + buf1[0] = src_line_top[src_width - 1]; \ + buf1[1] = src_line_top[0]; \ + buf2[0] = src_line_bottom[src_width - 1]; \ + buf2[1] = src_line_bottom[0]; \ + \ + width_remain = width; \ + \ + while (width_remain > 0) \ + { \ + /* We use src_width_fixed because it can make vx in original source range */ \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ + \ + /* Wrap around part */ \ + if (pixman_fixed_to_int (vx) == src_width - 1) \ + { \ + /* for positive unit_x \ + * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \ + * \ + * vx is in range [0, src_width_fixed - pixman_fixed_e] \ + * So we are safe from overflow. \ + */ \ + num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \ + \ + if (num_pixels > width_remain) \ + num_pixels = width_remain; \ + \ + scanline_func (dst, mask, buf1, buf2, num_pixels, \ + weight1, weight2, pixman_fixed_frac(vx), \ + unit_x, src_width_fixed, FALSE); \ + \ + width_remain -= num_pixels; \ + vx += num_pixels * unit_x; \ + dst += num_pixels; \ + \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += num_pixels; \ + \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ + } \ + \ + /* Normal scanline composite */ \ + if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \ + { \ + /* for positive unit_x \ + * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \ + * \ + * vx is in range [0, src_width_fixed - pixman_fixed_e] \ + * So we are safe from overflow here. \ + */ \ + num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \ + / unit_x) + 1; \ + \ + if (num_pixels > width_remain) \ + num_pixels = width_remain; \ + \ + scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \ + weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \ + \ + width_remain -= num_pixels; \ + vx += num_pixels * unit_x; \ + dst += num_pixels; \ + \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += num_pixels; \ + } \ + } \ + } \ + else \ + { \ + scanline_func (dst, mask, src_first_line + src_stride * y1, \ + src_first_line + src_stride * y2, width, \ + weight1, weight2, vx, unit_x, max_vx, FALSE); \ + } \ + } \ +} + +/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ +#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, flags) \ + FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\ + dst_type_t, repeat_mode, flags) + +#define SCALED_BILINEAR_FLAGS \ + (FAST_PATH_SCALE_TRANSFORM | \ + FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_BILINEAR_FILTER | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_NARROW_FORMAT) + +#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ + } + +/* Prefer the use of 'cover' variant, because it is faster */ +#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \ + SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func) + +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func) + +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func) + +#endif diff --git a/src/pixman/pixman/pixman-linear-gradient.c b/src/pixman/pixman/pixman-linear-gradient.c new file mode 100644 index 0000000..40c8c9f --- /dev/null +++ b/src/pixman/pixman/pixman-linear-gradient.c @@ -0,0 +1,287 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. + * 2005 Lars Knoll & Zack Rusin, Trolltech + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include <stdlib.h> +#include "pixman-private.h" + +static pixman_bool_t +linear_gradient_is_horizontal (pixman_image_t *image, + int x, + int y, + int width, + int height) +{ + linear_gradient_t *linear = (linear_gradient_t *)image; + pixman_vector_t v; + pixman_fixed_32_32_t l; + pixman_fixed_48_16_t dx, dy; + double inc; + + if (image->common.transform) + { + /* projective transformation */ + if (image->common.transform->matrix[2][0] != 0 || + image->common.transform->matrix[2][1] != 0 || + image->common.transform->matrix[2][2] == 0) + { + return FALSE; + } + + v.vector[0] = image->common.transform->matrix[0][1]; + v.vector[1] = image->common.transform->matrix[1][1]; + v.vector[2] = image->common.transform->matrix[2][2]; + } + else + { + v.vector[0] = 0; + v.vector[1] = pixman_fixed_1; + v.vector[2] = pixman_fixed_1; + } + + dx = linear->p2.x - linear->p1.x; + dy = linear->p2.y - linear->p1.y; + + l = dx * dx + dy * dy; + + if (l == 0) + return FALSE; + + /* + * compute how much the input of the gradient walked changes + * when moving vertically through the whole image + */ + inc = height * (double) pixman_fixed_1 * pixman_fixed_1 * + (dx * v.vector[0] + dy * v.vector[1]) / + (v.vector[2] * (double) l); + + /* check that casting to integer would result in 0 */ + if (-1 < inc && inc < 1) + return TRUE; + + return FALSE; +} + +static uint32_t * +linear_get_scanline_narrow (pixman_iter_t *iter, + const uint32_t *mask) +{ + pixman_image_t *image = iter->image; + int x = iter->x; + int y = iter->y; + int width = iter->width; + uint32_t * buffer = iter->buffer; + + pixman_vector_t v, unit; + pixman_fixed_32_32_t l; + pixman_fixed_48_16_t dx, dy; + gradient_t *gradient = (gradient_t *)image; + linear_gradient_t *linear = (linear_gradient_t *)image; + uint32_t *end = buffer + width; + pixman_gradient_walker_t walker; + + _pixman_gradient_walker_init (&walker, gradient, image->common.repeat); + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (image->common.transform) + { + if (!pixman_transform_point_3d (image->common.transform, &v)) + return iter->buffer; + + unit.vector[0] = image->common.transform->matrix[0][0]; + unit.vector[1] = image->common.transform->matrix[1][0]; + unit.vector[2] = image->common.transform->matrix[2][0]; + } + else + { + unit.vector[0] = pixman_fixed_1; + unit.vector[1] = 0; + unit.vector[2] = 0; + } + + dx = linear->p2.x - linear->p1.x; + dy = linear->p2.y - linear->p1.y; + + l = dx * dx + dy * dy; + + if (l == 0 || unit.vector[2] == 0) + { + /* affine transformation only */ + pixman_fixed_32_32_t t, next_inc; + double inc; + + if (l == 0 || v.vector[2] == 0) + { + t = 0; + inc = 0; + } + else + { + double invden, v2; + + invden = pixman_fixed_1 * (double) pixman_fixed_1 / + (l * (double) v.vector[2]); + v2 = v.vector[2] * (1. / pixman_fixed_1); + t = ((dx * v.vector[0] + dy * v.vector[1]) - + (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden; + inc = (dx * unit.vector[0] + dy * unit.vector[1]) * invden; + } + next_inc = 0; + + if (((pixman_fixed_32_32_t )(inc * width)) == 0) + { + register uint32_t color; + + color = _pixman_gradient_walker_pixel (&walker, t); + while (buffer < end) + *buffer++ = color; + } + else + { + int i; + + i = 0; + while (buffer < end) + { + if (!mask || *mask++) + { + *buffer = _pixman_gradient_walker_pixel (&walker, + t + next_inc); + } + i++; + next_inc = inc * i; + buffer++; + } + } + } + else + { + /* projective transformation */ + double t; + + t = 0; + + while (buffer < end) + { + if (!mask || *mask++) + { + if (v.vector[2] != 0) + { + double invden, v2; + + invden = pixman_fixed_1 * (double) pixman_fixed_1 / + (l * (double) v.vector[2]); + v2 = v.vector[2] * (1. / pixman_fixed_1); + t = ((dx * v.vector[0] + dy * v.vector[1]) - + (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden; + } + + *buffer = _pixman_gradient_walker_pixel (&walker, t); + } + + ++buffer; + + v.vector[0] += unit.vector[0]; + v.vector[1] += unit.vector[1]; + v.vector[2] += unit.vector[2]; + } + } + + iter->y++; + + return iter->buffer; +} + +static uint32_t * +linear_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) +{ + uint32_t *buffer = linear_get_scanline_narrow (iter, NULL); + + pixman_expand_to_float ( + (argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width); + + return buffer; +} + +void +_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter) +{ + if (linear_gradient_is_horizontal ( + iter->image, iter->x, iter->y, iter->width, iter->height)) + { + if (iter->iter_flags & ITER_NARROW) + linear_get_scanline_narrow (iter, NULL); + else + linear_get_scanline_wide (iter, NULL); + + iter->get_scanline = _pixman_iter_get_scanline_noop; + } + else + { + if (iter->iter_flags & ITER_NARROW) + iter->get_scanline = linear_get_scanline_narrow; + else + iter->get_scanline = linear_get_scanline_wide; + } +} + +PIXMAN_EXPORT pixman_image_t * +pixman_image_create_linear_gradient (const pixman_point_fixed_t * p1, + const pixman_point_fixed_t * p2, + const pixman_gradient_stop_t *stops, + int n_stops) +{ + pixman_image_t *image; + linear_gradient_t *linear; + + image = _pixman_image_allocate (); + + if (!image) + return NULL; + + linear = &image->linear; + + if (!_pixman_init_gradient (&linear->common, stops, n_stops)) + { + free (image); + return NULL; + } + + linear->p1 = *p1; + linear->p2 = *p2; + + image->type = LINEAR; + + return image; +} + diff --git a/src/pixman/pixman/pixman-matrix.c b/src/pixman/pixman/pixman-matrix.c new file mode 100644 index 0000000..4032c13 --- /dev/null +++ b/src/pixman/pixman/pixman-matrix.c @@ -0,0 +1,1073 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +/* + * Matrix interfaces + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <math.h> +#include <string.h> +#include "pixman-private.h" + +#define F(x) pixman_int_to_fixed (x) + +static force_inline int +count_leading_zeros (uint32_t x) +{ +#ifdef HAVE_BUILTIN_CLZ + return __builtin_clz (x); +#else + int n = 0; + while (x) + { + n++; + x >>= 1; + } + return 32 - n; +#endif +} + +/* + * Large signed/unsigned integer division with rounding for the platforms with + * only 64-bit integer data type supported (no 128-bit data type). + * + * Arguments: + * hi, lo - high and low 64-bit parts of the dividend + * div - 48-bit divisor + * + * Returns: lowest 64 bits of the result as a return value and highest 64 + * bits of the result to "result_hi" pointer + */ + +/* grade-school unsigned division (128-bit by 48-bit) with rounding to nearest */ +static force_inline uint64_t +rounded_udiv_128_by_48 (uint64_t hi, + uint64_t lo, + uint64_t div, + uint64_t *result_hi) +{ + uint64_t tmp, remainder, result_lo; + assert(div < ((uint64_t)1 << 48)); + + remainder = hi % div; + *result_hi = hi / div; + + tmp = (remainder << 16) + (lo >> 48); + result_lo = tmp / div; + remainder = tmp % div; + + tmp = (remainder << 16) + ((lo >> 32) & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + tmp = (remainder << 16) + ((lo >> 16) & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + tmp = (remainder << 16) + (lo & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + /* round to nearest */ + if (remainder * 2 >= div && ++result_lo == 0) + *result_hi += 1; + + return result_lo; +} + +/* signed division (128-bit by 49-bit) with rounding to nearest */ +static inline int64_t +rounded_sdiv_128_by_49 (int64_t hi, + uint64_t lo, + int64_t div, + int64_t *signed_result_hi) +{ + uint64_t result_lo, result_hi; + int sign = 0; + if (div < 0) + { + div = -div; + sign ^= 1; + } + if (hi < 0) + { + if (lo != 0) + hi++; + hi = -hi; + lo = -lo; + sign ^= 1; + } + result_lo = rounded_udiv_128_by_48 (hi, lo, div, &result_hi); + if (sign) + { + if (result_lo != 0) + result_hi++; + result_hi = -result_hi; + result_lo = -result_lo; + } + if (signed_result_hi) + { + *signed_result_hi = result_hi; + } + return result_lo; +} + +/* + * Multiply 64.16 fixed point value by (2^scalebits) and convert + * to 128-bit integer. + */ +static force_inline void +fixed_64_16_to_int128 (int64_t hi, + int64_t lo, + int64_t *rhi, + int64_t *rlo, + int scalebits) +{ + /* separate integer and fractional parts */ + hi += lo >> 16; + lo &= 0xFFFF; + + if (scalebits <= 0) + { + *rlo = hi >> (-scalebits); + *rhi = *rlo >> 63; + } + else + { + *rhi = hi >> (64 - scalebits); + *rlo = (uint64_t)hi << scalebits; + if (scalebits < 16) + *rlo += lo >> (16 - scalebits); + else + *rlo += lo << (scalebits - 16); + } +} + +/* + * Convert 112.16 fixed point value to 48.16 with clamping for the out + * of range values. + */ +static force_inline pixman_fixed_48_16_t +fixed_112_16_to_fixed_48_16 (int64_t hi, int64_t lo, pixman_bool_t *clampflag) +{ + if ((lo >> 63) != hi) + { + *clampflag = TRUE; + return hi >= 0 ? INT64_MAX : INT64_MIN; + } + else + { + return lo; + } +} + +/* + * Transform a point with 31.16 fixed point coordinates from the destination + * space to a point with 48.16 fixed point coordinates in the source space. + * No overflows are possible for affine transformations and the results are + * accurate including the least significant bit. Projective transformations + * may overflow, in this case the results are just clamped to return maximum + * or minimum 48.16 values (so that the caller can at least handle the NONE + * and PAD repeats correctly) and the return value is FALSE to indicate that + * such clamping has happened. + */ +PIXMAN_EXPORT pixman_bool_t +pixman_transform_point_31_16 (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + pixman_bool_t clampflag = FALSE; + int i; + int64_t tmp[3][2], divint; + uint16_t divfrac; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + for (i = 0; i < 3; i++) + { + tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); + tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); + } + + /* + * separate 64-bit integer and 16-bit fractional parts for the divisor, + * which is also scaled by 65536 after fixed point multiplication. + */ + divint = tmp[2][0] + (tmp[2][1] >> 16); + divfrac = tmp[2][1] & 0xFFFF; + + if (divint == pixman_fixed_1 && divfrac == 0) + { + /* + * this is a simple affine transformation + */ + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + result->v[2] = pixman_fixed_1; + } + else if (divint == 0 && divfrac == 0) + { + /* + * handle zero divisor (if the values are non-zero, set the + * results to maximum positive or minimum negative) + */ + clampflag = TRUE; + + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + + if (result->v[0] > 0) + result->v[0] = INT64_MAX; + else if (result->v[0] < 0) + result->v[0] = INT64_MIN; + + if (result->v[1] > 0) + result->v[1] = INT64_MAX; + else if (result->v[1] < 0) + result->v[1] = INT64_MIN; + } + else + { + /* + * projective transformation, analyze the top 32 bits of the divisor + */ + int32_t hi32divbits = divint >> 32; + if (hi32divbits < 0) + hi32divbits = ~hi32divbits; + + if (hi32divbits == 0) + { + /* the divisor is small, we can actually keep all the bits */ + int64_t hi, rhi, lo, rlo; + int64_t div = (divint << 16) + divfrac; + + fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + + fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + } + else + { + /* the divisor needs to be reduced to 48 bits */ + int64_t hi, rhi, lo, rlo, div; + int shift = 32 - count_leading_zeros (hi32divbits); + fixed_64_16_to_int128 (divint, divfrac, &hi, &div, 16 - shift); + + fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32 - shift); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + + fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32 - shift); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + } + } + result->v[2] = pixman_fixed_1; + return !clampflag; +} + +PIXMAN_EXPORT void +pixman_transform_point_31_16_affine (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + int64_t hi0, lo0, hi1, lo1; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + hi0 = (int64_t)t->matrix[0][0] * (v->v[0] >> 16); + lo0 = (int64_t)t->matrix[0][0] * (v->v[0] & 0xFFFF); + hi0 += (int64_t)t->matrix[0][1] * (v->v[1] >> 16); + lo0 += (int64_t)t->matrix[0][1] * (v->v[1] & 0xFFFF); + hi0 += (int64_t)t->matrix[0][2]; + + hi1 = (int64_t)t->matrix[1][0] * (v->v[0] >> 16); + lo1 = (int64_t)t->matrix[1][0] * (v->v[0] & 0xFFFF); + hi1 += (int64_t)t->matrix[1][1] * (v->v[1] >> 16); + lo1 += (int64_t)t->matrix[1][1] * (v->v[1] & 0xFFFF); + hi1 += (int64_t)t->matrix[1][2]; + + result->v[0] = hi0 + ((lo0 + 0x8000) >> 16); + result->v[1] = hi1 + ((lo1 + 0x8000) >> 16); + result->v[2] = pixman_fixed_1; +} + +PIXMAN_EXPORT void +pixman_transform_point_31_16_3d (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + int i; + int64_t tmp[3][2]; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + for (i = 0; i < 3; i++) + { + tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); + tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); + } + + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16); +} + +PIXMAN_EXPORT void +pixman_transform_init_identity (struct pixman_transform *matrix) +{ + int i; + + memset (matrix, '\0', sizeof (struct pixman_transform)); + for (i = 0; i < 3; i++) + matrix->matrix[i][i] = F (1); +} + +typedef pixman_fixed_32_32_t pixman_fixed_34_30_t; + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_point_3d (const struct pixman_transform *transform, + struct pixman_vector * vector) +{ + pixman_vector_48_16_t tmp; + tmp.v[0] = vector->vector[0]; + tmp.v[1] = vector->vector[1]; + tmp.v[2] = vector->vector[2]; + + pixman_transform_point_31_16_3d (transform, &tmp, &tmp); + + vector->vector[0] = tmp.v[0]; + vector->vector[1] = tmp.v[1]; + vector->vector[2] = tmp.v[2]; + + return vector->vector[0] == tmp.v[0] && + vector->vector[1] == tmp.v[1] && + vector->vector[2] == tmp.v[2]; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_point (const struct pixman_transform *transform, + struct pixman_vector * vector) +{ + pixman_vector_48_16_t tmp; + tmp.v[0] = vector->vector[0]; + tmp.v[1] = vector->vector[1]; + tmp.v[2] = vector->vector[2]; + + if (!pixman_transform_point_31_16 (transform, &tmp, &tmp)) + return FALSE; + + vector->vector[0] = tmp.v[0]; + vector->vector[1] = tmp.v[1]; + vector->vector[2] = tmp.v[2]; + + return vector->vector[0] == tmp.v[0] && + vector->vector[1] == tmp.v[1] && + vector->vector[2] == tmp.v[2]; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_multiply (struct pixman_transform * dst, + const struct pixman_transform *l, + const struct pixman_transform *r) +{ + struct pixman_transform d; + int dx, dy; + int o; + + for (dy = 0; dy < 3; dy++) + { + for (dx = 0; dx < 3; dx++) + { + pixman_fixed_48_16_t v; + pixman_fixed_32_32_t partial; + + v = 0; + for (o = 0; o < 3; o++) + { + partial = + (pixman_fixed_32_32_t) l->matrix[dy][o] * + (pixman_fixed_32_32_t) r->matrix[o][dx]; + + v += (partial + 0x8000) >> 16; + } + + if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) + return FALSE; + + d.matrix[dy][dx] = (pixman_fixed_t) v; + } + } + + *dst = d; + return TRUE; +} + +PIXMAN_EXPORT void +pixman_transform_init_scale (struct pixman_transform *t, + pixman_fixed_t sx, + pixman_fixed_t sy) +{ + memset (t, '\0', sizeof (struct pixman_transform)); + + t->matrix[0][0] = sx; + t->matrix[1][1] = sy; + t->matrix[2][2] = F (1); +} + +static pixman_fixed_t +fixed_inverse (pixman_fixed_t x) +{ + return (pixman_fixed_t) ((((pixman_fixed_48_16_t) F (1)) * F (1)) / x); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_scale (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t sx, + pixman_fixed_t sy) +{ + struct pixman_transform t; + + if (sx == 0 || sy == 0) + return FALSE; + + if (forward) + { + pixman_transform_init_scale (&t, sx, sy); + if (!pixman_transform_multiply (forward, &t, forward)) + return FALSE; + } + + if (reverse) + { + pixman_transform_init_scale (&t, fixed_inverse (sx), + fixed_inverse (sy)); + if (!pixman_transform_multiply (reverse, reverse, &t)) + return FALSE; + } + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_transform_init_rotate (struct pixman_transform *t, + pixman_fixed_t c, + pixman_fixed_t s) +{ + memset (t, '\0', sizeof (struct pixman_transform)); + + t->matrix[0][0] = c; + t->matrix[0][1] = -s; + t->matrix[1][0] = s; + t->matrix[1][1] = c; + t->matrix[2][2] = F (1); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_rotate (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t c, + pixman_fixed_t s) +{ + struct pixman_transform t; + + if (forward) + { + pixman_transform_init_rotate (&t, c, s); + if (!pixman_transform_multiply (forward, &t, forward)) + return FALSE; + } + + if (reverse) + { + pixman_transform_init_rotate (&t, c, -s); + if (!pixman_transform_multiply (reverse, reverse, &t)) + return FALSE; + } + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_transform_init_translate (struct pixman_transform *t, + pixman_fixed_t tx, + pixman_fixed_t ty) +{ + memset (t, '\0', sizeof (struct pixman_transform)); + + t->matrix[0][0] = F (1); + t->matrix[0][2] = tx; + t->matrix[1][1] = F (1); + t->matrix[1][2] = ty; + t->matrix[2][2] = F (1); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_translate (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t tx, + pixman_fixed_t ty) +{ + struct pixman_transform t; + + if (forward) + { + pixman_transform_init_translate (&t, tx, ty); + + if (!pixman_transform_multiply (forward, &t, forward)) + return FALSE; + } + + if (reverse) + { + pixman_transform_init_translate (&t, -tx, -ty); + + if (!pixman_transform_multiply (reverse, reverse, &t)) + return FALSE; + } + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_bounds (const struct pixman_transform *matrix, + struct pixman_box16 * b) + +{ + struct pixman_vector v[4]; + int i; + int x1, y1, x2, y2; + + v[0].vector[0] = F (b->x1); + v[0].vector[1] = F (b->y1); + v[0].vector[2] = F (1); + + v[1].vector[0] = F (b->x2); + v[1].vector[1] = F (b->y1); + v[1].vector[2] = F (1); + + v[2].vector[0] = F (b->x2); + v[2].vector[1] = F (b->y2); + v[2].vector[2] = F (1); + + v[3].vector[0] = F (b->x1); + v[3].vector[1] = F (b->y2); + v[3].vector[2] = F (1); + + for (i = 0; i < 4; i++) + { + if (!pixman_transform_point (matrix, &v[i])) + return FALSE; + + x1 = pixman_fixed_to_int (v[i].vector[0]); + y1 = pixman_fixed_to_int (v[i].vector[1]); + x2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[0])); + y2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[1])); + + if (i == 0) + { + b->x1 = x1; + b->y1 = y1; + b->x2 = x2; + b->y2 = y2; + } + else + { + if (x1 < b->x1) b->x1 = x1; + if (y1 < b->y1) b->y1 = y1; + if (x2 > b->x2) b->x2 = x2; + if (y2 > b->y2) b->y2 = y2; + } + } + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_invert (struct pixman_transform * dst, + const struct pixman_transform *src) +{ + struct pixman_f_transform m; + + pixman_f_transform_from_pixman_transform (&m, src); + + if (!pixman_f_transform_invert (&m, &m)) + return FALSE; + + if (!pixman_transform_from_pixman_f_transform (dst, &m)) + return FALSE; + + return TRUE; +} + +static pixman_bool_t +within_epsilon (pixman_fixed_t a, + pixman_fixed_t b, + pixman_fixed_t epsilon) +{ + pixman_fixed_t t = a - b; + + if (t < 0) + t = -t; + + return t <= epsilon; +} + +#define EPSILON (pixman_fixed_t) (2) + +#define IS_SAME(a, b) (within_epsilon (a, b, EPSILON)) +#define IS_ZERO(a) (within_epsilon (a, 0, EPSILON)) +#define IS_ONE(a) (within_epsilon (a, F (1), EPSILON)) +#define IS_UNIT(a) \ + (within_epsilon (a, F (1), EPSILON) || \ + within_epsilon (a, F (-1), EPSILON) || \ + IS_ZERO (a)) +#define IS_INT(a) (IS_ZERO (pixman_fixed_frac (a))) + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_is_identity (const struct pixman_transform *t) +{ + return (IS_SAME (t->matrix[0][0], t->matrix[1][1]) && + IS_SAME (t->matrix[0][0], t->matrix[2][2]) && + !IS_ZERO (t->matrix[0][0]) && + IS_ZERO (t->matrix[0][1]) && + IS_ZERO (t->matrix[0][2]) && + IS_ZERO (t->matrix[1][0]) && + IS_ZERO (t->matrix[1][2]) && + IS_ZERO (t->matrix[2][0]) && + IS_ZERO (t->matrix[2][1])); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_is_scale (const struct pixman_transform *t) +{ + return (!IS_ZERO (t->matrix[0][0]) && + IS_ZERO (t->matrix[0][1]) && + IS_ZERO (t->matrix[0][2]) && + + IS_ZERO (t->matrix[1][0]) && + !IS_ZERO (t->matrix[1][1]) && + IS_ZERO (t->matrix[1][2]) && + + IS_ZERO (t->matrix[2][0]) && + IS_ZERO (t->matrix[2][1]) && + !IS_ZERO (t->matrix[2][2])); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_is_int_translate (const struct pixman_transform *t) +{ + return (IS_ONE (t->matrix[0][0]) && + IS_ZERO (t->matrix[0][1]) && + IS_INT (t->matrix[0][2]) && + + IS_ZERO (t->matrix[1][0]) && + IS_ONE (t->matrix[1][1]) && + IS_INT (t->matrix[1][2]) && + + IS_ZERO (t->matrix[2][0]) && + IS_ZERO (t->matrix[2][1]) && + IS_ONE (t->matrix[2][2])); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_is_inverse (const struct pixman_transform *a, + const struct pixman_transform *b) +{ + struct pixman_transform t; + + if (!pixman_transform_multiply (&t, a, b)) + return FALSE; + + return pixman_transform_is_identity (&t); +} + +PIXMAN_EXPORT void +pixman_f_transform_from_pixman_transform (struct pixman_f_transform * ft, + const struct pixman_transform *t) +{ + int i, j; + + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + ft->m[j][i] = pixman_fixed_to_double (t->matrix[j][i]); + } +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_from_pixman_f_transform (struct pixman_transform * t, + const struct pixman_f_transform *ft) +{ + int i, j; + + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + { + double d = ft->m[j][i]; + if (d < -32767.0 || d > 32767.0) + return FALSE; + d = d * 65536.0 + 0.5; + t->matrix[j][i] = (pixman_fixed_t) floor (d); + } + } + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_invert (struct pixman_f_transform * dst, + const struct pixman_f_transform *src) +{ + static const int a[3] = { 2, 2, 1 }; + static const int b[3] = { 1, 0, 0 }; + pixman_f_transform_t d; + double det; + int i, j; + + det = 0; + for (i = 0; i < 3; i++) + { + double p; + int ai = a[i]; + int bi = b[i]; + p = src->m[i][0] * (src->m[ai][2] * src->m[bi][1] - + src->m[ai][1] * src->m[bi][2]); + if (i == 1) + p = -p; + det += p; + } + + if (det == 0) + return FALSE; + + det = 1 / det; + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + { + double p; + int ai = a[i]; + int aj = a[j]; + int bi = b[i]; + int bj = b[j]; + + p = (src->m[ai][aj] * src->m[bi][bj] - + src->m[ai][bj] * src->m[bi][aj]); + + if (((i + j) & 1) != 0) + p = -p; + + d.m[j][i] = det * p; + } + } + + *dst = d; + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_point (const struct pixman_f_transform *t, + struct pixman_f_vector * v) +{ + struct pixman_f_vector result; + int i, j; + double a; + + for (j = 0; j < 3; j++) + { + a = 0; + for (i = 0; i < 3; i++) + a += t->m[j][i] * v->v[i]; + result.v[j] = a; + } + + if (!result.v[2]) + return FALSE; + + for (j = 0; j < 2; j++) + v->v[j] = result.v[j] / result.v[2]; + + v->v[2] = 1; + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_f_transform_point_3d (const struct pixman_f_transform *t, + struct pixman_f_vector * v) +{ + struct pixman_f_vector result; + int i, j; + double a; + + for (j = 0; j < 3; j++) + { + a = 0; + for (i = 0; i < 3; i++) + a += t->m[j][i] * v->v[i]; + result.v[j] = a; + } + + *v = result; +} + +PIXMAN_EXPORT void +pixman_f_transform_multiply (struct pixman_f_transform * dst, + const struct pixman_f_transform *l, + const struct pixman_f_transform *r) +{ + struct pixman_f_transform d; + int dx, dy; + int o; + + for (dy = 0; dy < 3; dy++) + { + for (dx = 0; dx < 3; dx++) + { + double v = 0; + for (o = 0; o < 3; o++) + v += l->m[dy][o] * r->m[o][dx]; + d.m[dy][dx] = v; + } + } + + *dst = d; +} + +PIXMAN_EXPORT void +pixman_f_transform_init_scale (struct pixman_f_transform *t, + double sx, + double sy) +{ + t->m[0][0] = sx; + t->m[0][1] = 0; + t->m[0][2] = 0; + t->m[1][0] = 0; + t->m[1][1] = sy; + t->m[1][2] = 0; + t->m[2][0] = 0; + t->m[2][1] = 0; + t->m[2][2] = 1; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_scale (struct pixman_f_transform *forward, + struct pixman_f_transform *reverse, + double sx, + double sy) +{ + struct pixman_f_transform t; + + if (sx == 0 || sy == 0) + return FALSE; + + if (forward) + { + pixman_f_transform_init_scale (&t, sx, sy); + pixman_f_transform_multiply (forward, &t, forward); + } + + if (reverse) + { + pixman_f_transform_init_scale (&t, 1 / sx, 1 / sy); + pixman_f_transform_multiply (reverse, reverse, &t); + } + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_f_transform_init_rotate (struct pixman_f_transform *t, + double c, + double s) +{ + t->m[0][0] = c; + t->m[0][1] = -s; + t->m[0][2] = 0; + t->m[1][0] = s; + t->m[1][1] = c; + t->m[1][2] = 0; + t->m[2][0] = 0; + t->m[2][1] = 0; + t->m[2][2] = 1; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_rotate (struct pixman_f_transform *forward, + struct pixman_f_transform *reverse, + double c, + double s) +{ + struct pixman_f_transform t; + + if (forward) + { + pixman_f_transform_init_rotate (&t, c, s); + pixman_f_transform_multiply (forward, &t, forward); + } + + if (reverse) + { + pixman_f_transform_init_rotate (&t, c, -s); + pixman_f_transform_multiply (reverse, reverse, &t); + } + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_f_transform_init_translate (struct pixman_f_transform *t, + double tx, + double ty) +{ + t->m[0][0] = 1; + t->m[0][1] = 0; + t->m[0][2] = tx; + t->m[1][0] = 0; + t->m[1][1] = 1; + t->m[1][2] = ty; + t->m[2][0] = 0; + t->m[2][1] = 0; + t->m[2][2] = 1; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_translate (struct pixman_f_transform *forward, + struct pixman_f_transform *reverse, + double tx, + double ty) +{ + struct pixman_f_transform t; + + if (forward) + { + pixman_f_transform_init_translate (&t, tx, ty); + pixman_f_transform_multiply (forward, &t, forward); + } + + if (reverse) + { + pixman_f_transform_init_translate (&t, -tx, -ty); + pixman_f_transform_multiply (reverse, reverse, &t); + } + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_bounds (const struct pixman_f_transform *t, + struct pixman_box16 * b) +{ + struct pixman_f_vector v[4]; + int i; + int x1, y1, x2, y2; + + v[0].v[0] = b->x1; + v[0].v[1] = b->y1; + v[0].v[2] = 1; + v[1].v[0] = b->x2; + v[1].v[1] = b->y1; + v[1].v[2] = 1; + v[2].v[0] = b->x2; + v[2].v[1] = b->y2; + v[2].v[2] = 1; + v[3].v[0] = b->x1; + v[3].v[1] = b->y2; + v[3].v[2] = 1; + + for (i = 0; i < 4; i++) + { + if (!pixman_f_transform_point (t, &v[i])) + return FALSE; + + x1 = floor (v[i].v[0]); + y1 = floor (v[i].v[1]); + x2 = ceil (v[i].v[0]); + y2 = ceil (v[i].v[1]); + + if (i == 0) + { + b->x1 = x1; + b->y1 = y1; + b->x2 = x2; + b->y2 = y2; + } + else + { + if (x1 < b->x1) b->x1 = x1; + if (y1 < b->y1) b->y1 = y1; + if (x2 > b->x2) b->x2 = x2; + if (y2 > b->y2) b->y2 = y2; + } + } + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_f_transform_init_identity (struct pixman_f_transform *t) +{ + int i, j; + + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + t->m[j][i] = i == j ? 1 : 0; + } +} diff --git a/src/pixman/pixman/pixman-mips-dspr2-asm.S b/src/pixman/pixman/pixman-mips-dspr2-asm.S new file mode 100644 index 0000000..866e93e --- /dev/null +++ b/src/pixman/pixman/pixman-mips-dspr2-asm.S @@ -0,0 +1,4283 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nemanja Lukic (nlukic@mips.com) + */ + +#include "pixman-private.h" +#include "pixman-mips-dspr2-asm.h" + +LEAF_MIPS_DSPR2(pixman_fill_buff16_mips) +/* + * a0 - *dest + * a1 - count (bytes) + * a2 - value to fill buffer with + */ + + beqz a1, 3f + andi t1, a0, 0x0002 + beqz t1, 0f /* check if address is 4-byte aligned */ + nop + sh a2, 0(a0) + addiu a0, a0, 2 + addiu a1, a1, -2 +0: + srl t1, a1, 5 /* t1 how many multiples of 32 bytes */ + replv.ph a2, a2 /* replicate fill value (16bit) in a2 */ + beqz t1, 2f + nop +1: + addiu t1, t1, -1 + beqz t1, 11f + addiu a1, a1, -32 + pref 30, 32(a0) + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + b 1b + addiu a0, a0, 32 +11: + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + addiu a0, a0, 32 +2: + blez a1, 3f + addiu a1, a1, -2 + sh a2, 0(a0) + b 2b + addiu a0, a0, 2 +3: + jr ra + nop + +END(pixman_fill_buff16_mips) + +LEAF_MIPS32R2(pixman_fill_buff32_mips) +/* + * a0 - *dest + * a1 - count (bytes) + * a2 - value to fill buffer with + */ + + beqz a1, 3f + nop + srl t1, a1, 5 /* t1 how many multiples of 32 bytes */ + beqz t1, 2f + nop +1: + addiu t1, t1, -1 + beqz t1, 11f + addiu a1, a1, -32 + pref 30, 32(a0) + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + b 1b + addiu a0, a0, 32 +11: + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + addiu a0, a0, 32 +2: + blez a1, 3f + addiu a1, a1, -4 + sw a2, 0(a0) + b 2b + addiu a0, a0, 4 +3: + jr ra + nop + +END(pixman_fill_buff32_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop + li t4, 0xf800f800 + li t5, 0x07e007e0 + li t6, 0x001f001f +1: + lw t0, 0(a1) + lw t1, 4(a1) + addiu a1, a1, 8 + addiu a2, a2, -2 + + CONVERT_2x8888_TO_2x0565 t0, t1, t2, t3, t4, t5, t6, t7, t8 + + sh t2, 0(a0) + sh t3, 2(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 4 +2: + beqz a2, 3f + nop + lw t0, 0(a1) + + CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 + + sh t1, 0(a0) +3: + j ra + nop + +END(pixman_composite_src_8888_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (r5g6b5) + * a2 - w + */ + + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop + li t4, 0x07e007e0 + li t5, 0x001F001F +1: + lhu t0, 0(a1) + lhu t1, 2(a1) + addiu a1, a1, 4 + addiu a2, a2, -2 + + CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 + + sw t2, 0(a0) + sw t3, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + lhu t0, 0(a1) + + CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3 + + sw t1, 0(a0) +3: + j ra + nop + +END(pixman_composite_src_0565_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (x8r8g8b8) + * a2 - w + */ + + beqz a2, 4f + nop + li t9, 0xff000000 + srl t8, a2, 3 /* t1 = how many multiples of 8 src pixels */ + beqz t8, 3f /* branch if less than 8 src pixels */ + nop +1: + addiu t8, t8, -1 + beqz t8, 2f + addiu a2, a2, -8 + pref 0, 32(a1) + lw t0, 0(a1) + lw t1, 4(a1) + lw t2, 8(a1) + lw t3, 12(a1) + lw t4, 16(a1) + lw t5, 20(a1) + lw t6, 24(a1) + lw t7, 28(a1) + addiu a1, a1, 32 + or t0, t0, t9 + or t1, t1, t9 + or t2, t2, t9 + or t3, t3, t9 + or t4, t4, t9 + or t5, t5, t9 + or t6, t6, t9 + or t7, t7, t9 + pref 30, 32(a0) + sw t0, 0(a0) + sw t1, 4(a0) + sw t2, 8(a0) + sw t3, 12(a0) + sw t4, 16(a0) + sw t5, 20(a0) + sw t6, 24(a0) + sw t7, 28(a0) + b 1b + addiu a0, a0, 32 +2: + lw t0, 0(a1) + lw t1, 4(a1) + lw t2, 8(a1) + lw t3, 12(a1) + lw t4, 16(a1) + lw t5, 20(a1) + lw t6, 24(a1) + lw t7, 28(a1) + addiu a1, a1, 32 + or t0, t0, t9 + or t1, t1, t9 + or t2, t2, t9 + or t3, t3, t9 + or t4, t4, t9 + or t5, t5, t9 + or t6, t6, t9 + or t7, t7, t9 + sw t0, 0(a0) + sw t1, 4(a0) + sw t2, 8(a0) + sw t3, 12(a0) + sw t4, 16(a0) + sw t5, 20(a0) + sw t6, 24(a0) + sw t7, 28(a0) + beqz a2, 4f + addiu a0, a0, 32 +3: + lw t0, 0(a1) + addiu a1, a1, 4 + addiu a2, a2, -1 + or t1, t0, t9 + sw t1, 0(a0) + bnez a2, 3b + addiu a0, a0, 4 +4: + jr ra + nop + +END(pixman_composite_src_x888_8888_asm_mips) + +#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) +LEAF_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (b8g8r8) + * a2 - w + */ + + beqz a2, 6f + nop + + lui t8, 0xff00; + srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */ + beqz t9, 4f /* branch if less than 4 src pixels */ + nop + + li t0, 0x1 + li t1, 0x2 + li t2, 0x3 + andi t3, a1, 0x3 + beq t3, t0, 1f + nop + beq t3, t1, 2f + nop + beq t3, t2, 3f + nop + +0: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 0(a1) /* t0 = R2 | B1 | G1 | R1 */ + lw t1, 4(a1) /* t1 = G3 | R3 | B2 | G2 */ + lw t2, 8(a1) /* t2 = B4 | G4 | R4 | B3 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = B1 | R2 | R1 | G1 */ + wsbh t1, t1 /* t1 = R3 | G3 | G2 | B2 */ + wsbh t2, t2 /* t2 = G4 | B4 | B3 | R4 */ + + packrl.ph t3, t1, t0 /* t3 = G2 | B2 | B1 | R2 */ + packrl.ph t4, t0, t0 /* t4 = R1 | G1 | B1 | R2 */ + rotr t3, t3, 16 /* t3 = B1 | R2 | G2 | B2 */ + or t3, t3, t8 /* t3 = FF | R2 | G2 | B2 */ + srl t4, t4, 8 /* t4 = 0 | R1 | G1 | B1 */ + or t4, t4, t8 /* t4 = FF | R1 | G1 | B1 */ + packrl.ph t5, t2, t1 /* t5 = B3 | R4 | R3 | G3 */ + rotr t5, t5, 24 /* t5 = R4 | R3 | G3 | B3 */ + or t5, t5, t8 /* t5 = FF | R3 | G3 | B3 */ + rotr t2, t2, 16 /* t2 = B3 | R4 | G4 | B4 */ + or t2, t2, t8 /* t5 = FF | R3 | G3 | B3 */ + + sw t4, 0(a0) + sw t3, 4(a0) + sw t5, 8(a0) + sw t2, 12(a0) + b 0b + addiu a0, a0, 16 + +1: + lbu t6, 0(a1) /* t6 = 0 | 0 | 0 | R1 */ + lhu t7, 1(a1) /* t7 = 0 | 0 | B1 | G1 */ + sll t6, t6, 16 /* t6 = 0 | R1 | 0 | 0 */ + wsbh t7, t7 /* t7 = 0 | 0 | G1 | B1 */ + or t7, t6, t7 /* t7 = 0 | R1 | G1 | B1 */ +11: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 3(a1) /* t0 = R3 | B2 | G2 | R2 */ + lw t1, 7(a1) /* t1 = G4 | R4 | B3 | G3 */ + lw t2, 11(a1) /* t2 = B5 | G5 | R5 | B4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = B2 | R3 | R2 | G2 */ + wsbh t1, t1 /* t1 = R4 | G4 | G3 | B3 */ + wsbh t2, t2 /* t2 = G5 | B5 | B4 | R5 */ + + packrl.ph t3, t1, t0 /* t3 = G3 | B3 | B2 | R3 */ + packrl.ph t4, t2, t1 /* t4 = B4 | R5 | R4 | G4 */ + rotr t0, t0, 24 /* t0 = R3 | R2 | G2 | B2 */ + rotr t3, t3, 16 /* t3 = B2 | R3 | G3 | B3 */ + rotr t4, t4, 24 /* t4 = R5 | R4 | G4 | B4 */ + or t7, t7, t8 /* t7 = FF | R1 | G1 | B1 */ + or t0, t0, t8 /* t0 = FF | R2 | G2 | B2 */ + or t3, t3, t8 /* t1 = FF | R3 | G3 | B3 */ + or t4, t4, t8 /* t3 = FF | R4 | G4 | B4 */ + + sw t7, 0(a0) + sw t0, 4(a0) + sw t3, 8(a0) + sw t4, 12(a0) + rotr t7, t2, 16 /* t7 = xx | R5 | G5 | B5 */ + b 11b + addiu a0, a0, 16 + +2: + lhu t7, 0(a1) /* t7 = 0 | 0 | G1 | R1 */ + wsbh t7, t7 /* t7 = 0 | 0 | R1 | G1 */ +21: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 2(a1) /* t0 = B2 | G2 | R2 | B1 */ + lw t1, 6(a1) /* t1 = R4 | B3 | G3 | R3 */ + lw t2, 10(a1) /* t2 = G5 | R5 | B4 | G4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = G2 | B2 | B1 | R2 */ + wsbh t1, t1 /* t1 = B3 | R4 | R3 | G3 */ + wsbh t2, t2 /* t2 = R5 | G5 | G4 | B4 */ + + precr_sra.ph.w t7, t0, 0 /* t7 = R1 | G1 | B1 | R2 */ + rotr t0, t0, 16 /* t0 = B1 | R2 | G2 | B2 */ + packrl.ph t3, t2, t1 /* t3 = G4 | B4 | B3 | R4 */ + rotr t1, t1, 24 /* t1 = R4 | R3 | G3 | B3 */ + srl t7, t7, 8 /* t7 = 0 | R1 | G1 | B1 */ + rotr t3, t3, 16 /* t3 = B3 | R4 | G4 | B4 */ + or t7, t7, t8 /* t7 = FF | R1 | G1 | B1 */ + or t0, t0, t8 /* t0 = FF | R2 | G2 | B2 */ + or t1, t1, t8 /* t1 = FF | R3 | G3 | B3 */ + or t3, t3, t8 /* t3 = FF | R4 | G4 | B4 */ + + sw t7, 0(a0) + sw t0, 4(a0) + sw t1, 8(a0) + sw t3, 12(a0) + srl t7, t2, 16 /* t7 = 0 | 0 | R5 | G5 */ + b 21b + addiu a0, a0, 16 + +3: + lbu t7, 0(a1) /* t7 = 0 | 0 | 0 | R1 */ +31: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 1(a1) /* t0 = G2 | R2 | B1 | G1 */ + lw t1, 5(a1) /* t1 = B3 | G3 | R3 | B2 */ + lw t2, 9(a1) /* t2 = R5 | B4 | G4 | R4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = R2 | G2 | G1 | B1 */ + wsbh t1, t1 /* t1 = G3 | B3 | B2 | R3 */ + wsbh t2, t2 /* t2 = B4 | R5 | R4 | G4 */ + + precr_sra.ph.w t7, t0, 0 /* t7 = xx | R1 | G1 | B1 */ + packrl.ph t3, t1, t0 /* t3 = B2 | R3 | R2 | G2 */ + rotr t1, t1, 16 /* t1 = B2 | R3 | G3 | B3 */ + rotr t4, t2, 24 /* t4 = R5 | R4 | G4 | B4 */ + rotr t3, t3, 24 /* t3 = R3 | R2 | G2 | B2 */ + or t7, t7, t8 /* t7 = FF | R1 | G1 | B1 */ + or t3, t3, t8 /* t3 = FF | R2 | G2 | B2 */ + or t1, t1, t8 /* t1 = FF | R3 | G3 | B3 */ + or t4, t4, t8 /* t4 = FF | R4 | G4 | B4 */ + + sw t7, 0(a0) + sw t3, 4(a0) + sw t1, 8(a0) + sw t4, 12(a0) + srl t7, t2, 16 /* t7 = 0 | 0 | xx | R5 */ + b 31b + addiu a0, a0, 16 + +4: + beqz a2, 6f + nop +5: + lbu t0, 0(a1) /* t0 = 0 | 0 | 0 | R */ + lbu t1, 1(a1) /* t1 = 0 | 0 | 0 | G */ + lbu t2, 2(a1) /* t2 = 0 | 0 | 0 | B */ + addiu a1, a1, 3 + + sll t0, t0, 16 /* t2 = 0 | R | 0 | 0 */ + sll t1, t1, 8 /* t1 = 0 | 0 | G | 0 */ + + or t2, t2, t1 /* t2 = 0 | 0 | G | B */ + or t2, t2, t0 /* t2 = 0 | R | G | B */ + or t2, t2, t8 /* t2 = FF | R | G | B */ + + sw t2, 0(a0) + addiu a2, a2, -1 + bnez a2, 5b + addiu a0, a0, 4 +6: + j ra + nop + +END(pixman_composite_src_0888_8888_rev_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (b8g8r8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 0, v0, v1 + beqz a2, 6f + nop + + li t6, 0xf800f800 + li t7, 0x07e007e0 + li t8, 0x001F001F + srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */ + beqz t9, 4f /* branch if less than 4 src pixels */ + nop + + li t0, 0x1 + li t1, 0x2 + li t2, 0x3 + andi t3, a1, 0x3 + beq t3, t0, 1f + nop + beq t3, t1, 2f + nop + beq t3, t2, 3f + nop + +0: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 0(a1) /* t0 = R2 | B1 | G1 | R1 */ + lw t1, 4(a1) /* t1 = G3 | R3 | B2 | G2 */ + lw t2, 8(a1) /* t2 = B4 | G4 | R4 | B3 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = B1 | R2 | R1 | G1 */ + wsbh t1, t1 /* t1 = R3 | G3 | G2 | B2 */ + wsbh t2, t2 /* t2 = G4 | B4 | B3 | R4 */ + + packrl.ph t3, t1, t0 /* t3 = G2 | B2 | B1 | R2 */ + packrl.ph t4, t0, t0 /* t4 = R1 | G1 | B1 | R2 */ + rotr t3, t3, 16 /* t3 = B1 | R2 | G2 | B2 */ + srl t4, t4, 8 /* t4 = 0 | R1 | G1 | B1 */ + packrl.ph t5, t2, t1 /* t5 = B3 | R4 | R3 | G3 */ + rotr t5, t5, 24 /* t5 = R4 | R3 | G3 | B3 */ + rotr t2, t2, 16 /* t2 = B3 | R4 | G4 | B4 */ + + CONVERT_2x8888_TO_2x0565 t4, t3, t4, t3, t6, t7, t8, v0, v1 + CONVERT_2x8888_TO_2x0565 t5, t2, t5, t2, t6, t7, t8, v0, v1 + + sh t4, 0(a0) + sh t3, 2(a0) + sh t5, 4(a0) + sh t2, 6(a0) + b 0b + addiu a0, a0, 8 + +1: + lbu t4, 0(a1) /* t4 = 0 | 0 | 0 | R1 */ + lhu t5, 1(a1) /* t5 = 0 | 0 | B1 | G1 */ + sll t4, t4, 16 /* t4 = 0 | R1 | 0 | 0 */ + wsbh t5, t5 /* t5 = 0 | 0 | G1 | B1 */ + or t5, t4, t5 /* t5 = 0 | R1 | G1 | B1 */ +11: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 3(a1) /* t0 = R3 | B2 | G2 | R2 */ + lw t1, 7(a1) /* t1 = G4 | R4 | B3 | G3 */ + lw t2, 11(a1) /* t2 = B5 | G5 | R5 | B4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = B2 | R3 | R2 | G2 */ + wsbh t1, t1 /* t1 = R4 | G4 | G3 | B3 */ + wsbh t2, t2 /* t2 = G5 | B5 | B4 | R5 */ + + packrl.ph t3, t1, t0 /* t3 = G3 | B3 | B2 | R3 */ + packrl.ph t4, t2, t1 /* t4 = B4 | R5 | R4 | G4 */ + rotr t0, t0, 24 /* t0 = R3 | R2 | G2 | B2 */ + rotr t3, t3, 16 /* t3 = B2 | R3 | G3 | B3 */ + rotr t4, t4, 24 /* t4 = R5 | R4 | G4 | B4 */ + + CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1 + CONVERT_2x8888_TO_2x0565 t3, t4, t3, t4, t6, t7, t8, v0, v1 + + sh t5, 0(a0) + sh t0, 2(a0) + sh t3, 4(a0) + sh t4, 6(a0) + rotr t5, t2, 16 /* t5 = xx | R5 | G5 | B5 */ + b 11b + addiu a0, a0, 8 + +2: + lhu t5, 0(a1) /* t5 = 0 | 0 | G1 | R1 */ + wsbh t5, t5 /* t5 = 0 | 0 | R1 | G1 */ +21: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 2(a1) /* t0 = B2 | G2 | R2 | B1 */ + lw t1, 6(a1) /* t1 = R4 | B3 | G3 | R3 */ + lw t2, 10(a1) /* t2 = G5 | R5 | B4 | G4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = G2 | B2 | B1 | R2 */ + wsbh t1, t1 /* t1 = B3 | R4 | R3 | G3 */ + wsbh t2, t2 /* t2 = R5 | G5 | G4 | B4 */ + + precr_sra.ph.w t5, t0, 0 /* t5 = R1 | G1 | B1 | R2 */ + rotr t0, t0, 16 /* t0 = B1 | R2 | G2 | B2 */ + packrl.ph t3, t2, t1 /* t3 = G4 | B4 | B3 | R4 */ + rotr t1, t1, 24 /* t1 = R4 | R3 | G3 | B3 */ + srl t5, t5, 8 /* t5 = 0 | R1 | G1 | B1 */ + rotr t3, t3, 16 /* t3 = B3 | R4 | G4 | B4 */ + + CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1 + CONVERT_2x8888_TO_2x0565 t1, t3, t1, t3, t6, t7, t8, v0, v1 + + sh t5, 0(a0) + sh t0, 2(a0) + sh t1, 4(a0) + sh t3, 6(a0) + srl t5, t2, 16 /* t5 = 0 | 0 | R5 | G5 */ + b 21b + addiu a0, a0, 8 + +3: + lbu t5, 0(a1) /* t5 = 0 | 0 | 0 | R1 */ +31: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 1(a1) /* t0 = G2 | R2 | B1 | G1 */ + lw t1, 5(a1) /* t1 = B3 | G3 | R3 | B2 */ + lw t2, 9(a1) /* t2 = R5 | B4 | G4 | R4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = R2 | G2 | G1 | B1 */ + wsbh t1, t1 /* t1 = G3 | B3 | B2 | R3 */ + wsbh t2, t2 /* t2 = B4 | R5 | R4 | G4 */ + + precr_sra.ph.w t5, t0, 0 /* t5 = xx | R1 | G1 | B1 */ + packrl.ph t3, t1, t0 /* t3 = B2 | R3 | R2 | G2 */ + rotr t1, t1, 16 /* t1 = B2 | R3 | G3 | B3 */ + rotr t4, t2, 24 /* t4 = R5 | R4 | G4 | B4 */ + rotr t3, t3, 24 /* t3 = R3 | R2 | G2 | B2 */ + + CONVERT_2x8888_TO_2x0565 t5, t3, t5, t3, t6, t7, t8, v0, v1 + CONVERT_2x8888_TO_2x0565 t1, t4, t1, t4, t6, t7, t8, v0, v1 + + sh t5, 0(a0) + sh t3, 2(a0) + sh t1, 4(a0) + sh t4, 6(a0) + srl t5, t2, 16 /* t5 = 0 | 0 | xx | R5 */ + b 31b + addiu a0, a0, 8 + +4: + beqz a2, 6f + nop +5: + lbu t0, 0(a1) /* t0 = 0 | 0 | 0 | R */ + lbu t1, 1(a1) /* t1 = 0 | 0 | 0 | G */ + lbu t2, 2(a1) /* t2 = 0 | 0 | 0 | B */ + addiu a1, a1, 3 + + sll t0, t0, 16 /* t2 = 0 | R | 0 | 0 */ + sll t1, t1, 8 /* t1 = 0 | 0 | G | 0 */ + + or t2, t2, t1 /* t2 = 0 | 0 | G | B */ + or t2, t2, t0 /* t2 = 0 | R | G | B */ + + CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5 + + sh t3, 0(a0) + addiu a2, a2, -1 + bnez a2, 5b + addiu a0, a0, 2 +6: + RESTORE_REGS_FROM_STACK 0, v0, v1 + j ra + nop + +END(pixman_composite_src_0888_0565_rev_asm_mips) +#endif + +LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips) +/* + * a0 - dst (a8b8g8r8) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 0, v0 + li v0, 0x00ff00ff + + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) + lw t1, 4(a1) + addiu a1, a1, 8 + addiu a2, a2, -2 + srl t2, t0, 24 + srl t3, t1, 24 + + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9 + + sll t0, t0, 8 + sll t1, t1, 8 + andi t2, t2, 0xff + andi t3, t3, 0xff + or t0, t0, t2 + or t1, t1, t3 + wsbh t0, t0 + wsbh t1, t1 + rotr t0, t0, 16 + rotr t1, t1, 16 + sw t0, 0(a0) + sw t1, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + lw t0, 0(a1) + srl t1, t0, 24 + + MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5 + + sll t0, t0, 8 + andi t1, t1, 0xff + or t0, t0, t1 + wsbh t0, t0 + rotr t0, t0, 16 + sw t0, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_composite_src_pixbuf_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 0, v0 + li v0, 0x00ff00ff + + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) + lw t1, 4(a1) + addiu a1, a1, 8 + addiu a2, a2, -2 + srl t2, t0, 24 + srl t3, t1, 24 + + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9 + + sll t0, t0, 8 + sll t1, t1, 8 + andi t2, t2, 0xff + andi t3, t3, 0xff + or t0, t0, t2 + or t1, t1, t3 + rotr t0, t0, 8 + rotr t1, t1, 8 + sw t0, 0(a0) + sw t1, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + lw t0, 0(a1) + srl t1, t0, 24 + + MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5 + + sll t0, t0, 8 + andi t1, t1, 0xff + or t0, t0, t1 + rotr t0, t0, 8 + sw t0, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_composite_src_rpixbuf_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (32bit constant) + * a2 - mask (a8) + * a3 - w + */ + + + SAVE_REGS_ON_STACK 0, v0 + li v0, 0x00ff00ff + + beqz a3, 3f + nop + addiu t1, a3, -1 + beqz t1, 2f + nop + +1: + /* a1 = source (32bit constant) */ + lbu t0, 0(a2) /* t2 = mask (a8) */ + lbu t1, 1(a2) /* t3 = mask (a8) */ + addiu a2, a2, 2 + + MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, t2, t3, v0, t4, t5, t6, t7, t8, t9 + + sw t2, 0(a0) + sw t3, 4(a0) + addiu a3, a3, -2 + addiu t2, a3, -1 + bgtz t2, 1b + addiu a0, a0, 8 + + beqz a3, 3f + nop + +2: + lbu t0, 0(a2) + addiu a2, a2, 1 + + MIPS_UN8x4_MUL_UN8 a1, t0, t1, v0, t3, t4, t5 + + sw t1, 0(a0) + addiu a3, a3, -1 + addiu a0, a0, 4 + +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_composite_src_n_8_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips) +/* + * a0 - dst (a8) + * a1 - src (32bit constant) + * a2 - mask (a8) + * a3 - w + */ + + li t9, 0x00ff00ff + beqz a3, 3f + nop + srl t7, a3, 2 /* t7 = how many multiples of 4 dst pixels */ + beqz t7, 1f /* branch if less than 4 src pixels */ + nop + + srl t8, a1, 24 + replv.ph t8, t8 + +0: + beqz t7, 1f + addiu t7, t7, -1 + lbu t0, 0(a2) + lbu t1, 1(a2) + lbu t2, 2(a2) + lbu t3, 3(a2) + + addiu a2, a2, 4 + + precr_sra.ph.w t1, t0, 0 + precr_sra.ph.w t3, t2, 0 + precr.qb.ph t0, t3, t1 + + muleu_s.ph.qbl t2, t0, t8 + muleu_s.ph.qbr t3, t0, t8 + shra_r.ph t4, t2, 8 + shra_r.ph t5, t3, 8 + and t4, t4, t9 + and t5, t5, t9 + addq.ph t2, t2, t4 + addq.ph t3, t3, t5 + shra_r.ph t2, t2, 8 + shra_r.ph t3, t3, 8 + precr.qb.ph t2, t2, t3 + + sb t2, 0(a0) + srl t2, t2, 8 + sb t2, 1(a0) + srl t2, t2, 8 + sb t2, 2(a0) + srl t2, t2, 8 + sb t2, 3(a0) + addiu a3, a3, -4 + b 0b + addiu a0, a0, 4 + +1: + beqz a3, 3f + nop + srl t8, a1, 24 +2: + lbu t0, 0(a2) + addiu a2, a2, 1 + + mul t2, t0, t8 + shra_r.ph t3, t2, 8 + andi t3, t3, 0x00ff + addq.ph t2, t2, t3 + shra_r.ph t2, t2, 8 + + sb t2, 0(a0) + addiu a3, a3, -1 + bnez a3, 2b + addiu a0, a0, 1 + +3: + j ra + nop + +END(pixman_composite_src_n_8_8_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (32bit constant) + * a2 - mask (a8r8g8b8) + * a3 - w + */ + + beqz a3, 8f + nop + SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 + + li t6, 0xff + addiu t7, zero, -1 /* t7 = 0xffffffff */ + srl t8, a1, 24 /* t8 = srca */ + li t9, 0x00ff00ff + + addiu t1, a3, -1 + beqz t1, 4f /* last pixel */ + nop + +0: + lw t0, 0(a2) /* t0 = mask */ + lw t1, 4(a2) /* t1 = mask */ + addiu a3, a3, -2 /* w = w - 2 */ + or t2, t0, t1 + beqz t2, 3f /* if (t0 == 0) && (t1 == 0) */ + addiu a2, a2, 8 + and t2, t0, t1 + beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ + nop + +//if(ma) + lw t2, 0(a0) /* t2 = dst */ + lw t3, 4(a0) /* t3 = dst */ + MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5 + not t0, t0 + not t1, t1 + MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, t4, t2 + addu_s.qb t3, t5, t3 + sw t2, 0(a0) + sw t3, 4(a0) + addiu t1, a3, -1 + bgtz t1, 0b + addiu a0, a0, 8 + b 4f + nop +1: +//if (t0 == 0xffffffff) && (t1 == 0xffffffff): + beq t8, t6, 2f /* if (srca == 0xff) */ + nop + lw t2, 0(a0) /* t2 = dst */ + lw t3, 4(a0) /* t3 = dst */ + not t0, a1 + not t1, a1 + srl t0, t0, 24 + srl t1, t1, 24 + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, a1, t2 + addu_s.qb t3, a1, t3 + sw t2, 0(a0) + sw t3, 4(a0) + addiu t1, a3, -1 + bgtz t1, 0b + addiu a0, a0, 8 + b 4f + nop +2: + sw a1, 0(a0) + sw a1, 4(a0) +3: + addiu t1, a3, -1 + bgtz t1, 0b + addiu a0, a0, 8 + +4: + beqz a3, 7f + nop + /* a1 = src */ + lw t0, 0(a2) /* t0 = mask */ + beqz t0, 7f /* if (t0 == 0) */ + nop + beq t0, t7, 5f /* if (t0 == 0xffffffff) */ + nop +//if(ma) + lw t1, 0(a0) /* t1 = dst */ + MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0 + MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5 + not t0, t0 + MIPS_UN8x4_MUL_UN8x4 t1, t0, t1, t9, t3, t4, t5, s0 + addu_s.qb t1, t2, t1 + sw t1, 0(a0) + RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 + j ra + nop +5: +//if (t0 == 0xffffffff) + beq t8, t6, 6f /* if (srca == 0xff) */ + nop + lw t1, 0(a0) /* t1 = dst */ + not t0, a1 + srl t0, t0, 24 + MIPS_UN8x4_MUL_UN8 t1, t0, t1, t9, t2, t3, t4 + addu_s.qb t1, a1, t1 + sw t1, 0(a0) + RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 + j ra + nop +6: + sw a1, 0(a0) +7: + RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 +8: + j ra + nop + +END(pixman_composite_over_n_8888_8888_ca_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (32bit constant) + * a2 - mask (a8r8g8b8) + * a3 - w + */ + + beqz a3, 8f + nop + SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + li t6, 0xff + addiu t7, zero, -1 /* t7 = 0xffffffff */ + srl t8, a1, 24 /* t8 = srca */ + li t9, 0x00ff00ff + li s6, 0xf800f800 + li s7, 0x07e007e0 + li s8, 0x001F001F + + addiu t1, a3, -1 + beqz t1, 4f /* last pixel */ + nop + +0: + lw t0, 0(a2) /* t0 = mask */ + lw t1, 4(a2) /* t1 = mask */ + addiu a3, a3, -2 /* w = w - 2 */ + or t2, t0, t1 + beqz t2, 3f /* if (t0 == 0) && (t1 == 0) */ + addiu a2, a2, 8 + and t2, t0, t1 + beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ + nop + +//if(ma) + lhu t2, 0(a0) /* t2 = dst */ + lhu t3, 2(a0) /* t3 = dst */ + MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5 + not t0, t0 + not t1, t1 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3 + MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, t4, t2 + addu_s.qb t3, t5, t3 + CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1 + sh t2, 0(a0) + sh t3, 2(a0) + addiu t1, a3, -1 + bgtz t1, 0b + addiu a0, a0, 4 + b 4f + nop +1: +//if (t0 == 0xffffffff) && (t1 == 0xffffffff): + beq t8, t6, 2f /* if (srca == 0xff) */ + nop + lhu t2, 0(a0) /* t2 = dst */ + lhu t3, 2(a0) /* t3 = dst */ + not t0, a1 + not t1, a1 + srl t0, t0, 24 + srl t1, t1, 24 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3 + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, a1, t2 + addu_s.qb t3, a1, t3 + CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1 + sh t2, 0(a0) + sh t3, 2(a0) + addiu t1, a3, -1 + bgtz t1, 0b + addiu a0, a0, 4 + b 4f + nop +2: + CONVERT_1x8888_TO_1x0565 a1, t2, s0, s1 + sh t2, 0(a0) + sh t2, 2(a0) +3: + addiu t1, a3, -1 + bgtz t1, 0b + addiu a0, a0, 4 + +4: + beqz a3, 7f + nop + /* a1 = src */ + lw t0, 0(a2) /* t0 = mask */ + beqz t0, 7f /* if (t0 == 0) */ + nop + beq t0, t7, 5f /* if (t0 == 0xffffffff) */ + nop +//if(ma) + lhu t1, 0(a0) /* t1 = dst */ + MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0 + MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5 + not t0, t0 + CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3 + MIPS_UN8x4_MUL_UN8x4 s1, t0, s1, t9, t3, t4, t5, s0 + addu_s.qb s1, t2, s1 + CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2 + sh t1, 0(a0) + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + j ra + nop +5: +//if (t0 == 0xffffffff) + beq t8, t6, 6f /* if (srca == 0xff) */ + nop + lhu t1, 0(a0) /* t1 = dst */ + not t0, a1 + srl t0, t0, 24 + CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3 + MIPS_UN8x4_MUL_UN8 s1, t0, s1, t9, t2, t3, t4 + addu_s.qb s1, a1, s1 + CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2 + sh t1, 0(a0) + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + j ra + nop +6: + CONVERT_1x8888_TO_1x0565 a1, t1, s0, s2 + sh t1, 0(a0) +7: + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 +8: + j ra + nop + +END(pixman_composite_over_n_8888_0565_ca_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8_asm_mips) +/* + * a0 - dst (a8) + * a1 - src (32bit constant) + * a2 - mask (a8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 0, v0 + li t9, 0x00ff00ff + beqz a3, 3f + nop + srl v0, a3, 2 /* v0 = how many multiples of 4 dst pixels */ + beqz v0, 1f /* branch if less than 4 src pixels */ + nop + + srl t8, a1, 24 + replv.ph t8, t8 + +0: + beqz v0, 1f + addiu v0, v0, -1 + lbu t0, 0(a2) + lbu t1, 1(a2) + lbu t2, 2(a2) + lbu t3, 3(a2) + lbu t4, 0(a0) + lbu t5, 1(a0) + lbu t6, 2(a0) + lbu t7, 3(a0) + + addiu a2, a2, 4 + + precr_sra.ph.w t1, t0, 0 + precr_sra.ph.w t3, t2, 0 + precr_sra.ph.w t5, t4, 0 + precr_sra.ph.w t7, t6, 0 + + precr.qb.ph t0, t3, t1 + precr.qb.ph t1, t7, t5 + + muleu_s.ph.qbl t2, t0, t8 + muleu_s.ph.qbr t3, t0, t8 + shra_r.ph t4, t2, 8 + shra_r.ph t5, t3, 8 + and t4, t4, t9 + and t5, t5, t9 + addq.ph t2, t2, t4 + addq.ph t3, t3, t5 + shra_r.ph t2, t2, 8 + shra_r.ph t3, t3, 8 + precr.qb.ph t0, t2, t3 + not t6, t0 + + preceu.ph.qbl t7, t6 + preceu.ph.qbr t6, t6 + + muleu_s.ph.qbl t2, t1, t7 + muleu_s.ph.qbr t3, t1, t6 + shra_r.ph t4, t2, 8 + shra_r.ph t5, t3, 8 + and t4, t4, t9 + and t5, t5, t9 + addq.ph t2, t2, t4 + addq.ph t3, t3, t5 + shra_r.ph t2, t2, 8 + shra_r.ph t3, t3, 8 + precr.qb.ph t1, t2, t3 + + addu_s.qb t2, t0, t1 + + sb t2, 0(a0) + srl t2, t2, 8 + sb t2, 1(a0) + srl t2, t2, 8 + sb t2, 2(a0) + srl t2, t2, 8 + sb t2, 3(a0) + addiu a3, a3, -4 + b 0b + addiu a0, a0, 4 + +1: + beqz a3, 3f + nop + srl t8, a1, 24 +2: + lbu t0, 0(a2) + lbu t1, 0(a0) + addiu a2, a2, 1 + + mul t2, t0, t8 + shra_r.ph t3, t2, 8 + andi t3, t3, 0x00ff + addq.ph t2, t2, t3 + shra_r.ph t2, t2, 8 + not t3, t2 + andi t3, t3, 0x00ff + + + mul t4, t1, t3 + shra_r.ph t5, t4, 8 + andi t5, t5, 0x00ff + addq.ph t4, t4, t5 + shra_r.ph t4, t4, 8 + andi t4, t4, 0x00ff + + addu_s.qb t2, t2, t4 + sb t2, 0(a0) + addiu a3, a3, -1 + bnez a3, 2b + addiu a0, a0, 1 + +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_composite_over_n_8_8_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (32bit constant) + * a2 - mask (a8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 4, s0, s1, s2, s3, s4 + beqz a3, 4f + nop + li t4, 0x00ff00ff + li t5, 0xff + addiu t0, a3, -1 + beqz t0, 3f /* last pixel */ + srl t6, a1, 24 /* t6 = srca */ + not s4, a1 + beq t5, t6, 2f /* if (srca == 0xff) */ + srl s4, s4, 24 +1: + /* a1 = src */ + lbu t0, 0(a2) /* t0 = mask */ + lbu t1, 1(a2) /* t1 = mask */ + or t2, t0, t1 + beqz t2, 111f /* if (t0 == 0) && (t1 == 0) */ + addiu a2, a2, 2 + and t3, t0, t1 + + lw t2, 0(a0) /* t2 = dst */ + beq t3, t5, 11f /* if (t0 == 0xff) && (t1 == 0xff) */ + lw t3, 4(a0) /* t3 = dst */ + + MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s0, s1, t4, t6, t7, t8, t9, s2, s3 + not s2, s0 + not s3, s1 + srl s2, s2, 24 + srl s3, s3, 24 + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s2, s3, t2, t3, t4, t0, t1, t6, t7, t8, t9 + addu_s.qb s2, t2, s0 + addu_s.qb s3, t3, s1 + sw s2, 0(a0) + b 111f + sw s3, 4(a0) +11: + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s4, s4, t2, t3, t4, t0, t1, t6, t7, t8, t9 + addu_s.qb s2, t2, a1 + addu_s.qb s3, t3, a1 + sw s2, 0(a0) + sw s3, 4(a0) + +111: + addiu a3, a3, -2 + addiu t0, a3, -1 + bgtz t0, 1b + addiu a0, a0, 8 + b 3f + nop +2: + /* a1 = src */ + lbu t0, 0(a2) /* t0 = mask */ + lbu t1, 1(a2) /* t1 = mask */ + or t2, t0, t1 + beqz t2, 222f /* if (t0 == 0) && (t1 == 0) */ + addiu a2, a2, 2 + and t3, t0, t1 + beq t3, t5, 22f /* if (t0 == 0xff) && (t1 == 0xff) */ + nop + lw t2, 0(a0) /* t2 = dst */ + lw t3, 4(a0) /* t3 = dst */ + + OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, t2, t3, \ + t6, t7, t4, t8, t9, s0, s1, s2, s3 + sw t6, 0(a0) + b 222f + sw t7, 4(a0) +22: + sw a1, 0(a0) + sw a1, 4(a0) +222: + addiu a3, a3, -2 + addiu t0, a3, -1 + bgtz t0, 2b + addiu a0, a0, 8 +3: + blez a3, 4f + nop + /* a1 = src */ + lbu t0, 0(a2) /* t0 = mask */ + beqz t0, 4f /* if (t0 == 0) */ + addiu a2, a2, 1 + move t3, a1 + beq t0, t5, 31f /* if (t0 == 0xff) */ + lw t1, 0(a0) /* t1 = dst */ + + MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t6, t7, t8 +31: + not t2, t3 + srl t2, t2, 24 + MIPS_UN8x4_MUL_UN8 t1, t2, t1, t4, t6, t7, t8 + addu_s.qb t2, t1, t3 + sw t2, 0(a0) +4: + RESTORE_REGS_FROM_STACK 4, s0, s1, s2, s3, s4 + j ra + nop + +END(pixman_composite_over_n_8_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (32bit constant) + * a2 - mask (a8) + * a3 - w + */ + SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8 + beqz a3, 4f + nop + li t4, 0x00ff00ff + li t5, 0xff + li t6, 0xf800f800 + li t7, 0x07e007e0 + li t8, 0x001F001F + addiu t1, a3, -1 + beqz t1, 3f /* last pixel */ + srl t0, a1, 24 /* t0 = srca */ + not v0, a1 + beq t0, t5, 2f /* if (srca == 0xff) */ + srl v0, v0, 24 +1: + /* a1 = src */ + lbu t0, 0(a2) /* t0 = mask */ + lbu t1, 1(a2) /* t1 = mask */ + or t2, t0, t1 + beqz t2, 111f /* if (t0 == 0) && (t1 == 0) */ + addiu a2, a2, 2 + lhu t2, 0(a0) /* t2 = dst */ + lhu t3, 2(a0) /* t3 = dst */ + CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t7, t8, t9, s2, s3, s4 + and t9, t0, t1 + beq t9, t5, 11f /* if (t0 == 0xff) && (t1 == 0xff) */ + nop + + MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s2, s3, t4, t9, s4, s5, s6, s7, s8 + not s4, s2 + not s5, s3 + srl s4, s4, 24 + srl s5, s5, 24 + MIPS_2xUN8x4_MUL_2xUN8 s0, s1, s4, s5, s0, s1, t4, t9, t0, t1, s6, s7, s8 + addu_s.qb s4, s2, s0 + addu_s.qb s5, s3, s1 + CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1 + sh t2, 0(a0) + b 111f + sh t3, 2(a0) +11: + MIPS_2xUN8x4_MUL_2xUN8 s0, s1, v0, v0, s0, s1, t4, t9, t0, t1, s6, s7, s8 + addu_s.qb s4, a1, s0 + addu_s.qb s5, a1, s1 + CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1 + sh t2, 0(a0) + sh t3, 2(a0) +111: + addiu a3, a3, -2 + addiu t0, a3, -1 + bgtz t0, 1b + addiu a0, a0, 4 + b 3f + nop +2: + CONVERT_1x8888_TO_1x0565 a1, s0, s1, s2 +21: + /* a1 = src */ + lbu t0, 0(a2) /* t0 = mask */ + lbu t1, 1(a2) /* t1 = mask */ + or t2, t0, t1 + beqz t2, 222f /* if (t0 == 0) && (t1 == 0) */ + addiu a2, a2, 2 + and t9, t0, t1 + move s2, s0 + beq t9, t5, 22f /* if (t0 == 0xff) && (t2 == 0xff) */ + move s3, s0 + lhu t2, 0(a0) /* t2 = dst */ + lhu t3, 2(a0) /* t3 = dst */ + + CONVERT_2x0565_TO_2x8888 t2, t3, s2, s3, t7, t8, s4, s5, s6, s7 + OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, s2, s3, \ + t2, t3, t4, t9, s4, s5, s6, s7, s8 + CONVERT_2x8888_TO_2x0565 t2, t3, s2, s3, t6, t7, t8, s4, s5 +22: + sh s2, 0(a0) + sh s3, 2(a0) +222: + addiu a3, a3, -2 + addiu t0, a3, -1 + bgtz t0, 21b + addiu a0, a0, 4 +3: + blez a3, 4f + nop + /* a1 = src */ + lbu t0, 0(a2) /* t0 = mask */ + beqz t0, 4f /* if (t0 == 0) */ + nop + lhu t1, 0(a0) /* t1 = dst */ + CONVERT_1x0565_TO_1x8888 t1, t2, t3, t7 + beq t0, t5, 31f /* if (t0 == 0xff) */ + move t3, a1 + + MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t7, t8, t9 +31: + not t6, t3 + srl t6, t6, 24 + MIPS_UN8x4_MUL_UN8 t2, t6, t2, t4, t7, t8, t9 + addu_s.qb t1, t2, t3 + CONVERT_1x8888_TO_1x0565 t1, t2, t3, t7 + sh t2, 0(a0) +4: + RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8 + j ra + nop + +END(pixman_composite_over_n_8_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - mask (32bit constant) + * a3 - w + */ + + SAVE_REGS_ON_STACK 0, s0 + li t4, 0x00ff00ff + beqz a3, 3f + nop + addiu t1, a3, -1 + srl a2, a2, 24 + beqz t1, 2f + nop + +1: + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ + /* a2 = mask (32bit constant) */ + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ + addiu a1, a1, 8 + + OVER_2x8888_2x8_2x8888 t0, t1, a2, a2, t2, t3, \ + t5, t6, t4, t7, t8, t9, t0, t1, s0 + + sw t5, 0(a0) + sw t6, 4(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a3, 3f + nop + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + /* a2 = mask (32bit constant) */ + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + + OVER_8888_8_8888 t0, a2, t1, t3, t4, t5, t6, t7, t8 + + sw t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0 + j ra + nop + +END(pixman_composite_over_8888_n_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8r8g8b8) + * a2 - mask (32bit constant) + * a3 - w + */ + + SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 + li t6, 0x00ff00ff + li t7, 0xf800f800 + li t8, 0x07e007e0 + li t9, 0x001F001F + beqz a3, 3f + nop + srl a2, a2, 24 + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ + /* a2 = mask (32bit constant) */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + lhu t3, 2(a0) /* t2 = destination (r5g6b5) */ + addiu a1, a1, 8 + + CONVERT_2x0565_TO_2x8888 t2, t3, t4, t5, t8, t9, s0, s1, t2, t3 + OVER_2x8888_2x8_2x8888 t0, t1, a2, a2, t4, t5, \ + t2, t3, t6, t0, t1, s0, s1, s2, s3 + CONVERT_2x8888_TO_2x0565 t2, t3, t4, t5, t7, t8, t9, s0, s1 + + sh t4, 0(a0) + sh t5, 2(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a3, 3f + nop + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + /* a2 = mask (32bit constant) */ + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + + CONVERT_1x0565_TO_1x8888 t1, t2, t4, t5 + OVER_8888_8_8888 t0, a2, t2, t1, t6, t3, t4, t5, t7 + CONVERT_1x8888_TO_1x0565 t1, t3, t4, t5 + + sh t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 + j ra + nop + +END(pixman_composite_over_8888_n_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (r5g6b5) + * a2 - mask (32bit constant) + * a3 - w + */ + + SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5 + li t6, 0x00ff00ff + li t7, 0xf800f800 + li t8, 0x07e007e0 + li t9, 0x001F001F + beqz a3, 3f + nop + srl a2, a2, 24 + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + lhu t0, 0(a1) /* t0 = source (r5g6b5) */ + lhu t1, 2(a1) /* t1 = source (r5g6b5) */ + /* a2 = mask (32bit constant) */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + lhu t3, 2(a0) /* t3 = destination (r5g6b5) */ + addiu a1, a1, 4 + + CONVERT_2x0565_TO_2x8888 t0, t1, t4, t5, t8, t9, s0, s1, s2, s3 + CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t8, t9, s2, s3, s4, s5 + OVER_2x8888_2x8_2x8888 t4, t5, a2, a2, s0, s1, \ + t0, t1, t6, s2, s3, s4, s5, t4, t5 + CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t7, t8, t9, s2, s3 + + sh s0, 0(a0) + sh s1, 2(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a3, 3f + nop + lhu t0, 0(a1) /* t0 = source (r5g6b5) */ + /* a2 = mask (32bit constant) */ + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + + CONVERT_1x0565_TO_1x8888 t0, t2, t4, t5 + CONVERT_1x0565_TO_1x8888 t1, t3, t4, t5 + OVER_8888_8_8888 t2, a2, t3, t0, t6, t1, t4, t5, t7 + CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5 + + sh t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5 + j ra + nop + +END(pixman_composite_over_0565_n_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - mask (a8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 0, s0, s1 + li t4, 0x00ff00ff + beqz a3, 3f + nop + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ + lbu t2, 0(a2) /* t2 = mask (a8) */ + lbu t3, 1(a2) /* t3 = mask (a8) */ + lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */ + lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */ + addiu a1, a1, 8 + addiu a2, a2, 2 + + OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, \ + t7, t8, t4, t9, s0, s1, t0, t1, t2 + + sw t7, 0(a0) + sw t8, 4(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a3, 3f + nop + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lbu t1, 0(a2) /* t1 = mask (a8) */ + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + + OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8 + + sw t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1 + j ra + nop + +END(pixman_composite_over_8888_8_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8r8g8b8) + * a2 - mask (a8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5 + li t6, 0x00ff00ff + li t7, 0xf800f800 + li t8, 0x07e007e0 + li t9, 0x001F001F + beqz a3, 3f + nop + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ + lbu t2, 0(a2) /* t2 = mask (a8) */ + lbu t3, 1(a2) /* t3 = mask (a8) */ + lhu t4, 0(a0) /* t4 = destination (r5g6b5) */ + lhu t5, 2(a0) /* t5 = destination (r5g6b5) */ + addiu a1, a1, 8 + addiu a2, a2, 2 + + CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5 + OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, s0, s1, \ + t4, t5, t6, s2, s3, s4, s5, t0, t1 + CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3 + + sh s0, 0(a0) + sh s1, 2(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a3, 3f + nop + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lbu t1, 0(a2) /* t1 = mask (a8) */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + + CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5 + OVER_8888_8_8888 t0, t1, t3, t2, t6, t4, t5, t7, t8 + CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5 + + sh t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5 + j ra + nop + +END(pixman_composite_over_8888_8_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (r5g6b5) + * a2 - mask (a8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5 + li t4, 0xf800f800 + li t5, 0x07e007e0 + li t6, 0x001F001F + li t7, 0x00ff00ff + beqz a3, 3f + nop + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + lhu t0, 0(a1) /* t0 = source (r5g6b5) */ + lhu t1, 2(a1) /* t1 = source (r5g6b5) */ + lbu t2, 0(a2) /* t2 = mask (a8) */ + lbu t3, 1(a2) /* t3 = mask (a8) */ + lhu t8, 0(a0) /* t8 = destination (r5g6b5) */ + lhu t9, 2(a0) /* t9 = destination (r5g6b5) */ + addiu a1, a1, 4 + addiu a2, a2, 2 + + CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5 + CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1 + OVER_2x8888_2x8_2x8888 s0, s1, t2, t3, s2, s3, \ + t0, t1, t7, s4, s5, t8, t9, s0, s1 + CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3 + + sh s0, 0(a0) + sh s1, 2(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a3, 3f + nop + lhu t0, 0(a1) /* t0 = source (r5g6b5) */ + lbu t1, 0(a2) /* t1 = mask (a8) */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + + CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5 + CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6 + OVER_8888_8_8888 t3, t1, t4, t0, t7, t2, t5, t6, t8 + CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5 + + sh t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5 + j ra + nop + +END(pixman_composite_over_0565_8_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - mask (a8r8g8b8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 0, s0, s1, s2 + li t4, 0x00ff00ff + beqz a3, 3f + nop + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ + lw t2, 0(a2) /* t2 = mask (a8r8g8b8) */ + lw t3, 4(a2) /* t3 = mask (a8r8g8b8) */ + lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */ + lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */ + addiu a1, a1, 8 + addiu a2, a2, 8 + srl t2, t2, 24 + srl t3, t3, 24 + + OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t0, t1 + + sw t7, 0(a0) + sw t8, 4(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a3, 3f + nop + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 0(a2) /* t1 = mask (a8r8g8b8) */ + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + srl t1, t1, 24 + + OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8 + + sw t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2 + j ra + nop + +END(pixman_composite_over_8888_8888_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 0, s0, s1, s2 + li t4, 0x00ff00ff + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ + addiu a1, a1, 8 + + not t5, t0 + srl t5, t5, 24 + not t6, t1 + srl t6, t6, 24 + + or t7, t5, t6 + beqz t7, 11f + or t8, t0, t1 + beqz t8, 12f + + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t2, t3 + + addu_s.qb t0, t7, t0 + addu_s.qb t1, t8, t1 +11: + sw t0, 0(a0) + sw t1, 4(a0) +12: + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + addiu a1, a1, 4 + + not t2, t0 + srl t2, t2, 24 + + beqz t2, 21f + nop + beqz t0, 3f + + MIPS_UN8x4_MUL_UN8 t1, t2, t3, t4, t5, t6, t7 + + addu_s.qb t0, t3, t0 +21: + sw t0, 0(a0) + +3: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2 + j ra + nop + +END(pixman_composite_over_8888_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_8888_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 + li t4, 0x00ff00ff + li s3, 0xf800f800 + li s4, 0x07e007e0 + li s5, 0x001F001F + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + lhu t3, 2(a0) /* t3 = destination (r5g6b5) */ + addiu a1, a1, 8 + + not t5, t0 + srl t5, t5, 24 + not t6, t1 + srl t6, t6, 24 + + or t7, t5, t6 + beqz t7, 11f + or t8, t0, t1 + beqz t8, 12f + + CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, s4, s5, t7, t8, t9, s2 + MIPS_2xUN8x4_MUL_2xUN8 s0, s1, t5, t6, t7, t8, t4, t9, t2, t3, s2, s0, s1 + + addu_s.qb t0, t7, t0 + addu_s.qb t1, t8, t1 +11: + CONVERT_2x8888_TO_2x0565 t0, t1, t7, t8, s3, s4, s5, t2, t3 + sh t7, 0(a0) + sh t8, 2(a0) +12: + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a2, 3f + nop + + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + addiu a1, a1, 4 + + not t2, t0 + srl t2, t2, 24 + + beqz t2, 21f + nop + beqz t0, 3f + + CONVERT_1x0565_TO_1x8888 t1, s0, t8, t9 + MIPS_UN8x4_MUL_UN8 s0, t2, t3, t4, t5, t6, t7 + + addu_s.qb t0, t3, t0 +21: + CONVERT_1x8888_TO_1x0565 t0, s0, t8, t9 + sh s0, 0(a0) + +3: + RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 + j ra + nop + +END(pixman_composite_over_8888_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (32bit constant) + * a2 - w + */ + + beqz a2, 5f + nop + + not t0, a1 + srl t0, t0, 24 + bgtz t0, 1f + nop + CONVERT_1x8888_TO_1x0565 a1, t1, t2, t3 +0: + sh t1, 0(a0) + addiu a2, a2, -1 + bgtz a2, 0b + addiu a0, a0, 2 + j ra + nop + +1: + SAVE_REGS_ON_STACK 0, s0, s1, s2 + li t4, 0x00ff00ff + li t5, 0xf800f800 + li t6, 0x07e007e0 + li t7, 0x001F001F + addiu t1, a2, -1 + beqz t1, 3f + nop +2: + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + lhu t2, 2(a0) /* t2 = destination (r5g6b5) */ + + CONVERT_2x0565_TO_2x8888 t1, t2, t3, t8, t6, t7, t9, s0, s1, s2 + MIPS_2xUN8x4_MUL_2xUN8 t3, t8, t0, t0, t1, t2, t4, t9, s0, s1, s2, t3, t8 + addu_s.qb t1, t1, a1 + addu_s.qb t2, t2, a1 + CONVERT_2x8888_TO_2x0565 t1, t2, t3, t8, t5, t6, t7, s0, s1 + + sh t3, 0(a0) + sh t8, 2(a0) + + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 2b + addiu a0, a0, 4 +3: + beqz a2, 4f + nop + + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + + CONVERT_1x0565_TO_1x8888 t1, t2, s0, s1 + MIPS_UN8x4_MUL_UN8 t2, t0, t1, t4, s0, s1, s2 + addu_s.qb t1, t1, a1 + CONVERT_1x8888_TO_1x0565 t1, t2, s0, s1 + + sh t2, 0(a0) + +4: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2 +5: + j ra + nop + +END(pixman_composite_over_n_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (32bit constant) + * a2 - w + */ + + beqz a2, 5f + nop + + not t0, a1 + srl t0, t0, 24 + bgtz t0, 1f + nop +0: + sw a1, 0(a0) + addiu a2, a2, -1 + bgtz a2, 0b + addiu a0, a0, 4 + j ra + nop + +1: + SAVE_REGS_ON_STACK 0, s0, s1, s2 + li t4, 0x00ff00ff + addiu t1, a2, -1 + beqz t1, 3f + nop +2: + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ + + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t0, t7, t8, t4, t9, s0, s1, s2, t2, t3 + + addu_s.qb t7, t7, a1 + addu_s.qb t8, t8, a1 + + sw t7, 0(a0) + sw t8, 4(a0) + + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 2b + addiu a0, a0, 8 +3: + beqz a2, 4f + nop + + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + + MIPS_UN8x4_MUL_UN8 t1, t0, t3, t4, t5, t6, t7 + + addu_s.qb t3, t3, a1 + + sw t3, 0(a0) + +4: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2 +5: + j ra + nop + +END(pixman_composite_over_n_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips) +/* + * a0 - dst (a8) + * a1 - src (a8) + * a2 - mask (a8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 0, v0, v1 + li t9, 0x00ff00ff + beqz a3, 3f + nop + + srl v0, a3, 2 /* v0 = how many multiples of 4 dst pixels */ + beqz v0, 1f /* branch if less than 4 src pixels */ + nop + +0: + beqz v0, 1f + addiu v0, v0, -1 + lbu t0, 0(a2) + lbu t1, 1(a2) + lbu t2, 2(a2) + lbu t3, 3(a2) + lbu t4, 0(a0) + lbu t5, 1(a0) + lbu t6, 2(a0) + lbu t7, 3(a0) + + addiu a2, a2, 4 + + precr_sra.ph.w t1, t0, 0 + precr_sra.ph.w t3, t2, 0 + precr_sra.ph.w t5, t4, 0 + precr_sra.ph.w t7, t6, 0 + + precr.qb.ph t0, t3, t1 + precr.qb.ph t1, t7, t5 + + lbu t4, 0(a1) + lbu v1, 1(a1) + lbu t7, 2(a1) + lbu t8, 3(a1) + + addiu a1, a1, 4 + + precr_sra.ph.w v1, t4, 0 + precr_sra.ph.w t8, t7, 0 + + muleu_s.ph.qbl t2, t0, t8 + muleu_s.ph.qbr t3, t0, v1 + shra_r.ph t4, t2, 8 + shra_r.ph t5, t3, 8 + and t4, t4, t9 + and t5, t5, t9 + addq.ph t2, t2, t4 + addq.ph t3, t3, t5 + shra_r.ph t2, t2, 8 + shra_r.ph t3, t3, 8 + precr.qb.ph t0, t2, t3 + + addu_s.qb t2, t0, t1 + + sb t2, 0(a0) + srl t2, t2, 8 + sb t2, 1(a0) + srl t2, t2, 8 + sb t2, 2(a0) + srl t2, t2, 8 + sb t2, 3(a0) + addiu a3, a3, -4 + b 0b + addiu a0, a0, 4 + +1: + beqz a3, 3f + nop +2: + lbu t8, 0(a1) + lbu t0, 0(a2) + lbu t1, 0(a0) + addiu a1, a1, 1 + addiu a2, a2, 1 + + mul t2, t0, t8 + shra_r.ph t3, t2, 8 + andi t3, t3, 0xff + addq.ph t2, t2, t3 + shra_r.ph t2, t2, 8 + andi t2, t2, 0xff + + addu_s.qb t2, t2, t1 + sb t2, 0(a0) + addiu a3, a3, -1 + bnez a3, 2b + addiu a0, a0, 1 + +3: + RESTORE_REGS_FROM_STACK 0, v0, v1 + j ra + nop + +END(pixman_composite_add_8_8_8_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm_mips) +/* + * a0 - dst (a8) + * a1 - src (32bit constant) + * a2 - mask (a8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 0, v0 + li t9, 0x00ff00ff + beqz a3, 3f + nop + + srl v0, a3, 2 /* v0 = how many multiples of 4 dst pixels */ + beqz v0, 1f /* branch if less than 4 src pixels */ + nop + + srl t8, a1, 24 + replv.ph t8, t8 + +0: + beqz v0, 1f + addiu v0, v0, -1 + lbu t0, 0(a2) + lbu t1, 1(a2) + lbu t2, 2(a2) + lbu t3, 3(a2) + lbu t4, 0(a0) + lbu t5, 1(a0) + lbu t6, 2(a0) + lbu t7, 3(a0) + + addiu a2, a2, 4 + + precr_sra.ph.w t1, t0, 0 + precr_sra.ph.w t3, t2, 0 + precr_sra.ph.w t5, t4, 0 + precr_sra.ph.w t7, t6, 0 + + precr.qb.ph t0, t3, t1 + precr.qb.ph t1, t7, t5 + + muleu_s.ph.qbl t2, t0, t8 + muleu_s.ph.qbr t3, t0, t8 + shra_r.ph t4, t2, 8 + shra_r.ph t5, t3, 8 + and t4, t4, t9 + and t5, t5, t9 + addq.ph t2, t2, t4 + addq.ph t3, t3, t5 + shra_r.ph t2, t2, 8 + shra_r.ph t3, t3, 8 + precr.qb.ph t0, t2, t3 + + addu_s.qb t2, t0, t1 + + sb t2, 0(a0) + srl t2, t2, 8 + sb t2, 1(a0) + srl t2, t2, 8 + sb t2, 2(a0) + srl t2, t2, 8 + sb t2, 3(a0) + addiu a3, a3, -4 + b 0b + addiu a0, a0, 4 + +1: + beqz a3, 3f + nop + srl t8, a1, 24 +2: + lbu t0, 0(a2) + lbu t1, 0(a0) + addiu a2, a2, 1 + + mul t2, t0, t8 + shra_r.ph t3, t2, 8 + andi t3, t3, 0xff + addq.ph t2, t2, t3 + shra_r.ph t2, t2, 8 + andi t2, t2, 0xff + + addu_s.qb t2, t2, t1 + sb t2, 0(a0) + addiu a3, a3, -1 + bnez a3, 2b + addiu a0, a0, 1 + +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_composite_add_n_8_8_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (32bit constant) + * a2 - mask (a8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 0, s0, s1, s2 + li t4, 0x00ff00ff + beqz a3, 3f + nop + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + /* a1 = source (32bit constant) */ + lbu t0, 0(a2) /* t0 = mask (a8) */ + lbu t1, 1(a2) /* t1 = mask (a8) */ + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ + addiu a2, a2, 2 + + MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 a1, a1, \ + t0, t1, \ + t2, t3, \ + t5, t6, \ + t4, t7, t8, t9, s0, s1, s2 + + sw t5, 0(a0) + sw t6, 4(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a3, 3f + nop + /* a1 = source (32bit constant) */ + lbu t0, 0(a2) /* t0 = mask (a8) */ + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + + MIPS_UN8x4_MUL_UN8_ADD_UN8x4 a1, t0, t1, t2, t4, t3, t5, t6 + + sw t2, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2 + j ra + nop + +END(pixman_composite_add_n_8_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (r5g6b5) + * a2 - mask (a8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 + li t4, 0xf800f800 + li t5, 0x07e007e0 + li t6, 0x001F001F + li t7, 0x00ff00ff + beqz a3, 3f + nop + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + lhu t0, 0(a1) /* t0 = source (r5g6b5) */ + lhu t1, 2(a1) /* t1 = source (r5g6b5) */ + lbu t2, 0(a2) /* t2 = mask (a8) */ + lbu t3, 1(a2) /* t3 = mask (a8) */ + lhu t8, 0(a0) /* t8 = destination (r5g6b5) */ + lhu t9, 2(a0) /* t9 = destination (r5g6b5) */ + addiu a1, a1, 4 + addiu a2, a2, 2 + + CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5 + CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, s6, s7 + MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 s0, s1, \ + t2, t3, \ + s2, s3, \ + t0, t1, \ + t7, s4, s5, s6, s7, t8, t9 + CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3 + + sh s0, 0(a0) + sh s1, 2(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a3, 3f + nop + lhu t0, 0(a1) /* t0 = source (r5g6b5) */ + lbu t1, 0(a2) /* t1 = mask (a8) */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + + CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5 + CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6 + MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t3, t1, t4, t0, t7, t2, t5, t6 + CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5 + + sh t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 + j ra + nop + +END(pixman_composite_add_0565_8_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - mask (a8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 0, s0, s1, s2 + li t4, 0x00ff00ff + beqz a3, 3f + nop + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ + lbu t2, 0(a2) /* t2 = mask (a8) */ + lbu t3, 1(a2) /* t3 = mask (a8) */ + lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */ + lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */ + addiu a1, a1, 8 + addiu a2, a2, 2 + + MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \ + t2, t3, \ + t5, t6, \ + t7, t8, \ + t4, t9, s0, s1, s2, t0, t1 + + sw t7, 0(a0) + sw t8, 4(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a3, 3f + nop + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lbu t1, 0(a2) /* t1 = mask (a8) */ + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + + MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7 + + sw t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2 + j ra + nop + +END(pixman_composite_add_8888_8_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - mask (32bit constant) + * a3 - w + */ + + SAVE_REGS_ON_STACK 0, s0, s1, s2 + li t4, 0x00ff00ff + beqz a3, 3f + nop + srl a2, a2, 24 + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ + /* a2 = mask (32bit constant) */ + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ + addiu a1, a1, 8 + + MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \ + a2, a2, \ + t2, t3, \ + t5, t6, \ + t4, t7, t8, t9, s0, s1, s2 + + sw t5, 0(a0) + sw t6, 4(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a3, 3f + nop + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + /* a2 = mask (32bit constant) */ + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + + MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, a2, t1, t3, t4, t5, t6, t7 + + sw t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2 + j ra + nop + +END(pixman_composite_add_8888_n_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - mask (a8r8g8b8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 0, s0, s1, s2 + li t4, 0x00ff00ff + beqz a3, 3f + nop + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ + lw t2, 0(a2) /* t2 = mask (a8r8g8b8) */ + lw t3, 4(a2) /* t3 = mask (a8r8g8b8) */ + lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */ + lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */ + addiu a1, a1, 8 + addiu a2, a2, 8 + srl t2, t2, 24 + srl t3, t3, 24 + + MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \ + t2, t3, \ + t5, t6, \ + t7, t8, \ + t4, t9, s0, s1, s2, t0, t1 + + sw t7, 0(a0) + sw t8, 4(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a3, 3f + nop + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 0(a2) /* t1 = mask (a8r8g8b8) */ + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + srl t1, t1, 24 + + MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7 + + sw t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2 + j ra + nop + +END(pixman_composite_add_8888_8888_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm_mips) +/* + * a0 - dst (a8) + * a1 - src (a8) + * a2 - w + */ + + beqz a2, 3f + nop + srl t9, a2, 2 /* t9 = how many multiples of 4 dst pixels */ + beqz t9, 1f /* branch if less than 4 src pixels */ + nop + +0: + beqz t9, 1f + addiu t9, t9, -1 + lbu t0, 0(a1) + lbu t1, 1(a1) + lbu t2, 2(a1) + lbu t3, 3(a1) + lbu t4, 0(a0) + lbu t5, 1(a0) + lbu t6, 2(a0) + lbu t7, 3(a0) + + addiu a1, a1, 4 + + precr_sra.ph.w t1, t0, 0 + precr_sra.ph.w t3, t2, 0 + precr_sra.ph.w t5, t4, 0 + precr_sra.ph.w t7, t6, 0 + + precr.qb.ph t0, t3, t1 + precr.qb.ph t1, t7, t5 + + addu_s.qb t2, t0, t1 + + sb t2, 0(a0) + srl t2, t2, 8 + sb t2, 1(a0) + srl t2, t2, 8 + sb t2, 2(a0) + srl t2, t2, 8 + sb t2, 3(a0) + addiu a2, a2, -4 + b 0b + addiu a0, a0, 4 + +1: + beqz a2, 3f + nop +2: + lbu t0, 0(a1) + lbu t1, 0(a0) + addiu a1, a1, 1 + + addu_s.qb t2, t0, t1 + sb t2, 0(a0) + addiu a2, a2, -1 + bnez a2, 2b + addiu a0, a0, 1 + +3: + j ra + nop + +END(pixman_composite_add_8_8_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + beqz a2, 4f + nop + + srl t9, a2, 2 /* t1 = how many multiples of 4 src pixels */ + beqz t9, 3f /* branch if less than 4 src pixels */ + nop +1: + addiu t9, t9, -1 + beqz t9, 2f + addiu a2, a2, -4 + + lw t0, 0(a1) + lw t1, 4(a1) + lw t2, 8(a1) + lw t3, 12(a1) + lw t4, 0(a0) + lw t5, 4(a0) + lw t6, 8(a0) + lw t7, 12(a0) + addiu a1, a1, 16 + + addu_s.qb t4, t4, t0 + addu_s.qb t5, t5, t1 + addu_s.qb t6, t6, t2 + addu_s.qb t7, t7, t3 + + sw t4, 0(a0) + sw t5, 4(a0) + sw t6, 8(a0) + sw t7, 12(a0) + b 1b + addiu a0, a0, 16 +2: + lw t0, 0(a1) + lw t1, 4(a1) + lw t2, 8(a1) + lw t3, 12(a1) + lw t4, 0(a0) + lw t5, 4(a0) + lw t6, 8(a0) + lw t7, 12(a0) + addiu a1, a1, 16 + + addu_s.qb t4, t4, t0 + addu_s.qb t5, t5, t1 + addu_s.qb t6, t6, t2 + addu_s.qb t7, t7, t3 + + sw t4, 0(a0) + sw t5, 4(a0) + sw t6, 8(a0) + sw t7, 12(a0) + + beqz a2, 4f + addiu a0, a0, 16 +3: + lw t0, 0(a1) + lw t1, 0(a0) + addiu a1, a1, 4 + addiu a2, a2, -1 + addu_s.qb t1, t1, t0 + sw t1, 0(a0) + bnez a2, 3b + addiu a0, a0, 4 +4: + jr ra + nop + +END(pixman_composite_add_8888_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8) + * a2 - w + */ + + beqz a2, 4f + nop + + SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 + li t2, 0xf800f800 + li t3, 0x07e007e0 + li t4, 0x001F001F + li t5, 0x00ff00ff + + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lbu t0, 0(a1) /* t0 = source (a8) */ + lbu t1, 1(a1) /* t1 = source (a8) */ + lhu t6, 0(a0) /* t6 = destination (r5g6b5) */ + lhu t7, 2(a0) /* t7 = destination (r5g6b5) */ + addiu a1, a1, 2 + + not t0, t0 + not t1, t1 + andi t0, 0xff /* t0 = neg source1 */ + andi t1, 0xff /* t1 = neg source2 */ + CONVERT_2x0565_TO_2x8888 t6, t7, t8, t9, t3, t4, s0, s1, s2, s3 + MIPS_2xUN8x4_MUL_2xUN8 t8, t9, t0, t1, t6, t7, t5, s0, s1, s2, s3, t8, t9 + CONVERT_2x8888_TO_2x0565 t6, t7, t8, t9, t2, t3, t4, s0, s1 + + sh t8, 0(a0) + sh t9, 2(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a2, 3f + nop + lbu t0, 0(a1) /* t0 = source (a8) */ + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + + not t0, t0 + andi t0, 0xff /* t0 = neg source */ + CONVERT_1x0565_TO_1x8888 t1, t2, t3, t4 + MIPS_UN8x4_MUL_UN8 t2, t0, t1, t5, t3, t4, t6 + CONVERT_1x8888_TO_1x0565 t1, t2, t3, t4 + + sh t2, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 +4: + j ra + nop + +END(pixman_composite_out_reverse_8_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8) + * a2 - w + */ + + beqz a2, 3f + nop + li t4, 0x00ff00ff + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lbu t0, 0(a1) /* t0 = source (a8) */ + lbu t1, 1(a1) /* t1 = source (a8) */ + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ + addiu a1, a1, 2 + not t0, t0 + not t1, t1 + andi t0, 0xff /* t0 = neg source */ + andi t1, 0xff /* t1 = neg source */ + + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t5, t6, t4, t7, t8, t9, t2, t3, t0 + + sw t5, 0(a0) + sw t6, 4(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + lbu t0, 0(a1) /* t0 = source (a8) */ + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + not t0, t0 + andi t0, 0xff /* t0 = neg source */ + + MIPS_UN8x4_MUL_UN8 t1, t0, t2, t4, t3, t5, t6 + + sw t2, 0(a0) +3: + j ra + nop + +END(pixman_composite_out_reverse_8_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (32bit constant) + * a2 - w + */ + + beqz a2, 5f + nop + + SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 + li t0, 0x00ff00ff + srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */ + beqz t9, 2f /* branch if less than 4 src pixels */ + nop +1: + beqz t9, 2f + addiu t9, t9, -1 + + lw t1, 0(a0) + lw t2, 4(a0) + lw t3, 8(a0) + lw t4, 12(a0) + + addiu a2, a2, -4 + + not t5, t1 + not t6, t2 + not t7, t3 + not t8, t4 + srl t5, t5, 24 + srl t6, t6, 24 + srl t7, t7, 24 + srl t8, t8, 24 + replv.ph t5, t5 + replv.ph t6, t6 + replv.ph t7, t7 + replv.ph t8, t8 + muleu_s.ph.qbl s0, a1, t5 + muleu_s.ph.qbr s1, a1, t5 + muleu_s.ph.qbl s2, a1, t6 + muleu_s.ph.qbr s3, a1, t6 + muleu_s.ph.qbl s4, a1, t7 + muleu_s.ph.qbr s5, a1, t7 + muleu_s.ph.qbl s6, a1, t8 + muleu_s.ph.qbr s7, a1, t8 + + shra_r.ph t5, s0, 8 + shra_r.ph t6, s1, 8 + shra_r.ph t7, s2, 8 + shra_r.ph t8, s3, 8 + and t5, t5, t0 + and t6, t6, t0 + and t7, t7, t0 + and t8, t8, t0 + addq.ph s0, s0, t5 + addq.ph s1, s1, t6 + addq.ph s2, s2, t7 + addq.ph s3, s3, t8 + shra_r.ph s0, s0, 8 + shra_r.ph s1, s1, 8 + shra_r.ph s2, s2, 8 + shra_r.ph s3, s3, 8 + shra_r.ph t5, s4, 8 + shra_r.ph t6, s5, 8 + shra_r.ph t7, s6, 8 + shra_r.ph t8, s7, 8 + and t5, t5, t0 + and t6, t6, t0 + and t7, t7, t0 + and t8, t8, t0 + addq.ph s4, s4, t5 + addq.ph s5, s5, t6 + addq.ph s6, s6, t7 + addq.ph s7, s7, t8 + shra_r.ph s4, s4, 8 + shra_r.ph s5, s5, 8 + shra_r.ph s6, s6, 8 + shra_r.ph s7, s7, 8 + + precr.qb.ph t5, s0, s1 + precr.qb.ph t6, s2, s3 + precr.qb.ph t7, s4, s5 + precr.qb.ph t8, s6, s7 + addu_s.qb t5, t1, t5 + addu_s.qb t6, t2, t6 + addu_s.qb t7, t3, t7 + addu_s.qb t8, t4, t8 + + sw t5, 0(a0) + sw t6, 4(a0) + sw t7, 8(a0) + sw t8, 12(a0) + b 1b + addiu a0, a0, 16 + +2: + beqz a2, 4f + nop +3: + lw t1, 0(a0) + + not t2, t1 + srl t2, t2, 24 + replv.ph t2, t2 + + muleu_s.ph.qbl t4, a1, t2 + muleu_s.ph.qbr t5, a1, t2 + shra_r.ph t6, t4, 8 + shra_r.ph t7, t5, 8 + + and t6,t6,t0 + and t7,t7,t0 + + addq.ph t8, t4, t6 + addq.ph t9, t5, t7 + + shra_r.ph t8, t8, 8 + shra_r.ph t9, t9, 8 + + precr.qb.ph t9, t8, t9 + + addu_s.qb t9, t1, t9 + sw t9, 0(a0) + + addiu a2, a2, -1 + bnez a2, 3b + addiu a0, a0, 4 +4: + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 +5: + j ra + nop + +END(pixman_composite_over_reverse_n_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips) +/* + * a0 - dst (a8) + * a1 - src (32bit constant) + * a2 - w + */ + + li t9, 0x00ff00ff + beqz a2, 3f + nop + srl t7, a2, 2 /* t7 = how many multiples of 4 dst pixels */ + beqz t7, 1f /* branch if less than 4 src pixels */ + nop + + srl t8, a1, 24 + replv.ph t8, t8 + +0: + beqz t7, 1f + addiu t7, t7, -1 + lbu t0, 0(a0) + lbu t1, 1(a0) + lbu t2, 2(a0) + lbu t3, 3(a0) + + precr_sra.ph.w t1, t0, 0 + precr_sra.ph.w t3, t2, 0 + precr.qb.ph t0, t3, t1 + + muleu_s.ph.qbl t2, t0, t8 + muleu_s.ph.qbr t3, t0, t8 + shra_r.ph t4, t2, 8 + shra_r.ph t5, t3, 8 + and t4, t4, t9 + and t5, t5, t9 + addq.ph t2, t2, t4 + addq.ph t3, t3, t5 + shra_r.ph t2, t2, 8 + shra_r.ph t3, t3, 8 + precr.qb.ph t2, t2, t3 + + sb t2, 0(a0) + srl t2, t2, 8 + sb t2, 1(a0) + srl t2, t2, 8 + sb t2, 2(a0) + srl t2, t2, 8 + sb t2, 3(a0) + addiu a2, a2, -4 + b 0b + addiu a0, a0, 4 + +1: + beqz a2, 3f + nop + srl t8, a1, 24 +2: + lbu t0, 0(a0) + + mul t2, t0, t8 + shra_r.ph t3, t2, 8 + andi t3, t3, 0x00ff + addq.ph t2, t2, t3 + shra_r.ph t2, t2, 8 + + sb t2, 0(a0) + addiu a2, a2, -1 + bnez a2, 2b + addiu a0, a0, 1 + +3: + j ra + nop + +END(pixman_composite_in_n_8_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 + lw t8, 16(sp) /* t8 = unit_x */ + li t6, 0x00ff00ff + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + sra t1, a3, 16 /* t0 = vx >> 16 */ + sll t1, t1, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t1, a1, t1 + lw t1, 0(t1) /* t1 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ + + OVER_2x8888_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t9, s0, s1, s2, s3 + + sw t4, 0(a0) + sw t5, 4(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + OVER_8888_8888 t0, t1, t2, t6, t4, t5, t3, t7 + + sw t2, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 + j ra + nop + +END(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8r8g8b8) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, v0, v1 + lw t8, 40(sp) /* t8 = unit_x */ + li t4, 0x00ff00ff + li t5, 0xf800f800 + li t6, 0x07e007e0 + li t7, 0x001F001F + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + sra t1, a3, 16 /* t0 = vx >> 16 */ + sll t1, t1, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t1, a1, t1 + lw t1, 0(t1) /* t1 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + lhu t3, 2(a0) /* t3 = destination (r5g6b5) */ + + CONVERT_2x0565_TO_2x8888 t2, t3, v0, v1, t6, t7, s0, s1, s2, s3 + OVER_2x8888_2x8888 t0, t1, v0, v1, t2, t3, t4, t9, s0, s1, s2, s3, s4 + CONVERT_2x8888_TO_2x0565 t2, t3, v0, v1, t5, t6, t7, t9, s2 + + sh v0, 0(a0) + sh v1, 2(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + CONVERT_1x0565_TO_1x8888 t1, t2, t5, t6 + OVER_8888_8888 t0, t2, t1, t4, t3, t5, t6, t7 + CONVERT_1x8888_TO_1x0565 t1, t2, t5, t6 + + sh t2, 0(a0) +3: + RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, v0, v1 + j ra + nop + +END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (r5g6b5) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 0, v0 + beqz a2, 3f + nop + + lw v0, 16(sp) /* v0 = unit_x */ + addiu t1, a2, -1 + beqz t1, 2f + nop + + li t4, 0x07e007e0 + li t5, 0x001F001F +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */ + addu t0, a1, t0 + lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */ + addu a3, a3, v0 /* a3 = vx + unit_x */ + sra t1, a3, 16 /* t1 = vx >> 16 */ + sll t1, t1, 1 /* t1 = t1 * 2 ((r5g6b5)) */ + addu t1, a1, t1 + lhu t1, 0(t1) /* t1 = source ((r5g6b5)) */ + addu a3, a3, v0 /* a3 = vx + unit_x */ + addiu a2, a2, -2 + + CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 + + sw t2, 0(a0) + sw t3, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */ + addu t0, a1, t0 + lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */ + + CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3 + + sw t1, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8r8g8b8) + * a2 - mask (a8) + * a3 - w + * 16(sp) - vx + * 20(sp) - unit_x + */ + beqz a3, 4f + nop + + SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5 + lw v0, 36(sp) /* v0 = vx */ + lw v1, 40(sp) /* v1 = unit_x */ + li t6, 0x00ff00ff + li t7, 0xf800f800 + li t8, 0x07e007e0 + li t9, 0x001F001F + + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + sra t0, v0, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + addu v0, v0, v1 /* v0 = vx + unit_x */ + sra t1, v0, 16 /* t1 = vx >> 16 */ + sll t1, t1, 2 /* t1 = t1 * 4 (a8r8g8b8) */ + addu t1, a1, t1 + lw t1, 0(t1) /* t1 = source (a8r8g8b8) */ + addu v0, v0, v1 /* v0 = vx + unit_x */ + lbu t2, 0(a2) /* t2 = mask (a8) */ + lbu t3, 1(a2) /* t3 = mask (a8) */ + lhu t4, 0(a0) /* t4 = destination (r5g6b5) */ + lhu t5, 2(a0) /* t5 = destination (r5g6b5) */ + addiu a2, a2, 2 + + CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5 + OVER_2x8888_2x8_2x8888 t0, t1, \ + t2, t3, \ + s0, s1, \ + t4, t5, \ + t6, s2, s3, s4, s5, t2, t3 + CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3 + + sh s0, 0(a0) + sh s1, 2(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a3, 3f + nop + sra t0, v0, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + lbu t1, 0(a2) /* t1 = mask (a8) */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + + CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5 + OVER_8888_8_8888 t0, t1, t3, t2, t6, t4, t5, t7, t8 + CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5 + + sh t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5 +4: + j ra + nop + +END(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (r5g6b5) + * a2 - mask (a8) + * a3 - w + * 16(sp) - vx + * 20(sp) - unit_x + */ + + beqz a3, 4f + nop + SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5 + lw v0, 36(sp) /* v0 = vx */ + lw v1, 40(sp) /* v1 = unit_x */ + li t4, 0xf800f800 + li t5, 0x07e007e0 + li t6, 0x001F001F + li t7, 0x00ff00ff + + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + sra t0, v0, 16 /* t0 = vx >> 16 */ + sll t0, t0, 1 /* t0 = t0 * 2 (r5g6b5) */ + addu t0, a1, t0 + lhu t0, 0(t0) /* t0 = source (r5g6b5) */ + addu v0, v0, v1 /* v0 = vx + unit_x */ + sra t1, v0, 16 /* t1 = vx >> 16 */ + sll t1, t1, 1 /* t1 = t1 * 2 (r5g6b5) */ + addu t1, a1, t1 + lhu t1, 0(t1) /* t1 = source (r5g6b5) */ + addu v0, v0, v1 /* v0 = vx + unit_x */ + lbu t2, 0(a2) /* t2 = mask (a8) */ + lbu t3, 1(a2) /* t3 = mask (a8) */ + lhu t8, 0(a0) /* t8 = destination (r5g6b5) */ + lhu t9, 2(a0) /* t9 = destination (r5g6b5) */ + addiu a2, a2, 2 + + CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5 + CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1 + OVER_2x8888_2x8_2x8888 s0, s1, \ + t2, t3, \ + s2, s3, \ + t0, t1, \ + t7, t8, t9, s4, s5, s0, s1 + CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3 + + sh s0, 0(a0) + sh s1, 2(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a3, 3f + nop + sra t0, v0, 16 /* t0 = vx >> 16 */ + sll t0, t0, 1 /* t0 = t0 * 2 (r5g6b5) */ + addu t0, a1, t0 + + lhu t0, 0(t0) /* t0 = source (r5g6b5) */ + lbu t1, 0(a2) /* t1 = mask (a8) */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + + CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5 + CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6 + OVER_8888_8_8888 t3, t1, t4, t0, t7, t2, t5, t6, t8 + CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5 + + sh t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5 +4: + j ra + nop + +END(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 + + lw s0, 36(sp) /* s0 = wt */ + lw s1, 40(sp) /* s1 = wb */ + lw s2, 44(sp) /* s2 = vx */ + lw s3, 48(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a1) /* t0 = tl */ + lwx t1, t8(a1) /* t1 = tr */ + addiu a3, a3, -1 + lwx t2, t9(a2) /* t2 = bl */ + lwx t3, t8(a2) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t0, 0(a0) + bnez a3, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 + + lw s0, 36(sp) /* s0 = wt */ + lw s1, 40(sp) /* s1 = wb */ + lw s2, 44(sp) /* s2 = vx */ + lw s3, 48(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a1) /* t0 = tl */ + lwx t1, t8(a1) /* t1 = tr */ + addiu a3, a3, -1 + lwx t2, t9(a2) /* t2 = bl */ + lwx t3, t8(a2) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 + + addu s2, s2, s3 /* vx += unit_x; */ + sh t1, 0(a0) + bnez a3, 0b + addiu a0, a0, 2 + + RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw s0, 44(sp) /* s0 = wt */ + lw s1, 48(sp) /* s1 = wb */ + lw s2, 52(sp) /* s2 = vx */ + lw s3, 56(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li v1, 0x07e007e0 + li s8, 0x001f001f + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 1 + addiu t8, t9, 2 + lhx t0, t9(a1) /* t0 = tl */ + lhx t1, t8(a1) /* t1 = tr */ + andi t1, t1, 0xffff + addiu a3, a3, -1 + lhx t2, t9(a2) /* t2 = bl */ + lhx t3, t8(a2) /* t3 = br */ + andi t3, t3, 0xffff + + CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t0, 0(a0) + bnez a3, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw s0, 44(sp) /* s0 = wt */ + lw s1, 48(sp) /* s1 = wb */ + lw s2, 52(sp) /* s2 = vx */ + lw s3, 56(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li v1, 0x07e007e0 + li s8, 0x001f001f + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 1 + addiu t8, t9, 2 + lhx t0, t9(a1) /* t0 = tl */ + lhx t1, t8(a1) /* t1 = tr */ + andi t1, t1, 0xffff + addiu a3, a3, -1 + lhx t2, t9(a2) /* t2 = bl */ + lhx t3, t8(a2) /* t3 = br */ + andi t3, t3, 0xffff + + CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 + + addu s2, s2, s3 /* vx += unit_x; */ + sh t1, 0(a0) + bnez a3, 0b + addiu a0, a0, 2 + + RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw s0, 40(sp) /* s0 = wt */ + lw s1, 44(sp) /* s1 = wb */ + lw s2, 48(sp) /* s2 = vx */ + lw s3, 52(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li s8, 0x00ff00ff + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a1) /* t0 = tl */ + lwx t1, t8(a1) /* t1 = tr */ + addiu a3, a3, -1 + lwx t2, t9(a2) /* t2 = bl */ + lwx t3, t8(a2) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lw t1, 0(a0) /* t1 = dest */ + OVER_8888_8888 t0, t1, t2, s8, t3, t4, t5, t6 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t2, 0(a0) + bnez a3, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 + + lw s0, 36(sp) /* s0 = wt */ + lw s1, 40(sp) /* s1 = wb */ + lw s2, 44(sp) /* s2 = vx */ + lw s3, 48(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a1) /* t0 = tl */ + lwx t1, t8(a1) /* t1 = tr */ + addiu a3, a3, -1 + lwx t2, t9(a2) /* t2 = bl */ + lwx t3, t8(a2) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lw t1, 0(a0) + addu_s.qb t2, t0, t1 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t2, 0(a0) + bnez a3, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *mask + * a2 - *src_top + * a3 - *src_bottom + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + * 32(sp) - w + */ + + lw v1, 32(sp) + beqz v1, 1f + nop + + SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw s0, 44(sp) /* s0 = wt */ + lw s1, 48(sp) /* s1 = wb */ + lw s2, 52(sp) /* s2 = vx */ + lw s3, 56(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li s8, 0x00ff00ff + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a2) /* t0 = tl */ + lwx t1, t8(a2) /* t1 = tr */ + addiu v1, v1, -1 + lwx t2, t9(a3) /* t2 = bl */ + lwx t3, t8(a3) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lbu t1, 0(a1) /* t1 = mask */ + addiu a1, a1, 1 + MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t0, 0(a0) + bnez v1, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *mask + * a2 - *src_top + * a3 - *src_bottom + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + * 32(sp) - w + */ + + lw v1, 32(sp) + beqz v1, 1f + nop + + SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw s0, 44(sp) /* s0 = wt */ + lw s1, 48(sp) /* s1 = wb */ + lw s2, 52(sp) /* s2 = vx */ + lw s3, 56(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li s8, 0x00ff00ff + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a2) /* t0 = tl */ + lwx t1, t8(a2) /* t1 = tr */ + addiu v1, v1, -1 + lwx t2, t9(a3) /* t2 = bl */ + lwx t3, t8(a3) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lbu t1, 0(a1) /* t1 = mask */ + addiu a1, a1, 1 + MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4 + CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 + + addu s2, s2, s3 /* vx += unit_x; */ + sh t1, 0(a0) + bnez v1, 0b + addiu a0, a0, 2 + + RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *mask + * a2 - *src_top + * a3 - *src_bottom + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + * 32(sp) - w + */ + + lw t0, 32(sp) + beqz t0, 1f + nop + + SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra + + lw s0, 48(sp) /* s0 = wt */ + lw s1, 52(sp) /* s1 = wb */ + lw s2, 56(sp) /* s2 = vx */ + lw s3, 60(sp) /* s3 = unit_x */ + lw ra, 64(sp) /* ra = w */ + li v0, 0x00ff00ff + li v1, 0x07e007e0 + li s8, 0x001f001f + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + li t5, BILINEAR_INTERPOLATION_RANGE + subu t5, t5, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 1 + addiu t8, t9, 2 + lhx t0, t9(a2) /* t0 = tl */ + lhx t1, t8(a2) /* t1 = tr */ + andi t1, t1, 0xffff + addiu ra, ra, -1 + lhx t2, t9(a3) /* t2 = bl */ + lhx t3, t8(a3) /* t3 = br */ + andi t3, t3, 0xffff + + CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lbu t1, 0(a1) /* t1 = mask */ + addiu a1, a1, 1 + MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t0, 0(a0) + bnez ra, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *mask + * a2 - *src_top + * a3 - *src_bottom + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + * 32(sp) - w + */ + + lw t0, 32(sp) + beqz t0, 1f + nop + + SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra + + lw s0, 48(sp) /* s0 = wt */ + lw s1, 52(sp) /* s1 = wb */ + lw s2, 56(sp) /* s2 = vx */ + lw s3, 60(sp) /* s3 = unit_x */ + lw ra, 64(sp) /* ra = w */ + li v0, 0x00ff00ff + li v1, 0x07e007e0 + li s8, 0x001f001f + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + li t5, BILINEAR_INTERPOLATION_RANGE + subu t5, t5, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 1 + addiu t8, t9, 2 + lhx t0, t9(a2) /* t0 = tl */ + lhx t1, t8(a2) /* t1 = tr */ + andi t1, t1, 0xffff + addiu ra, ra, -1 + lhx t2, t9(a3) /* t2 = bl */ + lhx t3, t8(a3) /* t3 = br */ + andi t3, t3, 0xffff + + CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lbu t1, 0(a1) /* t1 = mask */ + addiu a1, a1, 1 + MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4 + CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 + + addu s2, s2, s3 /* vx += unit_x; */ + sh t1, 0(a0) + bnez ra, 0b + addiu a0, a0, 2 + + RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - mask (a8) + * a2 - src_top (a8r8g8b8) + * a3 - src_bottom (a8r8g8b8) + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + * 32(sp) - w + */ + + SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw v1, 60(sp) /* v1 = w(sp + 32 + 28 save regs stack offset)*/ + beqz v1, 1f + nop + + lw s0, 44(sp) /* s0 = wt */ + lw s1, 48(sp) /* s1 = wb */ + lw s2, 52(sp) /* s2 = vx */ + lw s3, 56(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li s8, 0x00ff00ff + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a2) /* t0 = tl */ + lwx t1, t8(a2) /* t1 = tr */ + addiu v1, v1, -1 + lwx t2, t9(a3) /* t2 = bl */ + lwx t3, t8(a3) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, \ + t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lbu t1, 0(a1) /* t1 = mask */ + lw t2, 0(a0) /* t2 = dst */ + addiu a1, a1, 1 + OVER_8888_8_8888 t0, t1, t2, t0, s8, t3, t4, t5, t6 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t0, 0(a0) + bnez v1, 0b + addiu a0, a0, 4 + +1: + RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips) +/* + * a0 - *dst + * a1 - *mask + * a2 - *src_top + * a3 - *src_bottom + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + * 32(sp) - w + */ + + lw v1, 32(sp) + beqz v1, 1f + nop + + SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw s0, 44(sp) /* s0 = wt */ + lw s1, 48(sp) /* s1 = wb */ + lw s2, 52(sp) /* s2 = vx */ + lw s3, 56(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li s8, 0x00ff00ff + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a2) /* t0 = tl */ + lwx t1, t8(a2) /* t1 = tr */ + addiu v1, v1, -1 + lwx t2, t9(a3) /* t2 = bl */ + lwx t3, t8(a3) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lbu t1, 0(a1) /* t1 = mask */ + lw t2, 0(a0) /* t2 = dst */ + addiu a1, a1, 1 + MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t0, s8, t3, t4, t5 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t0, 0(a0) + bnez v1, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips) diff --git a/src/pixman/pixman/pixman-mips-dspr2-asm.h b/src/pixman/pixman/pixman-mips-dspr2-asm.h new file mode 100644 index 0000000..11849bd --- /dev/null +++ b/src/pixman/pixman/pixman-mips-dspr2-asm.h @@ -0,0 +1,713 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nemanja Lukic (nlukic@mips.com) + */ + +#ifndef PIXMAN_MIPS_DSPR2_ASM_H +#define PIXMAN_MIPS_DSPR2_ASM_H + +#define zero $0 +#define AT $1 +#define v0 $2 +#define v1 $3 +#define a0 $4 +#define a1 $5 +#define a2 $6 +#define a3 $7 +#define t0 $8 +#define t1 $9 +#define t2 $10 +#define t3 $11 +#define t4 $12 +#define t5 $13 +#define t6 $14 +#define t7 $15 +#define s0 $16 +#define s1 $17 +#define s2 $18 +#define s3 $19 +#define s4 $20 +#define s5 $21 +#define s6 $22 +#define s7 $23 +#define t8 $24 +#define t9 $25 +#define k0 $26 +#define k1 $27 +#define gp $28 +#define sp $29 +#define fp $30 +#define s8 $30 +#define ra $31 + +/* + * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2 + */ +#define LEAF_MIPS32R2(symbol) \ + .globl symbol; \ + .align 2; \ +#ifdef __ELF__ + .hidden symbol; \ + .type symbol, @function; \ +#endif + .ent symbol, 0; \ +symbol: .frame sp, 0, ra; \ + .set push; \ + .set arch=mips32r2; \ + .set noreorder; \ + .set noat; + +/* + * LEAF_MIPS32R2 - declare leaf routine for MIPS DSPr2 + */ +#define LEAF_MIPS_DSPR2(symbol) \ +LEAF_MIPS32R2(symbol) \ + .set dspr2; + +/* + * END - mark end of function + */ +#define END(function) \ + .set pop; \ + .end function; \ + .size function,.-function + +/* + * Checks if stack offset is big enough for storing/restoring regs_num + * number of register to/from stack. Stack offset must be greater than + * or equal to the number of bytes needed for storing registers (regs_num*4). + * Since MIPS ABI allows usage of first 16 bytes of stack frame (this is + * preserved for input arguments of the functions, already stored in a0-a3), + * stack size can be further optimized by utilizing this space. + */ +.macro CHECK_STACK_OFFSET regs_num, stack_offset +.if \stack_offset < \regs_num * 4 - 16 +.error "Stack offset too small." +.endif +.endm + +/* + * Saves set of registers on stack. Maximum number of registers that + * can be saved on stack is limitted to 14 (a0-a3, v0-v1 and s0-s7). + * Stack offset is number of bytes that are added to stack pointer (sp) + * before registers are pushed in order to provide enough space on stack + * (offset must be multiple of 4, and must be big enough, as described by + * CHECK_STACK_OFFSET macro). This macro is intended to be used in + * combination with RESTORE_REGS_FROM_STACK macro. Example: + * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 + * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 + */ +.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \ + r2 = 0, r3 = 0, r4 = 0, \ + r5 = 0, r6 = 0, r7 = 0, \ + r8 = 0, r9 = 0, r10 = 0, \ + r11 = 0, r12 = 0, r13 = 0, \ + r14 = 0 + .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4) + .error "Stack offset must be pozitive and multiple of 4." + .endif + .if \stack_offset != 0 + addiu sp, sp, -\stack_offset + .endif + sw \r1, 0(sp) + .if \r2 != 0 + sw \r2, 4(sp) + .endif + .if \r3 != 0 + sw \r3, 8(sp) + .endif + .if \r4 != 0 + sw \r4, 12(sp) + .endif + .if \r5 != 0 + CHECK_STACK_OFFSET 5, \stack_offset + sw \r5, 16(sp) + .endif + .if \r6 != 0 + CHECK_STACK_OFFSET 6, \stack_offset + sw \r6, 20(sp) + .endif + .if \r7 != 0 + CHECK_STACK_OFFSET 7, \stack_offset + sw \r7, 24(sp) + .endif + .if \r8 != 0 + CHECK_STACK_OFFSET 8, \stack_offset + sw \r8, 28(sp) + .endif + .if \r9 != 0 + CHECK_STACK_OFFSET 9, \stack_offset + sw \r9, 32(sp) + .endif + .if \r10 != 0 + CHECK_STACK_OFFSET 10, \stack_offset + sw \r10, 36(sp) + .endif + .if \r11 != 0 + CHECK_STACK_OFFSET 11, \stack_offset + sw \r11, 40(sp) + .endif + .if \r12 != 0 + CHECK_STACK_OFFSET 12, \stack_offset + sw \r12, 44(sp) + .endif + .if \r13 != 0 + CHECK_STACK_OFFSET 13, \stack_offset + sw \r13, 48(sp) + .endif + .if \r14 != 0 + CHECK_STACK_OFFSET 14, \stack_offset + sw \r14, 52(sp) + .endif +.endm + +/* + * Restores set of registers from stack. Maximum number of registers that + * can be restored from stack is limitted to 14 (a0-a3, v0-v1 and s0-s7). + * Stack offset is number of bytes that are added to stack pointer (sp) + * after registers are restored (offset must be multiple of 4, and must + * be big enough, as described by CHECK_STACK_OFFSET macro). This macro is + * intended to be used in combination with RESTORE_REGS_FROM_STACK macro. + * Example: + * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 + * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 + */ +.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \ + r2 = 0, r3 = 0, r4 = 0, \ + r5 = 0, r6 = 0, r7 = 0, \ + r8 = 0, r9 = 0, r10 = 0, \ + r11 = 0, r12 = 0, r13 = 0, \ + r14 = 0 + .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4) + .error "Stack offset must be pozitive and multiple of 4." + .endif + lw \r1, 0(sp) + .if \r2 != 0 + lw \r2, 4(sp) + .endif + .if \r3 != 0 + lw \r3, 8(sp) + .endif + .if \r4 != 0 + lw \r4, 12(sp) + .endif + .if \r5 != 0 + CHECK_STACK_OFFSET 5, \stack_offset + lw \r5, 16(sp) + .endif + .if \r6 != 0 + CHECK_STACK_OFFSET 6, \stack_offset + lw \r6, 20(sp) + .endif + .if \r7 != 0 + CHECK_STACK_OFFSET 7, \stack_offset + lw \r7, 24(sp) + .endif + .if \r8 != 0 + CHECK_STACK_OFFSET 8, \stack_offset + lw \r8, 28(sp) + .endif + .if \r9 != 0 + CHECK_STACK_OFFSET 9, \stack_offset + lw \r9, 32(sp) + .endif + .if \r10 != 0 + CHECK_STACK_OFFSET 10, \stack_offset + lw \r10, 36(sp) + .endif + .if \r11 != 0 + CHECK_STACK_OFFSET 11, \stack_offset + lw \r11, 40(sp) + .endif + .if \r12 != 0 + CHECK_STACK_OFFSET 12, \stack_offset + lw \r12, 44(sp) + .endif + .if \r13 != 0 + CHECK_STACK_OFFSET 13, \stack_offset + lw \r13, 48(sp) + .endif + .if \r14 != 0 + CHECK_STACK_OFFSET 14, \stack_offset + lw \r14, 52(sp) + .endif + .if \stack_offset != 0 + addiu sp, sp, \stack_offset + .endif +.endm + +/* + * Conversion of single r5g6b5 pixel (in_565) to single a8r8g8b8 pixel + * returned in (out_8888) register. Requires two temporary registers + * (scratch1 and scratch2). + */ +.macro CONVERT_1x0565_TO_1x8888 in_565, \ + out_8888, \ + scratch1, scratch2 + lui \out_8888, 0xff00 + sll \scratch1, \in_565, 0x3 + andi \scratch2, \scratch1, 0xff + ext \scratch1, \in_565, 0x2, 0x3 + or \scratch1, \scratch2, \scratch1 + or \out_8888, \out_8888, \scratch1 + + sll \scratch1, \in_565, 0x5 + andi \scratch1, \scratch1, 0xfc00 + srl \scratch2, \in_565, 0x1 + andi \scratch2, \scratch2, 0x300 + or \scratch2, \scratch1, \scratch2 + or \out_8888, \out_8888, \scratch2 + + andi \scratch1, \in_565, 0xf800 + srl \scratch2, \scratch1, 0x5 + andi \scratch2, \scratch2, 0xff00 + or \scratch1, \scratch1, \scratch2 + sll \scratch1, \scratch1, 0x8 + or \out_8888, \out_8888, \scratch1 +.endm + +/* + * Conversion of two r5g6b5 pixels (in1_565 and in2_565) to two a8r8g8b8 pixels + * returned in (out1_8888 and out2_8888) registers. Requires four scratch + * registers (scratch1 ... scratch4). It also requires maskG and maskB for + * color component extractions. These masks must have following values: + * li maskG, 0x07e007e0 + * li maskB, 0x001F001F + */ +.macro CONVERT_2x0565_TO_2x8888 in1_565, in2_565, \ + out1_8888, out2_8888, \ + maskG, maskB, \ + scratch1, scratch2, scratch3, scratch4 + sll \scratch1, \in1_565, 16 + or \scratch1, \scratch1, \in2_565 + lui \out2_8888, 0xff00 + ori \out2_8888, \out2_8888, 0xff00 + shrl.ph \scratch2, \scratch1, 11 + and \scratch3, \scratch1, \maskG + shra.ph \scratch4, \scratch2, 2 + shll.ph \scratch2, \scratch2, 3 + shll.ph \scratch3, \scratch3, 5 + or \scratch2, \scratch2, \scratch4 + shrl.qb \scratch4, \scratch3, 6 + or \out2_8888, \out2_8888, \scratch2 + or \scratch3, \scratch3, \scratch4 + and \scratch1, \scratch1, \maskB + shll.ph \scratch2, \scratch1, 3 + shra.ph \scratch4, \scratch1, 2 + or \scratch2, \scratch2, \scratch4 + or \scratch3, \scratch2, \scratch3 + precrq.ph.w \out1_8888, \out2_8888, \scratch3 + precr_sra.ph.w \out2_8888, \scratch3, 0 +.endm + +/* + * Conversion of single a8r8g8b8 pixel (in_8888) to single r5g6b5 pixel + * returned in (out_565) register. Requires two temporary registers + * (scratch1 and scratch2). + */ +.macro CONVERT_1x8888_TO_1x0565 in_8888, \ + out_565, \ + scratch1, scratch2 + ext \out_565, \in_8888, 0x3, 0x5 + srl \scratch1, \in_8888, 0x5 + andi \scratch1, \scratch1, 0x07e0 + srl \scratch2, \in_8888, 0x8 + andi \scratch2, \scratch2, 0xf800 + or \out_565, \out_565, \scratch1 + or \out_565, \out_565, \scratch2 +.endm + +/* + * Conversion of two a8r8g8b8 pixels (in1_8888 and in2_8888) to two r5g6b5 + * pixels returned in (out1_565 and out2_565) registers. Requires two temporary + * registers (scratch1 and scratch2). It also requires maskR, maskG and maskB + * for color component extractions. These masks must have following values: + * li maskR, 0xf800f800 + * li maskG, 0x07e007e0 + * li maskB, 0x001F001F + * Value of input register in2_8888 is lost. + */ +.macro CONVERT_2x8888_TO_2x0565 in1_8888, in2_8888, \ + out1_565, out2_565, \ + maskR, maskG, maskB, \ + scratch1, scratch2 + precr.qb.ph \scratch1, \in2_8888, \in1_8888 + precrq.qb.ph \in2_8888, \in2_8888, \in1_8888 + and \out1_565, \scratch1, \maskR + shrl.ph \scratch1, \scratch1, 3 + shll.ph \in2_8888, \in2_8888, 3 + and \scratch1, \scratch1, \maskB + or \out1_565, \out1_565, \scratch1 + and \in2_8888, \in2_8888, \maskG + or \out1_565, \out1_565, \in2_8888 + srl \out2_565, \out1_565, 16 +.endm + +/* + * Multiply pixel (a8) with single pixel (a8r8g8b8). It requires maskLSR needed + * for rounding process. maskLSR must have following value: + * li maskLSR, 0x00ff00ff + */ +.macro MIPS_UN8x4_MUL_UN8 s_8888, \ + m_8, \ + d_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3 + replv.ph \m_8, \m_8 /* 0 | M | 0 | M */ + muleu_s.ph.qbl \scratch1, \s_8888, \m_8 /* A*M | R*M */ + muleu_s.ph.qbr \scratch2, \s_8888, \m_8 /* G*M | B*M */ + shra_r.ph \scratch3, \scratch1, 8 + shra_r.ph \d_8888, \scratch2, 8 + and \scratch3, \scratch3, \maskLSR /* 0 |A*M| 0 |R*M */ + and \d_8888, \d_8888, \maskLSR /* 0 |G*M| 0 |B*M */ + addq.ph \scratch1, \scratch1, \scratch3 /* A*M+A*M | R*M+R*M */ + addq.ph \scratch2, \scratch2, \d_8888 /* G*M+G*M | B*M+B*M */ + shra_r.ph \scratch1, \scratch1, 8 + shra_r.ph \scratch2, \scratch2, 8 + precr.qb.ph \d_8888, \scratch1, \scratch2 +.endm + +/* + * Multiply two pixels (a8) with two pixels (a8r8g8b8). It requires maskLSR + * needed for rounding process. maskLSR must have following value: + * li maskLSR, 0x00ff00ff + */ +.macro MIPS_2xUN8x4_MUL_2xUN8 s1_8888, \ + s2_8888, \ + m1_8, \ + m2_8, \ + d1_8888, \ + d2_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, \ + scratch4, scratch5, scratch6 + replv.ph \m1_8, \m1_8 /* 0 | M1 | 0 | M1 */ + replv.ph \m2_8, \m2_8 /* 0 | M2 | 0 | M2 */ + muleu_s.ph.qbl \scratch1, \s1_8888, \m1_8 /* A1*M1 | R1*M1 */ + muleu_s.ph.qbr \scratch2, \s1_8888, \m1_8 /* G1*M1 | B1*M1 */ + muleu_s.ph.qbl \scratch3, \s2_8888, \m2_8 /* A2*M2 | R2*M2 */ + muleu_s.ph.qbr \scratch4, \s2_8888, \m2_8 /* G2*M2 | B2*M2 */ + shra_r.ph \scratch5, \scratch1, 8 + shra_r.ph \d1_8888, \scratch2, 8 + shra_r.ph \scratch6, \scratch3, 8 + shra_r.ph \d2_8888, \scratch4, 8 + and \scratch5, \scratch5, \maskLSR /* 0 |A1*M1| 0 |R1*M1 */ + and \d1_8888, \d1_8888, \maskLSR /* 0 |G1*M1| 0 |B1*M1 */ + and \scratch6, \scratch6, \maskLSR /* 0 |A2*M2| 0 |R2*M2 */ + and \d2_8888, \d2_8888, \maskLSR /* 0 |G2*M2| 0 |B2*M2 */ + addq.ph \scratch1, \scratch1, \scratch5 + addq.ph \scratch2, \scratch2, \d1_8888 + addq.ph \scratch3, \scratch3, \scratch6 + addq.ph \scratch4, \scratch4, \d2_8888 + shra_r.ph \scratch1, \scratch1, 8 + shra_r.ph \scratch2, \scratch2, 8 + shra_r.ph \scratch3, \scratch3, 8 + shra_r.ph \scratch4, \scratch4, 8 + precr.qb.ph \d1_8888, \scratch1, \scratch2 + precr.qb.ph \d2_8888, \scratch3, \scratch4 +.endm + +/* + * Multiply pixel (a8r8g8b8) with single pixel (a8r8g8b8). It requires maskLSR + * needed for rounding process. maskLSR must have following value: + * li maskLSR, 0x00ff00ff + */ +.macro MIPS_UN8x4_MUL_UN8x4 s_8888, \ + m_8888, \ + d_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, scratch4 + preceu.ph.qbl \scratch1, \m_8888 /* 0 | A | 0 | R */ + preceu.ph.qbr \scratch2, \m_8888 /* 0 | G | 0 | B */ + muleu_s.ph.qbl \scratch3, \s_8888, \scratch1 /* A*A | R*R */ + muleu_s.ph.qbr \scratch4, \s_8888, \scratch2 /* G*G | B*B */ + shra_r.ph \scratch1, \scratch3, 8 + shra_r.ph \scratch2, \scratch4, 8 + and \scratch1, \scratch1, \maskLSR /* 0 |A*A| 0 |R*R */ + and \scratch2, \scratch2, \maskLSR /* 0 |G*G| 0 |B*B */ + addq.ph \scratch1, \scratch1, \scratch3 + addq.ph \scratch2, \scratch2, \scratch4 + shra_r.ph \scratch1, \scratch1, 8 + shra_r.ph \scratch2, \scratch2, 8 + precr.qb.ph \d_8888, \scratch1, \scratch2 +.endm + +/* + * Multiply two pixels (a8r8g8b8) with two pixels (a8r8g8b8). It requires + * maskLSR needed for rounding process. maskLSR must have following value: + * li maskLSR, 0x00ff00ff + */ + +.macro MIPS_2xUN8x4_MUL_2xUN8x4 s1_8888, \ + s2_8888, \ + m1_8888, \ + m2_8888, \ + d1_8888, \ + d2_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, \ + scratch4, scratch5, scratch6 + preceu.ph.qbl \scratch1, \m1_8888 /* 0 | A | 0 | R */ + preceu.ph.qbr \scratch2, \m1_8888 /* 0 | G | 0 | B */ + preceu.ph.qbl \scratch3, \m2_8888 /* 0 | A | 0 | R */ + preceu.ph.qbr \scratch4, \m2_8888 /* 0 | G | 0 | B */ + muleu_s.ph.qbl \scratch5, \s1_8888, \scratch1 /* A*A | R*R */ + muleu_s.ph.qbr \scratch6, \s1_8888, \scratch2 /* G*G | B*B */ + muleu_s.ph.qbl \scratch1, \s2_8888, \scratch3 /* A*A | R*R */ + muleu_s.ph.qbr \scratch2, \s2_8888, \scratch4 /* G*G | B*B */ + shra_r.ph \scratch3, \scratch5, 8 + shra_r.ph \scratch4, \scratch6, 8 + shra_r.ph \d1_8888, \scratch1, 8 + shra_r.ph \d2_8888, \scratch2, 8 + and \scratch3, \scratch3, \maskLSR /* 0 |A*A| 0 |R*R */ + and \scratch4, \scratch4, \maskLSR /* 0 |G*G| 0 |B*B */ + and \d1_8888, \d1_8888, \maskLSR /* 0 |A*A| 0 |R*R */ + and \d2_8888, \d2_8888, \maskLSR /* 0 |G*G| 0 |B*B */ + addq.ph \scratch3, \scratch3, \scratch5 + addq.ph \scratch4, \scratch4, \scratch6 + addq.ph \d1_8888, \d1_8888, \scratch1 + addq.ph \d2_8888, \d2_8888, \scratch2 + shra_r.ph \scratch3, \scratch3, 8 + shra_r.ph \scratch4, \scratch4, 8 + shra_r.ph \scratch5, \d1_8888, 8 + shra_r.ph \scratch6, \d2_8888, 8 + precr.qb.ph \d1_8888, \scratch3, \scratch4 + precr.qb.ph \d2_8888, \scratch5, \scratch6 +.endm + +/* + * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8 + * destination pixel (d_8888) using a8 mask (m_8). It also requires maskLSR + * needed for rounding process. maskLSR must have following value: + * li maskLSR, 0x00ff00ff + */ +.macro OVER_8888_8_8888 s_8888, \ + m_8, \ + d_8888, \ + out_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, scratch4 + MIPS_UN8x4_MUL_UN8 \s_8888, \m_8, \ + \scratch1, \maskLSR, \ + \scratch2, \scratch3, \scratch4 + + not \scratch2, \scratch1 + srl \scratch2, \scratch2, 24 + + MIPS_UN8x4_MUL_UN8 \d_8888, \scratch2, \ + \d_8888, \maskLSR, \ + \scratch3, \scratch4, \out_8888 + + addu_s.qb \out_8888, \d_8888, \scratch1 +.endm + +/* + * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two + * a8r8g8b8 destination pixels (d1_8888 and d2_8888) using a8 masks (m1_8 and + * m2_8). It also requires maskLSR needed for rounding process. maskLSR must + * have following value: + * li maskLSR, 0x00ff00ff + */ +.macro OVER_2x8888_2x8_2x8888 s1_8888, \ + s2_8888, \ + m1_8, \ + m2_8, \ + d1_8888, \ + d2_8888, \ + out1_8888, \ + out2_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, \ + scratch4, scratch5, scratch6 + MIPS_2xUN8x4_MUL_2xUN8 \s1_8888, \s2_8888, \ + \m1_8, \m2_8, \ + \scratch1, \scratch2, \ + \maskLSR, \ + \scratch3, \scratch4, \out1_8888, \ + \out2_8888, \scratch5, \scratch6 + + not \scratch3, \scratch1 + srl \scratch3, \scratch3, 24 + not \scratch4, \scratch2 + srl \scratch4, \scratch4, 24 + + MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \ + \scratch3, \scratch4, \ + \d1_8888, \d2_8888, \ + \maskLSR, \ + \scratch5, \scratch6, \out1_8888, \ + \out2_8888, \scratch3, \scratch4 + + addu_s.qb \out1_8888, \d1_8888, \scratch1 + addu_s.qb \out2_8888, \d2_8888, \scratch2 +.endm + +/* + * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8 + * destination pixel (d_8888). It also requires maskLSR needed for rounding + * process. maskLSR must have following value: + * li maskLSR, 0x00ff00ff + */ +.macro OVER_8888_8888 s_8888, \ + d_8888, \ + out_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, scratch4 + not \scratch1, \s_8888 + srl \scratch1, \scratch1, 24 + + MIPS_UN8x4_MUL_UN8 \d_8888, \scratch1, \ + \out_8888, \maskLSR, \ + \scratch2, \scratch3, \scratch4 + + addu_s.qb \out_8888, \out_8888, \s_8888 +.endm + +/* + * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two + * a8r8g8b8 destination pixels (d1_8888 and d2_8888). It also requires maskLSR + * needed for rounding process. maskLSR must have following value: + * li maskLSR, 0x00ff00ff + */ +.macro OVER_2x8888_2x8888 s1_8888, \ + s2_8888, \ + d1_8888, \ + d2_8888, \ + out1_8888, \ + out2_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, \ + scratch4, scratch5, scratch6 + not \scratch1, \s1_8888 + srl \scratch1, \scratch1, 24 + not \scratch2, \s2_8888 + srl \scratch2, \scratch2, 24 + MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \ + \scratch1, \scratch2, \ + \out1_8888, \out2_8888, \ + \maskLSR, \ + \scratch3, \scratch4, \scratch5, \ + \scratch6, \d1_8888, \d2_8888 + + addu_s.qb \out1_8888, \out1_8888, \s1_8888 + addu_s.qb \out2_8888, \out2_8888, \s2_8888 +.endm + +.macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888, \ + m_8, \ + d_8888, \ + out_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3 + MIPS_UN8x4_MUL_UN8 \s_8888, \m_8, \ + \out_8888, \maskLSR, \ + \scratch1, \scratch2, \scratch3 + + addu_s.qb \out_8888, \out_8888, \d_8888 +.endm + +.macro MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 s1_8888, \ + s2_8888, \ + m1_8, \ + m2_8, \ + d1_8888, \ + d2_8888, \ + out1_8888, \ + out2_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, \ + scratch4, scratch5, scratch6 + MIPS_2xUN8x4_MUL_2xUN8 \s1_8888, \s2_8888, \ + \m1_8, \m2_8, \ + \out1_8888, \out2_8888, \ + \maskLSR, \ + \scratch1, \scratch2, \scratch3, \ + \scratch4, \scratch5, \scratch6 + + addu_s.qb \out1_8888, \out1_8888, \d1_8888 + addu_s.qb \out2_8888, \out2_8888, \d2_8888 +.endm + +.macro BILINEAR_INTERPOLATE_SINGLE_PIXEL tl, tr, bl, br, \ + scratch1, scratch2, \ + alpha, red, green, blue \ + wt1, wt2, wb1, wb2 + andi \scratch1, \tl, 0xff + andi \scratch2, \tr, 0xff + andi \alpha, \bl, 0xff + andi \red, \br, 0xff + + multu $ac0, \wt1, \scratch1 + maddu $ac0, \wt2, \scratch2 + maddu $ac0, \wb1, \alpha + maddu $ac0, \wb2, \red + + ext \scratch1, \tl, 8, 8 + ext \scratch2, \tr, 8, 8 + ext \alpha, \bl, 8, 8 + ext \red, \br, 8, 8 + + multu $ac1, \wt1, \scratch1 + maddu $ac1, \wt2, \scratch2 + maddu $ac1, \wb1, \alpha + maddu $ac1, \wb2, \red + + ext \scratch1, \tl, 16, 8 + ext \scratch2, \tr, 16, 8 + ext \alpha, \bl, 16, 8 + ext \red, \br, 16, 8 + + mflo \blue, $ac0 + + multu $ac2, \wt1, \scratch1 + maddu $ac2, \wt2, \scratch2 + maddu $ac2, \wb1, \alpha + maddu $ac2, \wb2, \red + + ext \scratch1, \tl, 24, 8 + ext \scratch2, \tr, 24, 8 + ext \alpha, \bl, 24, 8 + ext \red, \br, 24, 8 + + mflo \green, $ac1 + + multu $ac3, \wt1, \scratch1 + maddu $ac3, \wt2, \scratch2 + maddu $ac3, \wb1, \alpha + maddu $ac3, \wb2, \red + + mflo \red, $ac2 + mflo \alpha, $ac3 + + precr.qb.ph \alpha, \alpha, \red + precr.qb.ph \scratch1, \green, \blue + precrq.qb.ph \tl, \alpha, \scratch1 +.endm + +#endif //PIXMAN_MIPS_DSPR2_ASM_H diff --git a/src/pixman/pixman/pixman-mips-dspr2.c b/src/pixman/pixman/pixman-mips-dspr2.c new file mode 100644 index 0000000..e10c9df --- /dev/null +++ b/src/pixman/pixman/pixman-mips-dspr2.c @@ -0,0 +1,459 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nemanja Lukic (nlukic@mips.com) + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "pixman-private.h" +#include "pixman-mips-dspr2.h" + +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_x888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_8888_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0565_8888, + uint16_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0565_0565, + uint16_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888, + uint8_t, 3, uint8_t, 3) +#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_8888_rev, + uint8_t, 3, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_0565_rev, + uint8_t, 3, uint16_t, 1) +#endif +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_pixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_rpixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_0565, + uint8_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_8888, + uint8_t, 1, uint32_t, 1) + +PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8888, + uint8_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca, + uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca, + uint32_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888, + uint8_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565, + uint8_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, add_n_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, add_n_8_8888, + uint8_t, 1, uint32_t, 1) + +PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_0565_n_0565, + uint16_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, add_8888_n_8888, + uint32_t, 1, uint32_t, 1) + +PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_0565, + uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_8888, + uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_reverse_n_8888, + uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_DST (0, in_n_8, + uint8_t, 1) + +PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t, 1, + uint8_t, 1, uint8_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8_8888, uint32_t, 1, + uint8_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8888_8888, uint32_t, 1, + uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_0565_8_0565, uint16_t, 1, + uint8_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_8888, uint32_t, 1, + uint8_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_0565, uint32_t, 1, + uint8_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_0565_8_0565, uint16_t, 1, + uint8_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8888_8888, uint32_t, 1, + uint32_t, 1, uint32_t, 1) + +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_0565, OVER, + uint32_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (0565_8888, SRC, + uint16_t, uint32_t) + +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC, + uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC, + uint32_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_8888, SRC, + uint16_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_0565, SRC, + uint16_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, ADD, + uint32_t, uint32_t) + +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_0565, + OVER, uint32_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 0565_8_0565, + OVER, uint16_t, uint16_t) + +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_8888, SRC, + uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_0565, SRC, + uint32_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_x888, SRC, + uint16_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_0565, SRC, + uint16_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, OVER, + uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, ADD, + uint32_t, uint32_t) + +static pixman_bool_t +mips_dspr2_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t _xor) +{ + uint8_t *byte_line; + uint32_t byte_width; + switch (bpp) + { + case 16: + stride = stride * (int) sizeof (uint32_t) / 2; + byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); + byte_width = width * 2; + stride *= 2; + + while (height--) + { + uint8_t *dst = byte_line; + byte_line += stride; + pixman_fill_buff16_mips (dst, byte_width, _xor & 0xffff); + } + return TRUE; + case 32: + stride = stride * (int) sizeof (uint32_t) / 4; + byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); + byte_width = width * 4; + stride *= 4; + + while (height--) + { + uint8_t *dst = byte_line; + byte_line += stride; + pixman_fill_buff32_mips (dst, byte_width, _xor); + } + return TRUE; + default: + return FALSE; + } +} + +static pixman_bool_t +mips_dspr2_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height) +{ + if (src_bpp != dst_bpp) + return FALSE; + + uint8_t *src_bytes; + uint8_t *dst_bytes; + uint32_t byte_width; + + switch (src_bpp) + { + case 16: + src_stride = src_stride * (int) sizeof (uint32_t) / 2; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; + src_bytes =(uint8_t *)(((uint16_t *)src_bits) + + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + + dst_stride * (dest_y) + (dest_x)); + byte_width = width * 2; + src_stride *= 2; + dst_stride *= 2; + + while (height--) + { + uint8_t *src = src_bytes; + uint8_t *dst = dst_bytes; + src_bytes += src_stride; + dst_bytes += dst_stride; + pixman_mips_fast_memcpy (dst, src, byte_width); + } + return TRUE; + case 32: + src_stride = src_stride * (int) sizeof (uint32_t) / 4; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; + src_bytes = (uint8_t *)(((uint32_t *)src_bits) + + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + + dst_stride * (dest_y) + (dest_x)); + byte_width = width * 4; + src_stride *= 4; + dst_stride *= 4; + + while (height--) + { + uint8_t *src = src_bytes; + uint8_t *dst = dst_bytes; + src_bytes += src_stride; + dst_bytes += dst_stride; + pixman_mips_fast_memcpy (dst, src, byte_width); + } + return TRUE; + default: + return FALSE; + } +} + +static const pixman_fast_path_t mips_dspr2_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, mips_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, mips_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, mips_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, mips_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, mips_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, mips_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, mips_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, mips_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, mips_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, mips_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, mips_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, mips_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, mips_composite_src_0888_0888), +#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, mips_composite_src_0888_8888_rev), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, mips_composite_src_0888_0565_rev), +#endif + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, mips_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, mips_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, mips_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, mips_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mips_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mips_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mips_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, mips_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8, mips_composite_src_n_8_8), + + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, mips_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, mips_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, mips_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mips_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, mips_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, mips_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, mips_composite_over_n_8_8), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, mips_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, mips_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, mips_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, mips_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mips_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mips_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, mips_composite_over_n_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, mips_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, mips_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, mips_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, mips_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, mips_composite_over_8888_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, mips_composite_over_8888_n_0565), + PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, mips_composite_over_0565_n_0565), + PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, mips_composite_over_0565_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, mips_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, mips_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, mips_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, mips_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, mips_composite_over_8888_8_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, mips_composite_over_8888_8_0565), + PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, mips_composite_over_0565_8_0565), + PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, mips_composite_over_0565_8_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, mips_composite_over_8888_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, mips_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, mips_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, mips_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, mips_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, mips_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, mips_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, mips_composite_add_n_8_8), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, mips_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, mips_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, mips_composite_add_8_8_8), + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, mips_composite_add_0565_8_0565), + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, mips_composite_add_0565_8_0565), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, mips_composite_add_8888_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, mips_composite_add_8888_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, mips_composite_add_8888_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, mips_composite_add_8888_n_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, mips_composite_add_8888_n_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, mips_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, mips_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, mips_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, mips_composite_out_reverse_8_0565), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, mips_composite_out_reverse_8_0565), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8r8g8b8, mips_composite_out_reverse_8_8888), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8b8g8r8, mips_composite_out_reverse_8_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, mips_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mips_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (IN, solid, null, a8, mips_composite_in_n_8), + + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mips_8888_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mips_8888_8888), + + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_0565), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_0565), + + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, mips_0565_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888), + /* Note: NONE repeat is not supported yet */ + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, mips_0565_8888), + + PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565), + PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565), + + PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, mips_0565_8_0565), + PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, mips_0565_8_0565), + + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8888), + + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, mips_8888_0565), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, mips_8888_0565), + + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, mips_0565_0565), + + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888), + + SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, mips_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, mips_8888_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, mips_8888_8_0565), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, mips_8888_8_0565), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8_x888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, mips_0565_8_0565), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, mips_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, mips_8888_8_8888), + { PIXMAN_OP_NONE }, +}; + +static void +mips_dspr2_combine_over_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + pixman_composite_over_8888_8888_8888_asm_mips ( + dest, (uint32_t *)src, (uint32_t *)mask, width); + else + pixman_composite_over_8888_8888_asm_mips ( + dest, (uint32_t *)src, width); +} + +pixman_implementation_t * +_pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = + _pixman_implementation_create (fallback, mips_dspr2_fast_paths); + + imp->combine_32[PIXMAN_OP_OVER] = mips_dspr2_combine_over_u; + + imp->blt = mips_dspr2_blt; + imp->fill = mips_dspr2_fill; + + return imp; +} diff --git a/src/pixman/pixman/pixman-mips-dspr2.h b/src/pixman/pixman/pixman-mips-dspr2.h new file mode 100644 index 0000000..955ed70 --- /dev/null +++ b/src/pixman/pixman/pixman-mips-dspr2.h @@ -0,0 +1,438 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nemanja Lukic (nlukic@mips.com) + */ + +#ifndef PIXMAN_MIPS_DSPR2_H +#define PIXMAN_MIPS_DSPR2_H + +#include "pixman-private.h" +#include "pixman-inlines.h" + +#define SKIP_ZERO_SRC 1 +#define SKIP_ZERO_MASK 2 +#define DO_FAST_MEMCPY 3 + +void +pixman_mips_fast_memcpy (void *dst, void *src, uint32_t n_bytes); +void +pixman_fill_buff16_mips (void *dst, uint32_t n_bytes, uint16_t value); +void +pixman_fill_buff32_mips (void *dst, uint32_t n_bytes, uint32_t value); + +/****************************************************************/ + +#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST(flags, name, \ + src_type, src_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_mips (dst_type *dst, \ + src_type *src, \ + int32_t w); \ + \ +static void \ +mips_composite_##name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line, *dst; \ + src_type *src_line, *src; \ + int32_t dst_stride, src_stride; \ + int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8; \ + \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + \ + while (height--) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + src = src_line; \ + src_line += src_stride; \ + \ + if (flags == DO_FAST_MEMCPY) \ + pixman_mips_fast_memcpy (dst, src, width * bpp); \ + else \ + pixman_composite_##name##_asm_mips (dst, src, width); \ + } \ +} + +/****************************************************************/ + +#define PIXMAN_MIPS_BIND_FAST_PATH_N_DST(flags, name, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_mips (dst_type *dst, \ + uint32_t src, \ + int32_t w); \ + \ +static void \ +mips_composite_##name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line, *dst; \ + int32_t dst_stride; \ + uint32_t src; \ + \ + src = _pixman_image_get_solid ( \ + imp, src_image, dest_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_SRC) && src == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + \ + while (height--) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + \ + pixman_composite_##name##_asm_mips (dst, src, width); \ + } \ +} + +/*******************************************************************/ + +#define PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST(flags, name, \ + mask_type, mask_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_mips (dst_type *dst, \ + uint32_t src, \ + mask_type *mask, \ + int32_t w); \ + \ +static void \ +mips_composite_##name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line, *dst; \ + mask_type *mask_line, *mask; \ + int32_t dst_stride, mask_stride; \ + uint32_t src; \ + \ + src = _pixman_image_get_solid ( \ + imp, src_image, dest_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_SRC) && src == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ + mask_stride, mask_line, mask_cnt); \ + \ + while (height--) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + mask = mask_line; \ + mask_line += mask_stride; \ + pixman_composite_##name##_asm_mips (dst, src, mask, width); \ + } \ +} + +/*******************************************************************/ + +#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST(flags, name, \ + src_type, src_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_mips (dst_type *dst, \ + src_type *src, \ + uint32_t mask, \ + int32_t w); \ + \ +static void \ +mips_composite_##name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line, *dst; \ + src_type *src_line, *src; \ + int32_t dst_stride, src_stride; \ + uint32_t mask; \ + \ + mask = _pixman_image_get_solid ( \ + imp, mask_image, dest_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_MASK) && mask == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + \ + while (height--) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + src = src_line; \ + src_line += src_stride; \ + \ + pixman_composite_##name##_asm_mips (dst, src, mask, width); \ + } \ +} + +/************************************************************************/ + +#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST(name, src_type, src_cnt, \ + mask_type, mask_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_mips (dst_type *dst, \ + src_type *src, \ + mask_type *mask, \ + int32_t w); \ + \ +static void \ +mips_composite_##name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line, *dst; \ + src_type *src_line, *src; \ + mask_type *mask_line, *mask; \ + int32_t dst_stride, src_stride, mask_stride; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ + mask_stride, mask_line, mask_cnt); \ + \ + while (height--) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + mask = mask_line; \ + mask_line += mask_stride; \ + src = src_line; \ + src_line += src_stride; \ + pixman_composite_##name##_asm_mips (dst, src, mask, width); \ + } \ +} + +/****************************************************************************/ + +#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST(name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_mips ( \ + dst_type * dst, \ + const src_type * src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x); \ + \ +static force_inline void \ +scaled_nearest_scanline_mips_##name##_##op (dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, w, \ + vx, unit_x); \ +} \ + \ +FAST_NEAREST_MAINLOOP (mips_##name##_cover_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, dst_type, COVER) \ +FAST_NEAREST_MAINLOOP (mips_##name##_none_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, dst_type, NONE) \ +FAST_NEAREST_MAINLOOP (mips_##name##_pad_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, dst_type, PAD) + +/* Provide entries for the fast path table */ +#define PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func) + + +/*****************************************************************************/ + +#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_mips ( \ + dst_type * dst, \ + const src_type * src, \ + const uint8_t * mask, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x); \ + \ +static force_inline void \ +scaled_nearest_scanline_mips_##name##_##op (const uint8_t * mask, \ + dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, \ + mask, w, \ + vx, unit_x); \ +} \ + \ +FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_cover_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\ +FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_none_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \ +FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_pad_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, uint8_t, dst_type, PAD, TRUE, FALSE) + +/* Provide entries for the fast path table */ +#define PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) + +/****************************************************************************/ + +#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips( \ + dst_type * dst, \ + const src_type * src_top, \ + const src_type * src_bottom, \ + int32_t w, \ + int wt, \ + int wb, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x); \ +static force_inline void \ +scaled_bilinear_scanline_mips_##name##_##op (dst_type * dst, \ + const uint32_t * mask, \ + const src_type * src_top, \ + const src_type * src_bottom, \ + int32_t w, \ + int wt, \ + int wb, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (dst, src_top, \ + src_bottom, w, \ + wt, wb, \ + vx, unit_x); \ +} \ + \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint32_t, dst_type, COVER, FLAG_NONE) \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint32_t, dst_type, NONE, FLAG_NONE) \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint32_t, dst_type, PAD, FLAG_NONE) \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint32_t, dst_type, NORMAL, \ + FLAG_NONE) + +/*****************************************************************************/ + +#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips ( \ + dst_type * dst, \ + const uint8_t * mask, \ + const src_type * top, \ + const src_type * bottom, \ + int wt, \ + int wb, \ + pixman_fixed_t x, \ + pixman_fixed_t ux, \ + int width); \ + \ +static force_inline void \ +scaled_bilinear_scanline_mips_##name##_##op (dst_type * dst, \ + const uint8_t * mask, \ + const src_type * src_top, \ + const src_type * src_bottom, \ + int32_t w, \ + int wt, \ + int wb, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips ( \ + dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \ +} \ + \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint8_t, dst_type, COVER, \ + FLAG_HAVE_NON_SOLID_MASK) \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint8_t, dst_type, NONE, \ + FLAG_HAVE_NON_SOLID_MASK) \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint8_t, dst_type, PAD, \ + FLAG_HAVE_NON_SOLID_MASK) \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint8_t, dst_type, NORMAL, \ + FLAG_HAVE_NON_SOLID_MASK) + +#endif //PIXMAN_MIPS_DSPR2_H diff --git a/src/pixman/pixman/pixman-mips-memcpy-asm.S b/src/pixman/pixman/pixman-mips-memcpy-asm.S new file mode 100644 index 0000000..9ad6da5 --- /dev/null +++ b/src/pixman/pixman/pixman-mips-memcpy-asm.S @@ -0,0 +1,382 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "pixman-mips-dspr2-asm.h" + +/* + * This routine could be optimized for MIPS64. The current code only + * uses MIPS32 instructions. + */ + +#ifdef EB +# define LWHI lwl /* high part is left in big-endian */ +# define SWHI swl /* high part is left in big-endian */ +# define LWLO lwr /* low part is right in big-endian */ +# define SWLO swr /* low part is right in big-endian */ +#else +# define LWHI lwr /* high part is right in little-endian */ +# define SWHI swr /* high part is right in little-endian */ +# define LWLO lwl /* low part is left in big-endian */ +# define SWLO swl /* low part is left in big-endian */ +#endif + +LEAF_MIPS32R2(pixman_mips_fast_memcpy) + + slti AT, a2, 8 + bne AT, zero, $last8 + move v0, a0 /* memcpy returns the dst pointer */ + +/* Test if the src and dst are word-aligned, or can be made word-aligned */ + xor t8, a1, a0 + andi t8, t8, 0x3 /* t8 is a0/a1 word-displacement */ + + bne t8, zero, $unaligned + negu a3, a0 + + andi a3, a3, 0x3 /* we need to copy a3 bytes to make a0/a1 aligned */ + beq a3, zero, $chk16w /* when a3=0 then the dst (a0) is word-aligned */ + subu a2, a2, a3 /* now a2 is the remining bytes count */ + + LWHI t8, 0(a1) + addu a1, a1, a3 + SWHI t8, 0(a0) + addu a0, a0, a3 + +/* Now the dst/src are mutually word-aligned with word-aligned addresses */ +$chk16w: andi t8, a2, 0x3f /* any whole 64-byte chunks? */ + /* t8 is the byte count after 64-byte chunks */ + + beq a2, t8, $chk8w /* if a2==t8, no 64-byte chunks */ + /* There will be at most 1 32-byte chunk after it */ + subu a3, a2, t8 /* subtract from a2 the reminder */ + /* Here a3 counts bytes in 16w chunks */ + addu a3, a0, a3 /* Now a3 is the final dst after 64-byte chunks */ + + addu t0, a0, a2 /* t0 is the "past the end" address */ + +/* + * When in the loop we exercise "pref 30, x(a0)", the a0+x should not be past + * the "t0-32" address + * This means: for x=128 the last "safe" a0 address is "t0-160" + * Alternatively, for x=64 the last "safe" a0 address is "t0-96" + * In the current version we use "pref 30, 128(a0)", so "t0-160" is the limit + */ + subu t9, t0, 160 /* t9 is the "last safe pref 30, 128(a0)" address */ + + pref 0, 0(a1) /* bring the first line of src, addr 0 */ + pref 0, 32(a1) /* bring the second line of src, addr 32 */ + pref 0, 64(a1) /* bring the third line of src, addr 64 */ + pref 30, 32(a0) /* safe, as we have at least 64 bytes ahead */ +/* In case the a0 > t9 don't use "pref 30" at all */ + sgtu v1, a0, t9 + bgtz v1, $loop16w /* skip "pref 30, 64(a0)" for too short arrays */ + nop +/* otherwise, start with using pref30 */ + pref 30, 64(a0) +$loop16w: + pref 0, 96(a1) + lw t0, 0(a1) + bgtz v1, $skip_pref30_96 /* skip "pref 30, 96(a0)" */ + lw t1, 4(a1) + pref 30, 96(a0) /* continue setting up the dest, addr 96 */ +$skip_pref30_96: + lw t2, 8(a1) + lw t3, 12(a1) + lw t4, 16(a1) + lw t5, 20(a1) + lw t6, 24(a1) + lw t7, 28(a1) + pref 0, 128(a1) /* bring the next lines of src, addr 128 */ + + sw t0, 0(a0) + sw t1, 4(a0) + sw t2, 8(a0) + sw t3, 12(a0) + sw t4, 16(a0) + sw t5, 20(a0) + sw t6, 24(a0) + sw t7, 28(a0) + + lw t0, 32(a1) + bgtz v1, $skip_pref30_128 /* skip "pref 30, 128(a0)" */ + lw t1, 36(a1) + pref 30, 128(a0) /* continue setting up the dest, addr 128 */ +$skip_pref30_128: + lw t2, 40(a1) + lw t3, 44(a1) + lw t4, 48(a1) + lw t5, 52(a1) + lw t6, 56(a1) + lw t7, 60(a1) + pref 0, 160(a1) /* bring the next lines of src, addr 160 */ + + sw t0, 32(a0) + sw t1, 36(a0) + sw t2, 40(a0) + sw t3, 44(a0) + sw t4, 48(a0) + sw t5, 52(a0) + sw t6, 56(a0) + sw t7, 60(a0) + + addiu a0, a0, 64 /* adding 64 to dest */ + sgtu v1, a0, t9 + bne a0, a3, $loop16w + addiu a1, a1, 64 /* adding 64 to src */ + move a2, t8 + +/* Here we have src and dest word-aligned but less than 64-bytes to go */ + +$chk8w: + pref 0, 0x0(a1) + andi t8, a2, 0x1f /* is there a 32-byte chunk? */ + /* the t8 is the reminder count past 32-bytes */ + beq a2, t8, $chk1w /* when a2=t8, no 32-byte chunk */ + nop + + lw t0, 0(a1) + lw t1, 4(a1) + lw t2, 8(a1) + lw t3, 12(a1) + lw t4, 16(a1) + lw t5, 20(a1) + lw t6, 24(a1) + lw t7, 28(a1) + addiu a1, a1, 32 + + sw t0, 0(a0) + sw t1, 4(a0) + sw t2, 8(a0) + sw t3, 12(a0) + sw t4, 16(a0) + sw t5, 20(a0) + sw t6, 24(a0) + sw t7, 28(a0) + addiu a0, a0, 32 + +$chk1w: + andi a2, t8, 0x3 /* now a2 is the reminder past 1w chunks */ + beq a2, t8, $last8 + subu a3, t8, a2 /* a3 is count of bytes in 1w chunks */ + addu a3, a0, a3 /* now a3 is the dst address past the 1w chunks */ + +/* copying in words (4-byte chunks) */ +$wordCopy_loop: + lw t3, 0(a1) /* the first t3 may be equal t0 ... optimize? */ + addiu a1, a1, 4 + addiu a0, a0, 4 + bne a0, a3, $wordCopy_loop + sw t3, -4(a0) + +/* For the last (<8) bytes */ +$last8: + blez a2, leave + addu a3, a0, a2 /* a3 is the last dst address */ +$last8loop: + lb v1, 0(a1) + addiu a1, a1, 1 + addiu a0, a0, 1 + bne a0, a3, $last8loop + sb v1, -1(a0) + +leave: j ra + nop + +/* + * UNALIGNED case + */ + +$unaligned: + /* got here with a3="negu a0" */ + andi a3, a3, 0x3 /* test if the a0 is word aligned */ + beqz a3, $ua_chk16w + subu a2, a2, a3 /* bytes left after initial a3 bytes */ + + LWHI v1, 0(a1) + LWLO v1, 3(a1) + addu a1, a1, a3 /* a3 may be here 1, 2 or 3 */ + SWHI v1, 0(a0) + addu a0, a0, a3 /* below the dst will be word aligned (NOTE1) */ + +$ua_chk16w: andi t8, a2, 0x3f /* any whole 64-byte chunks? */ + /* t8 is the byte count after 64-byte chunks */ + beq a2, t8, $ua_chk8w /* if a2==t8, no 64-byte chunks */ + /* There will be at most 1 32-byte chunk after it */ + subu a3, a2, t8 /* subtract from a2 the reminder */ + /* Here a3 counts bytes in 16w chunks */ + addu a3, a0, a3 /* Now a3 is the final dst after 64-byte chunks */ + + addu t0, a0, a2 /* t0 is the "past the end" address */ + + subu t9, t0, 160 /* t9 is the "last safe pref 30, 128(a0)" address */ + + pref 0, 0(a1) /* bring the first line of src, addr 0 */ + pref 0, 32(a1) /* bring the second line of src, addr 32 */ + pref 0, 64(a1) /* bring the third line of src, addr 64 */ + pref 30, 32(a0) /* safe, as we have at least 64 bytes ahead */ +/* In case the a0 > t9 don't use "pref 30" at all */ + sgtu v1, a0, t9 + bgtz v1, $ua_loop16w /* skip "pref 30, 64(a0)" for too short arrays */ + nop +/* otherwise, start with using pref30 */ + pref 30, 64(a0) +$ua_loop16w: + pref 0, 96(a1) + LWHI t0, 0(a1) + LWLO t0, 3(a1) + LWHI t1, 4(a1) + bgtz v1, $ua_skip_pref30_96 + LWLO t1, 7(a1) + pref 30, 96(a0) /* continue setting up the dest, addr 96 */ +$ua_skip_pref30_96: + LWHI t2, 8(a1) + LWLO t2, 11(a1) + LWHI t3, 12(a1) + LWLO t3, 15(a1) + LWHI t4, 16(a1) + LWLO t4, 19(a1) + LWHI t5, 20(a1) + LWLO t5, 23(a1) + LWHI t6, 24(a1) + LWLO t6, 27(a1) + LWHI t7, 28(a1) + LWLO t7, 31(a1) + pref 0, 128(a1) /* bring the next lines of src, addr 128 */ + + sw t0, 0(a0) + sw t1, 4(a0) + sw t2, 8(a0) + sw t3, 12(a0) + sw t4, 16(a0) + sw t5, 20(a0) + sw t6, 24(a0) + sw t7, 28(a0) + + LWHI t0, 32(a1) + LWLO t0, 35(a1) + LWHI t1, 36(a1) + bgtz v1, $ua_skip_pref30_128 + LWLO t1, 39(a1) + pref 30, 128(a0) /* continue setting up the dest, addr 128 */ +$ua_skip_pref30_128: + LWHI t2, 40(a1) + LWLO t2, 43(a1) + LWHI t3, 44(a1) + LWLO t3, 47(a1) + LWHI t4, 48(a1) + LWLO t4, 51(a1) + LWHI t5, 52(a1) + LWLO t5, 55(a1) + LWHI t6, 56(a1) + LWLO t6, 59(a1) + LWHI t7, 60(a1) + LWLO t7, 63(a1) + pref 0, 160(a1) /* bring the next lines of src, addr 160 */ + + sw t0, 32(a0) + sw t1, 36(a0) + sw t2, 40(a0) + sw t3, 44(a0) + sw t4, 48(a0) + sw t5, 52(a0) + sw t6, 56(a0) + sw t7, 60(a0) + + addiu a0, a0, 64 /* adding 64 to dest */ + sgtu v1, a0, t9 + bne a0, a3, $ua_loop16w + addiu a1, a1, 64 /* adding 64 to src */ + move a2, t8 + +/* Here we have src and dest word-aligned but less than 64-bytes to go */ + +$ua_chk8w: + pref 0, 0x0(a1) + andi t8, a2, 0x1f /* is there a 32-byte chunk? */ + /* the t8 is the reminder count */ + beq a2, t8, $ua_chk1w /* when a2=t8, no 32-byte chunk */ + + LWHI t0, 0(a1) + LWLO t0, 3(a1) + LWHI t1, 4(a1) + LWLO t1, 7(a1) + LWHI t2, 8(a1) + LWLO t2, 11(a1) + LWHI t3, 12(a1) + LWLO t3, 15(a1) + LWHI t4, 16(a1) + LWLO t4, 19(a1) + LWHI t5, 20(a1) + LWLO t5, 23(a1) + LWHI t6, 24(a1) + LWLO t6, 27(a1) + LWHI t7, 28(a1) + LWLO t7, 31(a1) + addiu a1, a1, 32 + + sw t0, 0(a0) + sw t1, 4(a0) + sw t2, 8(a0) + sw t3, 12(a0) + sw t4, 16(a0) + sw t5, 20(a0) + sw t6, 24(a0) + sw t7, 28(a0) + addiu a0, a0, 32 + +$ua_chk1w: + andi a2, t8, 0x3 /* now a2 is the reminder past 1w chunks */ + beq a2, t8, $ua_smallCopy + subu a3, t8, a2 /* a3 is count of bytes in 1w chunks */ + addu a3, a0, a3 /* now a3 is the dst address past the 1w chunks */ + +/* copying in words (4-byte chunks) */ +$ua_wordCopy_loop: + LWHI v1, 0(a1) + LWLO v1, 3(a1) + addiu a1, a1, 4 + addiu a0, a0, 4 /* note: dst=a0 is word aligned here, see NOTE1 */ + bne a0, a3, $ua_wordCopy_loop + sw v1, -4(a0) + +/* Now less than 4 bytes (value in a2) left to copy */ +$ua_smallCopy: + beqz a2, leave + addu a3, a0, a2 /* a3 is the last dst address */ +$ua_smallCopy_loop: + lb v1, 0(a1) + addiu a1, a1, 1 + addiu a0, a0, 1 + bne a0, a3, $ua_smallCopy_loop + sb v1, -1(a0) + + j ra + nop + +END(pixman_mips_fast_memcpy) diff --git a/src/pixman/pixman/pixman-mips.c b/src/pixman/pixman/pixman-mips.c new file mode 100644 index 0000000..3048813 --- /dev/null +++ b/src/pixman/pixman/pixman-mips.c @@ -0,0 +1,94 @@ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "pixman-private.h" + +#if defined(USE_MIPS_DSPR2) || defined(USE_LOONGSON_MMI) + +#include <string.h> +#include <stdlib.h> + +static pixman_bool_t +have_feature (const char *search_string) +{ +#if defined (__linux__) /* linux ELF */ + /* Simple detection of MIPS features at runtime for Linux. + * It is based on /proc/cpuinfo, which reveals hardware configuration + * to user-space applications. According to MIPS (early 2010), no similar + * facility is universally available on the MIPS architectures, so it's up + * to individual OSes to provide such. + */ + const char *file_name = "/proc/cpuinfo"; + char cpuinfo_line[256]; + FILE *f = NULL; + + if ((f = fopen (file_name, "r")) == NULL) + return FALSE; + + while (fgets (cpuinfo_line, sizeof (cpuinfo_line), f) != NULL) + { + if (strstr (cpuinfo_line, search_string) != NULL) + { + fclose (f); + return TRUE; + } + } + + fclose (f); +#endif + + /* Did not find string in the proc file, or not Linux ELF. */ + return FALSE; +} + +#endif + +pixman_implementation_t * +_pixman_mips_get_implementations (pixman_implementation_t *imp) +{ +#ifdef USE_LOONGSON_MMI + /* I really don't know if some Loongson CPUs don't have MMI. */ + if (!_pixman_disabled ("loongson-mmi") && have_feature ("Loongson")) + imp = _pixman_implementation_create_mmx (imp); +#endif + +#ifdef USE_MIPS_DSPR2 + if (!_pixman_disabled ("mips-dspr2")) + { + int already_compiling_everything_for_dspr2 = 0; +#if defined(__mips_dsp) && (__mips_dsp_rev >= 2) + already_compiling_everything_for_dspr2 = 1; +#endif + if (already_compiling_everything_for_dspr2 || + /* Only currently available MIPS core that supports DSPr2 is 74K. */ + have_feature ("MIPS 74K")) + { + imp = _pixman_implementation_create_mips_dspr2 (imp); + } + } +#endif + + return imp; +} diff --git a/src/pixman/pixman/pixman-mmx.c b/src/pixman/pixman/pixman-mmx.c new file mode 100644 index 0000000..f9a92ce --- /dev/null +++ b/src/pixman/pixman/pixman-mmx.c @@ -0,0 +1,4055 @@ +/* + * Copyright © 2004, 2005 Red Hat, Inc. + * Copyright © 2004 Nicholas Miell + * Copyright © 2005 Trolltech AS + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Red Hat not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. Red Hat makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Søren Sandmann (sandmann@redhat.com) + * Minor Improvements: Nicholas Miell (nmiell@gmail.com) + * MMX code paths for fbcompose.c by Lars Knoll (lars@trolltech.com) + * + * Based on work by Owen Taylor + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#if defined USE_X86_MMX || defined USE_ARM_IWMMXT || defined USE_LOONGSON_MMI + +#ifdef USE_LOONGSON_MMI +#include <loongson-mmintrin.h> +#else +#include <mmintrin.h> +#endif +#include "pixman-private.h" +#include "pixman-combine32.h" +#include "pixman-inlines.h" + +#ifdef VERBOSE +#define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__, __LINE__) +#else +#define CHECKPOINT() +#endif + +#if defined USE_ARM_IWMMXT && __GNUC__ == 4 && __GNUC_MINOR__ < 8 +/* Empty the multimedia state. For some reason, ARM's mmintrin.h doesn't provide this. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_empty (void) +{ + +} +#endif + +#ifdef USE_X86_MMX +# if (defined(__SUNPRO_C) || defined(_MSC_VER) || defined(_WIN64)) +# include <xmmintrin.h> +# else +/* We have to compile with -msse to use xmmintrin.h, but that causes SSE + * instructions to be generated that we don't want. Just duplicate the + * functions we want to use. */ +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movemask_pi8 (__m64 __A) +{ + int ret; + + asm ("pmovmskb %1, %0\n\t" + : "=r" (ret) + : "y" (__A) + ); + + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mulhi_pu16 (__m64 __A, __m64 __B) +{ + asm ("pmulhuw %1, %0\n\t" + : "+y" (__A) + : "y" (__B) + ); + return __A; +} + +# ifdef __OPTIMIZE__ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_pi16 (__m64 __A, int8_t const __N) +{ + __m64 ret; + + asm ("pshufw %2, %1, %0\n\t" + : "=y" (ret) + : "y" (__A), "K" (__N) + ); + + return ret; +} +# else +# define _mm_shuffle_pi16(A, N) \ + ({ \ + __m64 ret; \ + \ + asm ("pshufw %2, %1, %0\n\t" \ + : "=y" (ret) \ + : "y" (A), "K" ((const int8_t)N) \ + ); \ + \ + ret; \ + }) +# endif +# endif +#endif + +#ifndef _MSC_VER +#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \ + (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0)) +#endif + +/* Notes about writing mmx code + * + * give memory operands as the second operand. If you give it as the + * first, gcc will first load it into a register, then use that + * register + * + * ie. use + * + * _mm_mullo_pi16 (x, mmx_constant); + * + * not + * + * _mm_mullo_pi16 (mmx_constant, x); + * + * Also try to minimize dependencies. i.e. when you need a value, try + * to calculate it from a value that was calculated as early as + * possible. + */ + +/* --------------- MMX primitives ------------------------------------- */ + +/* If __m64 is defined as a struct or union, then define M64_MEMBER to be + * the name of the member used to access the data. + * If __m64 requires using mm_cvt* intrinsics functions to convert between + * uint64_t and __m64 values, then define USE_CVT_INTRINSICS. + * If __m64 and uint64_t values can just be cast to each other directly, + * then define USE_M64_CASTS. + * If __m64 is a double datatype, then define USE_M64_DOUBLE. + */ +#ifdef _MSC_VER +# define M64_MEMBER m64_u64 +#elif defined(__ICC) +# define USE_CVT_INTRINSICS +#elif defined(USE_LOONGSON_MMI) +# define USE_M64_DOUBLE +#elif defined(__GNUC__) +# define USE_M64_CASTS +#elif defined(__SUNPRO_C) +# if (__SUNPRO_C >= 0x5120) && !defined(__NOVECTORSIZE__) +/* Solaris Studio 12.3 (Sun C 5.12) introduces __attribute__(__vector_size__) + * support, and defaults to using it to define __m64, unless __NOVECTORSIZE__ + * is defined. If it is used, then the mm_cvt* intrinsics must be used. + */ +# define USE_CVT_INTRINSICS +# else +/* For Studio 12.2 or older, or when __attribute__(__vector_size__) is + * disabled, __m64 is defined as a struct containing "unsigned long long l_". + */ +# define M64_MEMBER l_ +# endif +#endif + +#if defined(USE_M64_CASTS) || defined(USE_CVT_INTRINSICS) || defined(USE_M64_DOUBLE) +typedef uint64_t mmxdatafield; +#else +typedef __m64 mmxdatafield; +#endif + +typedef struct +{ + mmxdatafield mmx_4x00ff; + mmxdatafield mmx_4x0080; + mmxdatafield mmx_565_rgb; + mmxdatafield mmx_565_unpack_multiplier; + mmxdatafield mmx_565_pack_multiplier; + mmxdatafield mmx_565_r; + mmxdatafield mmx_565_g; + mmxdatafield mmx_565_b; + mmxdatafield mmx_packed_565_rb; + mmxdatafield mmx_packed_565_g; + mmxdatafield mmx_expand_565_g; + mmxdatafield mmx_expand_565_b; + mmxdatafield mmx_expand_565_r; +#ifndef USE_LOONGSON_MMI + mmxdatafield mmx_mask_0; + mmxdatafield mmx_mask_1; + mmxdatafield mmx_mask_2; + mmxdatafield mmx_mask_3; +#endif + mmxdatafield mmx_full_alpha; + mmxdatafield mmx_4x0101; + mmxdatafield mmx_ff000000; +} mmx_data_t; + +#if defined(_MSC_VER) +# define MMXDATA_INIT(field, val) { val ## UI64 } +#elif defined(M64_MEMBER) /* __m64 is a struct, not an integral type */ +# define MMXDATA_INIT(field, val) field = { val ## ULL } +#else /* mmxdatafield is an integral type */ +# define MMXDATA_INIT(field, val) field = val ## ULL +#endif + +static const mmx_data_t c = +{ + MMXDATA_INIT (.mmx_4x00ff, 0x00ff00ff00ff00ff), + MMXDATA_INIT (.mmx_4x0080, 0x0080008000800080), + MMXDATA_INIT (.mmx_565_rgb, 0x000001f0003f001f), + MMXDATA_INIT (.mmx_565_unpack_multiplier, 0x0000008404100840), + MMXDATA_INIT (.mmx_565_pack_multiplier, 0x2000000420000004), + MMXDATA_INIT (.mmx_565_r, 0x000000f800000000), + MMXDATA_INIT (.mmx_565_g, 0x0000000000fc0000), + MMXDATA_INIT (.mmx_565_b, 0x00000000000000f8), + MMXDATA_INIT (.mmx_packed_565_rb, 0x00f800f800f800f8), + MMXDATA_INIT (.mmx_packed_565_g, 0x0000fc000000fc00), + MMXDATA_INIT (.mmx_expand_565_g, 0x07e007e007e007e0), + MMXDATA_INIT (.mmx_expand_565_b, 0x001f001f001f001f), + MMXDATA_INIT (.mmx_expand_565_r, 0xf800f800f800f800), +#ifndef USE_LOONGSON_MMI + MMXDATA_INIT (.mmx_mask_0, 0xffffffffffff0000), + MMXDATA_INIT (.mmx_mask_1, 0xffffffff0000ffff), + MMXDATA_INIT (.mmx_mask_2, 0xffff0000ffffffff), + MMXDATA_INIT (.mmx_mask_3, 0x0000ffffffffffff), +#endif + MMXDATA_INIT (.mmx_full_alpha, 0x00ff000000000000), + MMXDATA_INIT (.mmx_4x0101, 0x0101010101010101), + MMXDATA_INIT (.mmx_ff000000, 0xff000000ff000000), +}; + +#ifdef USE_CVT_INTRINSICS +# define MC(x) to_m64 (c.mmx_ ## x) +#elif defined(USE_M64_CASTS) +# define MC(x) ((__m64)c.mmx_ ## x) +#elif defined(USE_M64_DOUBLE) +# define MC(x) (*(__m64 *)&c.mmx_ ## x) +#else +# define MC(x) c.mmx_ ## x +#endif + +static force_inline __m64 +to_m64 (uint64_t x) +{ +#ifdef USE_CVT_INTRINSICS + return _mm_cvtsi64_m64 (x); +#elif defined M64_MEMBER /* __m64 is a struct, not an integral type */ + __m64 res; + + res.M64_MEMBER = x; + return res; +#elif defined USE_M64_DOUBLE + return *(__m64 *)&x; +#else /* USE_M64_CASTS */ + return (__m64)x; +#endif +} + +static force_inline uint64_t +to_uint64 (__m64 x) +{ +#ifdef USE_CVT_INTRINSICS + return _mm_cvtm64_si64 (x); +#elif defined M64_MEMBER /* __m64 is a struct, not an integral type */ + uint64_t res = x.M64_MEMBER; + return res; +#elif defined USE_M64_DOUBLE + return *(uint64_t *)&x; +#else /* USE_M64_CASTS */ + return (uint64_t)x; +#endif +} + +static force_inline __m64 +shift (__m64 v, + int s) +{ + if (s > 0) + return _mm_slli_si64 (v, s); + else if (s < 0) + return _mm_srli_si64 (v, -s); + else + return v; +} + +static force_inline __m64 +negate (__m64 mask) +{ + return _mm_xor_si64 (mask, MC (4x00ff)); +} + +/* Computes the product of two unsigned fixed-point 8-bit values from 0 to 1 + * and maps its result to the same range. + * + * Jim Blinn gives multiple ways to compute this in "Jim Blinn's Corner: + * Notation, Notation, Notation", the first of which is + * + * prod(a, b) = (a * b + 128) / 255. + * + * By approximating the division by 255 as 257/65536 it can be replaced by a + * multiply and a right shift. This is the implementation that we use in + * pix_multiply(), but we _mm_mulhi_pu16() by 257 (part of SSE1 or Extended + * 3DNow!, and unavailable at the time of the book's publication) to perform + * the multiplication and right shift in a single operation. + * + * prod(a, b) = ((a * b + 128) * 257) >> 16. + * + * A third way (how pix_multiply() was implemented prior to 14208344) exists + * also that performs the multiplication by 257 with adds and shifts. + * + * Where temp = a * b + 128 + * + * prod(a, b) = (temp + (temp >> 8)) >> 8. + */ +static force_inline __m64 +pix_multiply (__m64 a, __m64 b) +{ + __m64 res; + + res = _mm_mullo_pi16 (a, b); + res = _mm_adds_pu16 (res, MC (4x0080)); + res = _mm_mulhi_pu16 (res, MC (4x0101)); + + return res; +} + +static force_inline __m64 +pix_add (__m64 a, __m64 b) +{ + return _mm_adds_pu8 (a, b); +} + +static force_inline __m64 +expand_alpha (__m64 pixel) +{ + return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 3, 3, 3)); +} + +static force_inline __m64 +expand_alpha_rev (__m64 pixel) +{ + return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (0, 0, 0, 0)); +} + +static force_inline __m64 +invert_colors (__m64 pixel) +{ + return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 0, 1, 2)); +} + +static force_inline __m64 +over (__m64 src, + __m64 srca, + __m64 dest) +{ + return _mm_adds_pu8 (src, pix_multiply (dest, negate (srca))); +} + +static force_inline __m64 +over_rev_non_pre (__m64 src, __m64 dest) +{ + __m64 srca = expand_alpha (src); + __m64 srcfaaa = _mm_or_si64 (srca, MC (full_alpha)); + + return over (pix_multiply (invert_colors (src), srcfaaa), srca, dest); +} + +static force_inline __m64 +in (__m64 src, __m64 mask) +{ + return pix_multiply (src, mask); +} + +#ifndef _MSC_VER +static force_inline __m64 +in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest) +{ + return over (in (src, mask), pix_multiply (srca, mask), dest); +} + +#else + +#define in_over(src, srca, mask, dest) \ + over (in (src, mask), pix_multiply (srca, mask), dest) + +#endif + +/* Elemental unaligned loads */ + +static force_inline __m64 ldq_u(__m64 *p) +{ +#ifdef USE_X86_MMX + /* x86's alignment restrictions are very relaxed. */ + return *(__m64 *)p; +#elif defined USE_ARM_IWMMXT + int align = (uintptr_t)p & 7; + __m64 *aligned_p; + if (align == 0) + return *p; + aligned_p = (__m64 *)((uintptr_t)p & ~7); + return (__m64) _mm_align_si64 (aligned_p[0], aligned_p[1], align); +#else + struct __una_u64 { __m64 x __attribute__((packed)); }; + const struct __una_u64 *ptr = (const struct __una_u64 *) p; + return (__m64) ptr->x; +#endif +} + +static force_inline uint32_t ldl_u(const uint32_t *p) +{ +#ifdef USE_X86_MMX + /* x86's alignment restrictions are very relaxed. */ + return *p; +#else + struct __una_u32 { uint32_t x __attribute__((packed)); }; + const struct __una_u32 *ptr = (const struct __una_u32 *) p; + return ptr->x; +#endif +} + +static force_inline __m64 +load (const uint32_t *v) +{ +#ifdef USE_LOONGSON_MMI + __m64 ret; + asm ("lwc1 %0, %1\n\t" + : "=f" (ret) + : "m" (*v) + ); + return ret; +#else + return _mm_cvtsi32_si64 (*v); +#endif +} + +static force_inline __m64 +load8888 (const uint32_t *v) +{ +#ifdef USE_LOONGSON_MMI + return _mm_unpacklo_pi8_f (*(__m32 *)v, _mm_setzero_si64 ()); +#else + return _mm_unpacklo_pi8 (load (v), _mm_setzero_si64 ()); +#endif +} + +static force_inline __m64 +load8888u (const uint32_t *v) +{ + uint32_t l = ldl_u (v); + return load8888 (&l); +} + +static force_inline __m64 +pack8888 (__m64 lo, __m64 hi) +{ + return _mm_packs_pu16 (lo, hi); +} + +static force_inline void +store (uint32_t *dest, __m64 v) +{ +#ifdef USE_LOONGSON_MMI + asm ("swc1 %1, %0\n\t" + : "=m" (*dest) + : "f" (v) + : "memory" + ); +#else + *dest = _mm_cvtsi64_si32 (v); +#endif +} + +static force_inline void +store8888 (uint32_t *dest, __m64 v) +{ + v = pack8888 (v, _mm_setzero_si64 ()); + store (dest, v); +} + +static force_inline pixman_bool_t +is_equal (__m64 a, __m64 b) +{ +#ifdef USE_LOONGSON_MMI + /* __m64 is double, we can compare directly. */ + return a == b; +#else + return _mm_movemask_pi8 (_mm_cmpeq_pi8 (a, b)) == 0xff; +#endif +} + +static force_inline pixman_bool_t +is_opaque (__m64 v) +{ +#ifdef USE_LOONGSON_MMI + return is_equal (_mm_and_si64 (v, MC (full_alpha)), MC (full_alpha)); +#else + __m64 ffs = _mm_cmpeq_pi8 (v, v); + return (_mm_movemask_pi8 (_mm_cmpeq_pi8 (v, ffs)) & 0x40); +#endif +} + +static force_inline pixman_bool_t +is_zero (__m64 v) +{ + return is_equal (v, _mm_setzero_si64 ()); +} + +/* Expand 16 bits positioned at @pos (0-3) of a mmx register into + * + * 00RR00GG00BB + * + * --- Expanding 565 in the low word --- + * + * m = (m << (32 - 3)) | (m << (16 - 5)) | m; + * m = m & (01f0003f001f); + * m = m * (008404100840); + * m = m >> 8; + * + * Note the trick here - the top word is shifted by another nibble to + * avoid it bumping into the middle word + */ +static force_inline __m64 +expand565 (__m64 pixel, int pos) +{ + __m64 p = pixel; + __m64 t1, t2; + + /* move pixel to low 16 bit and zero the rest */ +#ifdef USE_LOONGSON_MMI + p = loongson_extract_pi16 (p, pos); +#else + p = shift (shift (p, (3 - pos) * 16), -48); +#endif + + t1 = shift (p, 36 - 11); + t2 = shift (p, 16 - 5); + + p = _mm_or_si64 (t1, p); + p = _mm_or_si64 (t2, p); + p = _mm_and_si64 (p, MC (565_rgb)); + + pixel = _mm_mullo_pi16 (p, MC (565_unpack_multiplier)); + return _mm_srli_pi16 (pixel, 8); +} + +/* Expand 4 16 bit pixels in an mmx register into two mmx registers of + * + * AARRGGBBRRGGBB + */ +static force_inline void +expand_4xpacked565 (__m64 vin, __m64 *vout0, __m64 *vout1, int full_alpha) +{ + __m64 t0, t1, alpha = _mm_setzero_si64 (); + __m64 r = _mm_and_si64 (vin, MC (expand_565_r)); + __m64 g = _mm_and_si64 (vin, MC (expand_565_g)); + __m64 b = _mm_and_si64 (vin, MC (expand_565_b)); + if (full_alpha) + alpha = _mm_cmpeq_pi32 (alpha, alpha); + + /* Replicate high bits into empty low bits. */ + r = _mm_or_si64 (_mm_srli_pi16 (r, 8), _mm_srli_pi16 (r, 13)); + g = _mm_or_si64 (_mm_srli_pi16 (g, 3), _mm_srli_pi16 (g, 9)); + b = _mm_or_si64 (_mm_slli_pi16 (b, 3), _mm_srli_pi16 (b, 2)); + + r = _mm_packs_pu16 (r, _mm_setzero_si64 ()); /* 00 00 00 00 R3 R2 R1 R0 */ + g = _mm_packs_pu16 (g, _mm_setzero_si64 ()); /* 00 00 00 00 G3 G2 G1 G0 */ + b = _mm_packs_pu16 (b, _mm_setzero_si64 ()); /* 00 00 00 00 B3 B2 B1 B0 */ + + t1 = _mm_unpacklo_pi8 (r, alpha); /* A3 R3 A2 R2 A1 R1 A0 R0 */ + t0 = _mm_unpacklo_pi8 (b, g); /* G3 B3 G2 B2 G1 B1 G0 B0 */ + + *vout0 = _mm_unpacklo_pi16 (t0, t1); /* A1 R1 G1 B1 A0 R0 G0 B0 */ + *vout1 = _mm_unpackhi_pi16 (t0, t1); /* A3 R3 G3 B3 A2 R2 G2 B2 */ +} + +static force_inline __m64 +expand8888 (__m64 in, int pos) +{ + if (pos == 0) + return _mm_unpacklo_pi8 (in, _mm_setzero_si64 ()); + else + return _mm_unpackhi_pi8 (in, _mm_setzero_si64 ()); +} + +static force_inline __m64 +expandx888 (__m64 in, int pos) +{ + return _mm_or_si64 (expand8888 (in, pos), MC (full_alpha)); +} + +static force_inline void +expand_4x565 (__m64 vin, __m64 *vout0, __m64 *vout1, __m64 *vout2, __m64 *vout3, int full_alpha) +{ + __m64 v0, v1; + expand_4xpacked565 (vin, &v0, &v1, full_alpha); + *vout0 = expand8888 (v0, 0); + *vout1 = expand8888 (v0, 1); + *vout2 = expand8888 (v1, 0); + *vout3 = expand8888 (v1, 1); +} + +static force_inline __m64 +pack_565 (__m64 pixel, __m64 target, int pos) +{ + __m64 p = pixel; + __m64 t = target; + __m64 r, g, b; + + r = _mm_and_si64 (p, MC (565_r)); + g = _mm_and_si64 (p, MC (565_g)); + b = _mm_and_si64 (p, MC (565_b)); + +#ifdef USE_LOONGSON_MMI + r = shift (r, -(32 - 8)); + g = shift (g, -(16 - 3)); + b = shift (b, -(0 + 3)); + + p = _mm_or_si64 (r, g); + p = _mm_or_si64 (p, b); + return loongson_insert_pi16 (t, p, pos); +#else + r = shift (r, -(32 - 8) + pos * 16); + g = shift (g, -(16 - 3) + pos * 16); + b = shift (b, -(0 + 3) + pos * 16); + + if (pos == 0) + t = _mm_and_si64 (t, MC (mask_0)); + else if (pos == 1) + t = _mm_and_si64 (t, MC (mask_1)); + else if (pos == 2) + t = _mm_and_si64 (t, MC (mask_2)); + else if (pos == 3) + t = _mm_and_si64 (t, MC (mask_3)); + + p = _mm_or_si64 (r, t); + p = _mm_or_si64 (g, p); + + return _mm_or_si64 (b, p); +#endif +} + +static force_inline __m64 +pack_4xpacked565 (__m64 a, __m64 b) +{ + __m64 rb0 = _mm_and_si64 (a, MC (packed_565_rb)); + __m64 rb1 = _mm_and_si64 (b, MC (packed_565_rb)); + + __m64 t0 = _mm_madd_pi16 (rb0, MC (565_pack_multiplier)); + __m64 t1 = _mm_madd_pi16 (rb1, MC (565_pack_multiplier)); + + __m64 g0 = _mm_and_si64 (a, MC (packed_565_g)); + __m64 g1 = _mm_and_si64 (b, MC (packed_565_g)); + + t0 = _mm_or_si64 (t0, g0); + t1 = _mm_or_si64 (t1, g1); + + t0 = shift(t0, -5); +#ifdef USE_ARM_IWMMXT + t1 = shift(t1, -5); + return _mm_packs_pu32 (t0, t1); +#else + t1 = shift(t1, -5 + 16); + return _mm_shuffle_pi16 (_mm_or_si64 (t0, t1), _MM_SHUFFLE (3, 1, 2, 0)); +#endif +} + +#ifndef _MSC_VER + +static force_inline __m64 +pack_4x565 (__m64 v0, __m64 v1, __m64 v2, __m64 v3) +{ + return pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3)); +} + +static force_inline __m64 +pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b) +{ + x = pix_multiply (x, a); + y = pix_multiply (y, b); + + return pix_add (x, y); +} + +#else + +/* MSVC only handles a "pass by register" of up to three SSE intrinsics */ + +#define pack_4x565(v0, v1, v2, v3) \ + pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3)) + +#define pix_add_mul(x, a, y, b) \ + ( x = pix_multiply (x, a), \ + y = pix_multiply (y, b), \ + pix_add (x, y) ) + +#endif + +/* --------------- MMX code patch for fbcompose.c --------------------- */ + +static force_inline __m64 +combine (const uint32_t *src, const uint32_t *mask) +{ + __m64 vsrc = load8888 (src); + + if (mask) + { + __m64 m = load8888 (mask); + + m = expand_alpha (m); + vsrc = pix_multiply (vsrc, m); + } + + return vsrc; +} + +static force_inline __m64 +core_combine_over_u_pixel_mmx (__m64 vsrc, __m64 vdst) +{ + vsrc = _mm_unpacklo_pi8 (vsrc, _mm_setzero_si64 ()); + + if (is_opaque (vsrc)) + { + return vsrc; + } + else if (!is_zero (vsrc)) + { + return over (vsrc, expand_alpha (vsrc), + _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ())); + } + + return _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ()); +} + +static void +mmx_combine_over_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = dest + width; + + while (dest < end) + { + __m64 vsrc = combine (src, mask); + + if (is_opaque (vsrc)) + { + store8888 (dest, vsrc); + } + else if (!is_zero (vsrc)) + { + __m64 sa = expand_alpha (vsrc); + store8888 (dest, over (vsrc, sa, load8888 (dest))); + } + + ++dest; + ++src; + if (mask) + ++mask; + } + _mm_empty (); +} + +static void +mmx_combine_over_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = dest + width; + + while (dest < end) + { + __m64 d, da; + __m64 s = combine (src, mask); + + d = load8888 (dest); + da = expand_alpha (d); + store8888 (dest, over (d, da, s)); + + ++dest; + ++src; + if (mask) + mask++; + } + _mm_empty (); +} + +static void +mmx_combine_in_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = dest + width; + + while (dest < end) + { + __m64 a; + __m64 x = combine (src, mask); + + a = load8888 (dest); + a = expand_alpha (a); + x = pix_multiply (x, a); + + store8888 (dest, x); + + ++dest; + ++src; + if (mask) + mask++; + } + _mm_empty (); +} + +static void +mmx_combine_in_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = dest + width; + + while (dest < end) + { + __m64 a = combine (src, mask); + __m64 x; + + x = load8888 (dest); + a = expand_alpha (a); + x = pix_multiply (x, a); + store8888 (dest, x); + + ++dest; + ++src; + if (mask) + mask++; + } + _mm_empty (); +} + +static void +mmx_combine_out_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = dest + width; + + while (dest < end) + { + __m64 a; + __m64 x = combine (src, mask); + + a = load8888 (dest); + a = expand_alpha (a); + a = negate (a); + x = pix_multiply (x, a); + store8888 (dest, x); + + ++dest; + ++src; + if (mask) + mask++; + } + _mm_empty (); +} + +static void +mmx_combine_out_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = dest + width; + + while (dest < end) + { + __m64 a = combine (src, mask); + __m64 x; + + x = load8888 (dest); + a = expand_alpha (a); + a = negate (a); + x = pix_multiply (x, a); + + store8888 (dest, x); + + ++dest; + ++src; + if (mask) + mask++; + } + _mm_empty (); +} + +static void +mmx_combine_atop_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = dest + width; + + while (dest < end) + { + __m64 da, d, sia; + __m64 s = combine (src, mask); + + d = load8888 (dest); + sia = expand_alpha (s); + sia = negate (sia); + da = expand_alpha (d); + s = pix_add_mul (s, da, d, sia); + store8888 (dest, s); + + ++dest; + ++src; + if (mask) + mask++; + } + _mm_empty (); +} + +static void +mmx_combine_atop_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end; + + end = dest + width; + + while (dest < end) + { + __m64 dia, d, sa; + __m64 s = combine (src, mask); + + d = load8888 (dest); + sa = expand_alpha (s); + dia = expand_alpha (d); + dia = negate (dia); + s = pix_add_mul (s, dia, d, sa); + store8888 (dest, s); + + ++dest; + ++src; + if (mask) + mask++; + } + _mm_empty (); +} + +static void +mmx_combine_xor_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = dest + width; + + while (dest < end) + { + __m64 dia, d, sia; + __m64 s = combine (src, mask); + + d = load8888 (dest); + sia = expand_alpha (s); + dia = expand_alpha (d); + sia = negate (sia); + dia = negate (dia); + s = pix_add_mul (s, dia, d, sia); + store8888 (dest, s); + + ++dest; + ++src; + if (mask) + mask++; + } + _mm_empty (); +} + +static void +mmx_combine_add_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = dest + width; + + while (dest < end) + { + __m64 d; + __m64 s = combine (src, mask); + + d = load8888 (dest); + s = pix_add (s, d); + store8888 (dest, s); + + ++dest; + ++src; + if (mask) + mask++; + } + _mm_empty (); +} + +static void +mmx_combine_saturate_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = dest + width; + + while (dest < end) + { + uint32_t s, sa, da; + uint32_t d = *dest; + __m64 ms = combine (src, mask); + __m64 md = load8888 (dest); + + store8888(&s, ms); + da = ~d >> 24; + sa = s >> 24; + + if (sa > da) + { + uint32_t quot = DIV_UN8 (da, sa) << 24; + __m64 msa = load8888 ("); + msa = expand_alpha (msa); + ms = pix_multiply (ms, msa); + } + + md = pix_add (md, ms); + store8888 (dest, md); + + ++src; + ++dest; + if (mask) + mask++; + } + _mm_empty (); +} + +static void +mmx_combine_src_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = src + width; + + while (src < end) + { + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + + s = pix_multiply (s, a); + store8888 (dest, s); + + ++src; + ++mask; + ++dest; + } + _mm_empty (); +} + +static void +mmx_combine_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = src + width; + + while (src < end) + { + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); + __m64 sa = expand_alpha (s); + + store8888 (dest, in_over (s, sa, a, d)); + + ++src; + ++dest; + ++mask; + } + _mm_empty (); +} + +static void +mmx_combine_over_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = src + width; + + while (src < end) + { + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); + __m64 da = expand_alpha (d); + + store8888 (dest, over (d, da, in (s, a))); + + ++src; + ++dest; + ++mask; + } + _mm_empty (); +} + +static void +mmx_combine_in_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = src + width; + + while (src < end) + { + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); + __m64 da = expand_alpha (d); + + s = pix_multiply (s, a); + s = pix_multiply (s, da); + store8888 (dest, s); + + ++src; + ++dest; + ++mask; + } + _mm_empty (); +} + +static void +mmx_combine_in_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = src + width; + + while (src < end) + { + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); + __m64 sa = expand_alpha (s); + + a = pix_multiply (a, sa); + d = pix_multiply (d, a); + store8888 (dest, d); + + ++src; + ++dest; + ++mask; + } + _mm_empty (); +} + +static void +mmx_combine_out_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = src + width; + + while (src < end) + { + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); + __m64 da = expand_alpha (d); + + da = negate (da); + s = pix_multiply (s, a); + s = pix_multiply (s, da); + store8888 (dest, s); + + ++src; + ++dest; + ++mask; + } + _mm_empty (); +} + +static void +mmx_combine_out_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = src + width; + + while (src < end) + { + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); + __m64 sa = expand_alpha (s); + + a = pix_multiply (a, sa); + a = negate (a); + d = pix_multiply (d, a); + store8888 (dest, d); + + ++src; + ++dest; + ++mask; + } + _mm_empty (); +} + +static void +mmx_combine_atop_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = src + width; + + while (src < end) + { + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); + __m64 da = expand_alpha (d); + __m64 sa = expand_alpha (s); + + s = pix_multiply (s, a); + a = pix_multiply (a, sa); + a = negate (a); + d = pix_add_mul (d, a, s, da); + store8888 (dest, d); + + ++src; + ++dest; + ++mask; + } + _mm_empty (); +} + +static void +mmx_combine_atop_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = src + width; + + while (src < end) + { + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); + __m64 da = expand_alpha (d); + __m64 sa = expand_alpha (s); + + s = pix_multiply (s, a); + a = pix_multiply (a, sa); + da = negate (da); + d = pix_add_mul (d, a, s, da); + store8888 (dest, d); + + ++src; + ++dest; + ++mask; + } + _mm_empty (); +} + +static void +mmx_combine_xor_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = src + width; + + while (src < end) + { + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); + __m64 da = expand_alpha (d); + __m64 sa = expand_alpha (s); + + s = pix_multiply (s, a); + a = pix_multiply (a, sa); + da = negate (da); + a = negate (a); + d = pix_add_mul (d, a, s, da); + store8888 (dest, d); + + ++src; + ++dest; + ++mask; + } + _mm_empty (); +} + +static void +mmx_combine_add_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + const uint32_t *end = src + width; + + while (src < end) + { + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); + + s = pix_multiply (s, a); + d = pix_add (s, d); + store8888 (dest, d); + + ++src; + ++dest; + ++mask; + } + _mm_empty (); +} + +/* ------------- MMX code paths called from fbpict.c -------------------- */ + +static void +mmx_composite_over_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint32_t *dst_line, *dst; + int32_t w; + int dst_stride; + __m64 vsrc, vsrca; + + CHECKPOINT (); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + vsrc = load8888 (&src); + vsrca = expand_alpha (vsrc); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + w = width; + + CHECKPOINT (); + + while (w && (uintptr_t)dst & 7) + { + store8888 (dst, over (vsrc, vsrca, load8888 (dst))); + + w--; + dst++; + } + + while (w >= 2) + { + __m64 vdest; + __m64 dest0, dest1; + + vdest = *(__m64 *)dst; + + dest0 = over (vsrc, vsrca, expand8888 (vdest, 0)); + dest1 = over (vsrc, vsrca, expand8888 (vdest, 1)); + + *(__m64 *)dst = pack8888 (dest0, dest1); + + dst += 2; + w -= 2; + } + + CHECKPOINT (); + + if (w) + { + store8888 (dst, over (vsrc, vsrca, load8888 (dst))); + } + } + + _mm_empty (); +} + +static void +mmx_composite_over_n_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint16_t *dst_line, *dst; + int32_t w; + int dst_stride; + __m64 vsrc, vsrca; + + CHECKPOINT (); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + vsrc = load8888 (&src); + vsrca = expand_alpha (vsrc); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + w = width; + + CHECKPOINT (); + + while (w && (uintptr_t)dst & 7) + { + uint64_t d = *dst; + __m64 vdest = expand565 (to_m64 (d), 0); + + vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0); + *dst = to_uint64 (vdest); + + w--; + dst++; + } + + while (w >= 4) + { + __m64 vdest = *(__m64 *)dst; + __m64 v0, v1, v2, v3; + + expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0); + + v0 = over (vsrc, vsrca, v0); + v1 = over (vsrc, vsrca, v1); + v2 = over (vsrc, vsrca, v2); + v3 = over (vsrc, vsrca, v3); + + *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); + + dst += 4; + w -= 4; + } + + CHECKPOINT (); + + while (w) + { + uint64_t d = *dst; + __m64 vdest = expand565 (to_m64 (d), 0); + + vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0); + *dst = to_uint64 (vdest); + + w--; + dst++; + } + } + + _mm_empty (); +} + +static void +mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint32_t *dst_line; + uint32_t *mask_line; + int dst_stride, mask_stride; + __m64 vsrc, vsrca; + + CHECKPOINT (); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + vsrc = load8888 (&src); + vsrca = expand_alpha (vsrc); + + while (height--) + { + int twidth = width; + uint32_t *p = (uint32_t *)mask_line; + uint32_t *q = (uint32_t *)dst_line; + + while (twidth && (uintptr_t)q & 7) + { + uint32_t m = *(uint32_t *)p; + + if (m) + { + __m64 vdest = load8888 (q); + vdest = in_over (vsrc, vsrca, load8888 (&m), vdest); + store8888 (q, vdest); + } + + twidth--; + p++; + q++; + } + + while (twidth >= 2) + { + uint32_t m0, m1; + m0 = *p; + m1 = *(p + 1); + + if (m0 | m1) + { + __m64 dest0, dest1; + __m64 vdest = *(__m64 *)q; + + dest0 = in_over (vsrc, vsrca, load8888 (&m0), + expand8888 (vdest, 0)); + dest1 = in_over (vsrc, vsrca, load8888 (&m1), + expand8888 (vdest, 1)); + + *(__m64 *)q = pack8888 (dest0, dest1); + } + + p += 2; + q += 2; + twidth -= 2; + } + + if (twidth) + { + uint32_t m = *(uint32_t *)p; + + if (m) + { + __m64 vdest = load8888 (q); + vdest = in_over (vsrc, vsrca, load8888 (&m), vdest); + store8888 (q, vdest); + } + + twidth--; + p++; + q++; + } + + dst_line += dst_stride; + mask_line += mask_stride; + } + + _mm_empty (); +} + +static void +mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + uint32_t mask; + __m64 vmask; + int dst_stride, src_stride; + int32_t w; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); + vmask = expand_alpha (load8888 (&mask)); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 7) + { + __m64 s = load8888 (src); + __m64 d = load8888 (dst); + + store8888 (dst, in_over (s, expand_alpha (s), vmask, d)); + + w--; + dst++; + src++; + } + + while (w >= 2) + { + __m64 vs = ldq_u ((__m64 *)src); + __m64 vd = *(__m64 *)dst; + __m64 vsrc0 = expand8888 (vs, 0); + __m64 vsrc1 = expand8888 (vs, 1); + + *(__m64 *)dst = pack8888 ( + in_over (vsrc0, expand_alpha (vsrc0), vmask, expand8888 (vd, 0)), + in_over (vsrc1, expand_alpha (vsrc1), vmask, expand8888 (vd, 1))); + + w -= 2; + dst += 2; + src += 2; + } + + if (w) + { + __m64 s = load8888 (src); + __m64 d = load8888 (dst); + + store8888 (dst, in_over (s, expand_alpha (s), vmask, d)); + } + } + + _mm_empty (); +} + +static void +mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + uint32_t mask; + __m64 vmask; + int dst_stride, src_stride; + int32_t w; + __m64 srca; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); + + vmask = expand_alpha (load8888 (&mask)); + srca = MC (4x00ff); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 7) + { + uint32_t ssrc = *src | 0xff000000; + __m64 s = load8888 (&ssrc); + __m64 d = load8888 (dst); + + store8888 (dst, in_over (s, srca, vmask, d)); + + w--; + dst++; + src++; + } + + while (w >= 16) + { + __m64 vd0 = *(__m64 *)(dst + 0); + __m64 vd1 = *(__m64 *)(dst + 2); + __m64 vd2 = *(__m64 *)(dst + 4); + __m64 vd3 = *(__m64 *)(dst + 6); + __m64 vd4 = *(__m64 *)(dst + 8); + __m64 vd5 = *(__m64 *)(dst + 10); + __m64 vd6 = *(__m64 *)(dst + 12); + __m64 vd7 = *(__m64 *)(dst + 14); + + __m64 vs0 = ldq_u ((__m64 *)(src + 0)); + __m64 vs1 = ldq_u ((__m64 *)(src + 2)); + __m64 vs2 = ldq_u ((__m64 *)(src + 4)); + __m64 vs3 = ldq_u ((__m64 *)(src + 6)); + __m64 vs4 = ldq_u ((__m64 *)(src + 8)); + __m64 vs5 = ldq_u ((__m64 *)(src + 10)); + __m64 vs6 = ldq_u ((__m64 *)(src + 12)); + __m64 vs7 = ldq_u ((__m64 *)(src + 14)); + + vd0 = pack8888 ( + in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)), + in_over (expandx888 (vs0, 1), srca, vmask, expand8888 (vd0, 1))); + + vd1 = pack8888 ( + in_over (expandx888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)), + in_over (expandx888 (vs1, 1), srca, vmask, expand8888 (vd1, 1))); + + vd2 = pack8888 ( + in_over (expandx888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)), + in_over (expandx888 (vs2, 1), srca, vmask, expand8888 (vd2, 1))); + + vd3 = pack8888 ( + in_over (expandx888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)), + in_over (expandx888 (vs3, 1), srca, vmask, expand8888 (vd3, 1))); + + vd4 = pack8888 ( + in_over (expandx888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)), + in_over (expandx888 (vs4, 1), srca, vmask, expand8888 (vd4, 1))); + + vd5 = pack8888 ( + in_over (expandx888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)), + in_over (expandx888 (vs5, 1), srca, vmask, expand8888 (vd5, 1))); + + vd6 = pack8888 ( + in_over (expandx888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)), + in_over (expandx888 (vs6, 1), srca, vmask, expand8888 (vd6, 1))); + + vd7 = pack8888 ( + in_over (expandx888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)), + in_over (expandx888 (vs7, 1), srca, vmask, expand8888 (vd7, 1))); + + *(__m64 *)(dst + 0) = vd0; + *(__m64 *)(dst + 2) = vd1; + *(__m64 *)(dst + 4) = vd2; + *(__m64 *)(dst + 6) = vd3; + *(__m64 *)(dst + 8) = vd4; + *(__m64 *)(dst + 10) = vd5; + *(__m64 *)(dst + 12) = vd6; + *(__m64 *)(dst + 14) = vd7; + + w -= 16; + dst += 16; + src += 16; + } + + while (w) + { + uint32_t ssrc = *src | 0xff000000; + __m64 s = load8888 (&ssrc); + __m64 d = load8888 (dst); + + store8888 (dst, in_over (s, srca, vmask, d)); + + w--; + dst++; + src++; + } + } + + _mm_empty (); +} + +static void +mmx_composite_over_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + uint32_t s; + int dst_stride, src_stride; + uint8_t a; + int32_t w; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + a = s >> 24; + + if (a == 0xff) + { + *dst = s; + } + else if (s) + { + __m64 ms, sa; + ms = load8888 (&s); + sa = expand_alpha (ms); + store8888 (dst, over (ms, sa, load8888 (dst))); + } + + dst++; + } + } + _mm_empty (); +} + +static void +mmx_composite_over_8888_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + +#if 0 + /* FIXME */ + assert (src_image->drawable == mask_image->drawable); +#endif + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + CHECKPOINT (); + + while (w && (uintptr_t)dst & 7) + { + __m64 vsrc = load8888 (src); + uint64_t d = *dst; + __m64 vdest = expand565 (to_m64 (d), 0); + + vdest = pack_565 ( + over (vsrc, expand_alpha (vsrc), vdest), vdest, 0); + + *dst = to_uint64 (vdest); + + w--; + dst++; + src++; + } + + CHECKPOINT (); + + while (w >= 4) + { + __m64 vdest = *(__m64 *)dst; + __m64 v0, v1, v2, v3; + __m64 vsrc0, vsrc1, vsrc2, vsrc3; + + expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0); + + vsrc0 = load8888 ((src + 0)); + vsrc1 = load8888 ((src + 1)); + vsrc2 = load8888 ((src + 2)); + vsrc3 = load8888 ((src + 3)); + + v0 = over (vsrc0, expand_alpha (vsrc0), v0); + v1 = over (vsrc1, expand_alpha (vsrc1), v1); + v2 = over (vsrc2, expand_alpha (vsrc2), v2); + v3 = over (vsrc3, expand_alpha (vsrc3), v3); + + *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); + + w -= 4; + dst += 4; + src += 4; + } + + CHECKPOINT (); + + while (w) + { + __m64 vsrc = load8888 (src); + uint64_t d = *dst; + __m64 vdest = expand565 (to_m64 (d), 0); + + vdest = pack_565 (over (vsrc, expand_alpha (vsrc), vdest), vdest, 0); + + *dst = to_uint64 (vdest); + + w--; + dst++; + src++; + } + } + + _mm_empty (); +} + +static void +mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + __m64 vsrc, vsrca; + uint64_t srcsrc; + + CHECKPOINT (); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + srcsrc = (uint64_t)src << 32 | src; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + vsrc = load8888 (&src); + vsrca = expand_alpha (vsrc); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + CHECKPOINT (); + + while (w && (uintptr_t)dst & 7) + { + uint64_t m = *mask; + + if (m) + { + __m64 vdest = in_over (vsrc, vsrca, + expand_alpha_rev (to_m64 (m)), + load8888 (dst)); + + store8888 (dst, vdest); + } + + w--; + mask++; + dst++; + } + + CHECKPOINT (); + + while (w >= 2) + { + uint64_t m0, m1; + + m0 = *mask; + m1 = *(mask + 1); + + if (srca == 0xff && (m0 & m1) == 0xff) + { + *(uint64_t *)dst = srcsrc; + } + else if (m0 | m1) + { + __m64 vdest; + __m64 dest0, dest1; + + vdest = *(__m64 *)dst; + + dest0 = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m0)), + expand8888 (vdest, 0)); + dest1 = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m1)), + expand8888 (vdest, 1)); + + *(__m64 *)dst = pack8888 (dest0, dest1); + } + + mask += 2; + dst += 2; + w -= 2; + } + + CHECKPOINT (); + + if (w) + { + uint64_t m = *mask; + + if (m) + { + __m64 vdest = load8888 (dst); + + vdest = in_over ( + vsrc, vsrca, expand_alpha_rev (to_m64 (m)), vdest); + store8888 (dst, vdest); + } + } + } + + _mm_empty (); +} + +static pixman_bool_t +mmx_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t filler) +{ + uint64_t fill; + __m64 vfill; + uint32_t byte_width; + uint8_t *byte_line; + +#if defined __GNUC__ && defined USE_X86_MMX + __m64 v1, v2, v3, v4, v5, v6, v7; +#endif + + if (bpp != 16 && bpp != 32 && bpp != 8) + return FALSE; + + if (bpp == 8) + { + stride = stride * (int) sizeof (uint32_t) / 1; + byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); + byte_width = width; + stride *= 1; + filler = (filler & 0xff) * 0x01010101; + } + else if (bpp == 16) + { + stride = stride * (int) sizeof (uint32_t) / 2; + byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); + byte_width = 2 * width; + stride *= 2; + filler = (filler & 0xffff) * 0x00010001; + } + else + { + stride = stride * (int) sizeof (uint32_t) / 4; + byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); + byte_width = 4 * width; + stride *= 4; + } + + fill = ((uint64_t)filler << 32) | filler; + vfill = to_m64 (fill); + +#if defined __GNUC__ && defined USE_X86_MMX + __asm__ ( + "movq %7, %0\n" + "movq %7, %1\n" + "movq %7, %2\n" + "movq %7, %3\n" + "movq %7, %4\n" + "movq %7, %5\n" + "movq %7, %6\n" + : "=&y" (v1), "=&y" (v2), "=&y" (v3), + "=&y" (v4), "=&y" (v5), "=&y" (v6), "=y" (v7) + : "y" (vfill)); +#endif + + while (height--) + { + int w; + uint8_t *d = byte_line; + + byte_line += stride; + w = byte_width; + + if (w >= 1 && ((uintptr_t)d & 1)) + { + *(uint8_t *)d = (filler & 0xff); + w--; + d++; + } + + if (w >= 2 && ((uintptr_t)d & 3)) + { + *(uint16_t *)d = filler; + w -= 2; + d += 2; + } + + while (w >= 4 && ((uintptr_t)d & 7)) + { + *(uint32_t *)d = filler; + + w -= 4; + d += 4; + } + + while (w >= 64) + { +#if defined __GNUC__ && defined USE_X86_MMX + __asm__ ( + "movq %1, (%0)\n" + "movq %2, 8(%0)\n" + "movq %3, 16(%0)\n" + "movq %4, 24(%0)\n" + "movq %5, 32(%0)\n" + "movq %6, 40(%0)\n" + "movq %7, 48(%0)\n" + "movq %8, 56(%0)\n" + : + : "r" (d), + "y" (vfill), "y" (v1), "y" (v2), "y" (v3), + "y" (v4), "y" (v5), "y" (v6), "y" (v7) + : "memory"); +#else + *(__m64*) (d + 0) = vfill; + *(__m64*) (d + 8) = vfill; + *(__m64*) (d + 16) = vfill; + *(__m64*) (d + 24) = vfill; + *(__m64*) (d + 32) = vfill; + *(__m64*) (d + 40) = vfill; + *(__m64*) (d + 48) = vfill; + *(__m64*) (d + 56) = vfill; +#endif + w -= 64; + d += 64; + } + + while (w >= 4) + { + *(uint32_t *)d = filler; + + w -= 4; + d += 4; + } + if (w >= 2) + { + *(uint16_t *)d = filler; + w -= 2; + d += 2; + } + if (w >= 1) + { + *(uint8_t *)d = (filler & 0xff); + w--; + d++; + } + + } + + _mm_empty (); + return TRUE; +} + +static void +mmx_composite_src_x888_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 7) + { + s = *src++; + *dst = convert_8888_to_0565 (s); + dst++; + w--; + } + + while (w >= 4) + { + __m64 vdest; + __m64 vsrc0 = ldq_u ((__m64 *)(src + 0)); + __m64 vsrc1 = ldq_u ((__m64 *)(src + 2)); + + vdest = pack_4xpacked565 (vsrc0, vsrc1); + + *(__m64 *)dst = vdest; + + w -= 4; + src += 4; + dst += 4; + } + + while (w) + { + s = *src++; + *dst = convert_8888_to_0565 (s); + dst++; + w--; + } + } + + _mm_empty (); +} + +static void +mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + __m64 vsrc; + uint64_t srcsrc; + + CHECKPOINT (); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + if (src == 0) + { + mmx_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride, + PIXMAN_FORMAT_BPP (dest_image->bits.format), + dest_x, dest_y, width, height, 0); + return; + } + + srcsrc = (uint64_t)src << 32 | src; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + vsrc = load8888 (&src); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + CHECKPOINT (); + + while (w && (uintptr_t)dst & 7) + { + uint64_t m = *mask; + + if (m) + { + __m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m))); + + store8888 (dst, vdest); + } + else + { + *dst = 0; + } + + w--; + mask++; + dst++; + } + + CHECKPOINT (); + + while (w >= 2) + { + uint64_t m0, m1; + m0 = *mask; + m1 = *(mask + 1); + + if (srca == 0xff && (m0 & m1) == 0xff) + { + *(uint64_t *)dst = srcsrc; + } + else if (m0 | m1) + { + __m64 dest0, dest1; + + dest0 = in (vsrc, expand_alpha_rev (to_m64 (m0))); + dest1 = in (vsrc, expand_alpha_rev (to_m64 (m1))); + + *(__m64 *)dst = pack8888 (dest0, dest1); + } + else + { + *(uint64_t *)dst = 0; + } + + mask += 2; + dst += 2; + w -= 2; + } + + CHECKPOINT (); + + if (w) + { + uint64_t m = *mask; + + if (m) + { + __m64 vdest = load8888 (dst); + + vdest = in (vsrc, expand_alpha_rev (to_m64 (m))); + store8888 (dst, vdest); + } + else + { + *dst = 0; + } + } + } + + _mm_empty (); +} + +static void +mmx_composite_over_n_8_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint16_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + __m64 vsrc, vsrca, tmp; + __m64 srcsrcsrcsrc; + + CHECKPOINT (); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + vsrc = load8888 (&src); + vsrca = expand_alpha (vsrc); + + tmp = pack_565 (vsrc, _mm_setzero_si64 (), 0); + srcsrcsrcsrc = expand_alpha_rev (tmp); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + CHECKPOINT (); + + while (w && (uintptr_t)dst & 7) + { + uint64_t m = *mask; + + if (m) + { + uint64_t d = *dst; + __m64 vd = to_m64 (d); + __m64 vdest = in_over ( + vsrc, vsrca, expand_alpha_rev (to_m64 (m)), expand565 (vd, 0)); + + vd = pack_565 (vdest, _mm_setzero_si64 (), 0); + *dst = to_uint64 (vd); + } + + w--; + mask++; + dst++; + } + + CHECKPOINT (); + + while (w >= 4) + { + uint64_t m0, m1, m2, m3; + m0 = *mask; + m1 = *(mask + 1); + m2 = *(mask + 2); + m3 = *(mask + 3); + + if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff) + { + *(__m64 *)dst = srcsrcsrcsrc; + } + else if (m0 | m1 | m2 | m3) + { + __m64 vdest = *(__m64 *)dst; + __m64 v0, v1, v2, v3; + __m64 vm0, vm1, vm2, vm3; + + expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0); + + vm0 = to_m64 (m0); + v0 = in_over (vsrc, vsrca, expand_alpha_rev (vm0), v0); + + vm1 = to_m64 (m1); + v1 = in_over (vsrc, vsrca, expand_alpha_rev (vm1), v1); + + vm2 = to_m64 (m2); + v2 = in_over (vsrc, vsrca, expand_alpha_rev (vm2), v2); + + vm3 = to_m64 (m3); + v3 = in_over (vsrc, vsrca, expand_alpha_rev (vm3), v3); + + *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);; + } + + w -= 4; + mask += 4; + dst += 4; + } + + CHECKPOINT (); + + while (w) + { + uint64_t m = *mask; + + if (m) + { + uint64_t d = *dst; + __m64 vd = to_m64 (d); + __m64 vdest = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m)), + expand565 (vd, 0)); + vd = pack_565 (vdest, _mm_setzero_si64 (), 0); + *dst = to_uint64 (vd); + } + + w--; + mask++; + dst++; + } + } + + _mm_empty (); +} + +static void +mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + +#if 0 + /* FIXME */ + assert (src_image->drawable == mask_image->drawable); +#endif + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + CHECKPOINT (); + + while (w && (uintptr_t)dst & 7) + { + __m64 vsrc = load8888 (src); + uint64_t d = *dst; + __m64 vdest = expand565 (to_m64 (d), 0); + + vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0); + + *dst = to_uint64 (vdest); + + w--; + dst++; + src++; + } + + CHECKPOINT (); + + while (w >= 4) + { + uint32_t s0, s1, s2, s3; + unsigned char a0, a1, a2, a3; + + s0 = *src; + s1 = *(src + 1); + s2 = *(src + 2); + s3 = *(src + 3); + + a0 = (s0 >> 24); + a1 = (s1 >> 24); + a2 = (s2 >> 24); + a3 = (s3 >> 24); + + if ((a0 & a1 & a2 & a3) == 0xFF) + { + __m64 v0 = invert_colors (load8888 (&s0)); + __m64 v1 = invert_colors (load8888 (&s1)); + __m64 v2 = invert_colors (load8888 (&s2)); + __m64 v3 = invert_colors (load8888 (&s3)); + + *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); + } + else if (s0 | s1 | s2 | s3) + { + __m64 vdest = *(__m64 *)dst; + __m64 v0, v1, v2, v3; + + __m64 vsrc0 = load8888 (&s0); + __m64 vsrc1 = load8888 (&s1); + __m64 vsrc2 = load8888 (&s2); + __m64 vsrc3 = load8888 (&s3); + + expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0); + + v0 = over_rev_non_pre (vsrc0, v0); + v1 = over_rev_non_pre (vsrc1, v1); + v2 = over_rev_non_pre (vsrc2, v2); + v3 = over_rev_non_pre (vsrc3, v3); + + *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); + } + + w -= 4; + dst += 4; + src += 4; + } + + CHECKPOINT (); + + while (w) + { + __m64 vsrc = load8888 (src); + uint64_t d = *dst; + __m64 vdest = expand565 (to_m64 (d), 0); + + vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0); + + *dst = to_uint64 (vdest); + + w--; + dst++; + src++; + } + } + + _mm_empty (); +} + +static void +mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + +#if 0 + /* FIXME */ + assert (src_image->drawable == mask_image->drawable); +#endif + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 7) + { + __m64 s = load8888 (src); + __m64 d = load8888 (dst); + + store8888 (dst, over_rev_non_pre (s, d)); + + w--; + dst++; + src++; + } + + while (w >= 2) + { + uint32_t s0, s1; + unsigned char a0, a1; + __m64 d0, d1; + + s0 = *src; + s1 = *(src + 1); + + a0 = (s0 >> 24); + a1 = (s1 >> 24); + + if ((a0 & a1) == 0xFF) + { + d0 = invert_colors (load8888 (&s0)); + d1 = invert_colors (load8888 (&s1)); + + *(__m64 *)dst = pack8888 (d0, d1); + } + else if (s0 | s1) + { + __m64 vdest = *(__m64 *)dst; + + d0 = over_rev_non_pre (load8888 (&s0), expand8888 (vdest, 0)); + d1 = over_rev_non_pre (load8888 (&s1), expand8888 (vdest, 1)); + + *(__m64 *)dst = pack8888 (d0, d1); + } + + w -= 2; + dst += 2; + src += 2; + } + + if (w) + { + __m64 s = load8888 (src); + __m64 d = load8888 (dst); + + store8888 (dst, over_rev_non_pre (s, d)); + } + } + + _mm_empty (); +} + +static void +mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint16_t *dst_line; + uint32_t *mask_line; + int dst_stride, mask_stride; + __m64 vsrc, vsrca; + + CHECKPOINT (); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + vsrc = load8888 (&src); + vsrca = expand_alpha (vsrc); + + while (height--) + { + int twidth = width; + uint32_t *p = (uint32_t *)mask_line; + uint16_t *q = (uint16_t *)dst_line; + + while (twidth && ((uintptr_t)q & 7)) + { + uint32_t m = *(uint32_t *)p; + + if (m) + { + uint64_t d = *q; + __m64 vdest = expand565 (to_m64 (d), 0); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0); + *q = to_uint64 (vdest); + } + + twidth--; + p++; + q++; + } + + while (twidth >= 4) + { + uint32_t m0, m1, m2, m3; + + m0 = *p; + m1 = *(p + 1); + m2 = *(p + 2); + m3 = *(p + 3); + + if ((m0 | m1 | m2 | m3)) + { + __m64 vdest = *(__m64 *)q; + __m64 v0, v1, v2, v3; + + expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0); + + v0 = in_over (vsrc, vsrca, load8888 (&m0), v0); + v1 = in_over (vsrc, vsrca, load8888 (&m1), v1); + v2 = in_over (vsrc, vsrca, load8888 (&m2), v2); + v3 = in_over (vsrc, vsrca, load8888 (&m3), v3); + + *(__m64 *)q = pack_4x565 (v0, v1, v2, v3); + } + twidth -= 4; + p += 4; + q += 4; + } + + while (twidth) + { + uint32_t m; + + m = *(uint32_t *)p; + if (m) + { + uint64_t d = *q; + __m64 vdest = expand565 (to_m64 (d), 0); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0); + *q = to_uint64 (vdest); + } + + twidth--; + p++; + q++; + } + + mask_line += mask_stride; + dst_line += dst_stride; + } + + _mm_empty (); +} + +static void +mmx_composite_in_n_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t src; + uint8_t sa; + __m64 vsrc, vsrca; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + sa = src >> 24; + + vsrc = load8888 (&src); + vsrca = expand_alpha (vsrc); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && (uintptr_t)dst & 7) + { + uint16_t tmp; + uint8_t a; + uint32_t m, d; + + a = *mask++; + d = *dst; + + m = MUL_UN8 (sa, a, tmp); + d = MUL_UN8 (m, d, tmp); + + *dst++ = d; + w--; + } + + while (w >= 4) + { + __m64 vmask; + __m64 vdest; + + vmask = load8888u ((uint32_t *)mask); + vdest = load8888 ((uint32_t *)dst); + + store8888 ((uint32_t *)dst, in (in (vsrca, vmask), vdest)); + + dst += 4; + mask += 4; + w -= 4; + } + + while (w--) + { + uint16_t tmp; + uint8_t a; + uint32_t m, d; + + a = *mask++; + d = *dst; + + m = MUL_UN8 (sa, a, tmp); + d = MUL_UN8 (m, d, tmp); + + *dst++ = d; + } + } + + _mm_empty (); +} + +static void +mmx_composite_in_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int src_stride, dst_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 3) + { + uint8_t s, d; + uint16_t tmp; + + s = *src; + d = *dst; + + *dst = MUL_UN8 (s, d, tmp); + + src++; + dst++; + w--; + } + + while (w >= 4) + { + uint32_t *s = (uint32_t *)src; + uint32_t *d = (uint32_t *)dst; + + store8888 (d, in (load8888u (s), load8888 (d))); + + w -= 4; + dst += 4; + src += 4; + } + + while (w--) + { + uint8_t s, d; + uint16_t tmp; + + s = *src; + d = *dst; + + *dst = MUL_UN8 (s, d, tmp); + + src++; + dst++; + } + } + + _mm_empty (); +} + +static void +mmx_composite_add_n_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t src; + uint8_t sa; + __m64 vsrc, vsrca; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + sa = src >> 24; + + if (src == 0) + return; + + vsrc = load8888 (&src); + vsrca = expand_alpha (vsrc); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && (uintptr_t)dst & 3) + { + uint16_t tmp; + uint16_t a; + uint32_t m, d; + uint32_t r; + + a = *mask++; + d = *dst; + + m = MUL_UN8 (sa, a, tmp); + r = ADD_UN8 (m, d, tmp); + + *dst++ = r; + w--; + } + + while (w >= 4) + { + __m64 vmask; + __m64 vdest; + + vmask = load8888u ((uint32_t *)mask); + vdest = load8888 ((uint32_t *)dst); + + store8888 ((uint32_t *)dst, _mm_adds_pu8 (in (vsrca, vmask), vdest)); + + dst += 4; + mask += 4; + w -= 4; + } + + while (w--) + { + uint16_t tmp; + uint16_t a; + uint32_t m, d; + uint32_t r; + + a = *mask++; + d = *dst; + + m = MUL_UN8 (sa, a, tmp); + r = ADD_UN8 (m, d, tmp); + + *dst++ = r; + } + } + + _mm_empty (); +} + +static void +mmx_composite_add_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint8_t s, d; + uint16_t t; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 7) + { + s = *src; + d = *dst; + t = d + s; + s = t | (0 - (t >> 8)); + *dst = s; + + dst++; + src++; + w--; + } + + while (w >= 8) + { + *(__m64*)dst = _mm_adds_pu8 (ldq_u ((__m64 *)src), *(__m64*)dst); + dst += 8; + src += 8; + w -= 8; + } + + while (w) + { + s = *src; + d = *dst; + t = d + s; + s = t | (0 - (t >> 8)); + *dst = s; + + dst++; + src++; + w--; + } + } + + _mm_empty (); +} + +static void +mmx_composite_add_0565_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst; + uint32_t d; + uint16_t *src_line, *src; + uint32_t s; + int dst_stride, src_stride; + int32_t w; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 7) + { + s = *src++; + if (s) + { + d = *dst; + s = convert_0565_to_8888 (s); + if (d) + { + d = convert_0565_to_8888 (d); + UN8x4_ADD_UN8x4 (s, d); + } + *dst = convert_8888_to_0565 (s); + } + dst++; + w--; + } + + while (w >= 4) + { + __m64 vdest = *(__m64 *)dst; + __m64 vsrc = ldq_u ((__m64 *)src); + __m64 vd0, vd1; + __m64 vs0, vs1; + + expand_4xpacked565 (vdest, &vd0, &vd1, 0); + expand_4xpacked565 (vsrc, &vs0, &vs1, 0); + + vd0 = _mm_adds_pu8 (vd0, vs0); + vd1 = _mm_adds_pu8 (vd1, vs1); + + *(__m64 *)dst = pack_4xpacked565 (vd0, vd1); + + dst += 4; + src += 4; + w -= 4; + } + + while (w--) + { + s = *src++; + if (s) + { + d = *dst; + s = convert_0565_to_8888 (s); + if (d) + { + d = convert_0565_to_8888 (d); + UN8x4_ADD_UN8x4 (s, d); + } + *dst = convert_8888_to_0565 (s); + } + dst++; + } + } + + _mm_empty (); +} + +static void +mmx_composite_add_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 7) + { + store (dst, _mm_adds_pu8 (load ((const uint32_t *)src), + load ((const uint32_t *)dst))); + dst++; + src++; + w--; + } + + while (w >= 2) + { + *(__m64 *)dst = _mm_adds_pu8 (ldq_u ((__m64 *)src), *(__m64*)dst); + dst += 2; + src += 2; + w -= 2; + } + + if (w) + { + store (dst, _mm_adds_pu8 (load ((const uint32_t *)src), + load ((const uint32_t *)dst))); + + } + } + + _mm_empty (); +} + +static pixman_bool_t +mmx_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height) +{ + uint8_t * src_bytes; + uint8_t * dst_bytes; + int byte_width; + + if (src_bpp != dst_bpp) + return FALSE; + + if (src_bpp == 16) + { + src_stride = src_stride * (int) sizeof (uint32_t) / 2; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; + src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); + byte_width = 2 * width; + src_stride *= 2; + dst_stride *= 2; + } + else if (src_bpp == 32) + { + src_stride = src_stride * (int) sizeof (uint32_t) / 4; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; + src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); + byte_width = 4 * width; + src_stride *= 4; + dst_stride *= 4; + } + else + { + return FALSE; + } + + while (height--) + { + int w; + uint8_t *s = src_bytes; + uint8_t *d = dst_bytes; + src_bytes += src_stride; + dst_bytes += dst_stride; + w = byte_width; + + if (w >= 1 && ((uintptr_t)d & 1)) + { + *(uint8_t *)d = *(uint8_t *)s; + w -= 1; + s += 1; + d += 1; + } + + if (w >= 2 && ((uintptr_t)d & 3)) + { + *(uint16_t *)d = *(uint16_t *)s; + w -= 2; + s += 2; + d += 2; + } + + while (w >= 4 && ((uintptr_t)d & 7)) + { + *(uint32_t *)d = ldl_u ((uint32_t *)s); + + w -= 4; + s += 4; + d += 4; + } + + while (w >= 64) + { +#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX + __asm__ ( + "movq (%1), %%mm0\n" + "movq 8(%1), %%mm1\n" + "movq 16(%1), %%mm2\n" + "movq 24(%1), %%mm3\n" + "movq 32(%1), %%mm4\n" + "movq 40(%1), %%mm5\n" + "movq 48(%1), %%mm6\n" + "movq 56(%1), %%mm7\n" + + "movq %%mm0, (%0)\n" + "movq %%mm1, 8(%0)\n" + "movq %%mm2, 16(%0)\n" + "movq %%mm3, 24(%0)\n" + "movq %%mm4, 32(%0)\n" + "movq %%mm5, 40(%0)\n" + "movq %%mm6, 48(%0)\n" + "movq %%mm7, 56(%0)\n" + : + : "r" (d), "r" (s) + : "memory", + "%mm0", "%mm1", "%mm2", "%mm3", + "%mm4", "%mm5", "%mm6", "%mm7"); +#else + __m64 v0 = ldq_u ((__m64 *)(s + 0)); + __m64 v1 = ldq_u ((__m64 *)(s + 8)); + __m64 v2 = ldq_u ((__m64 *)(s + 16)); + __m64 v3 = ldq_u ((__m64 *)(s + 24)); + __m64 v4 = ldq_u ((__m64 *)(s + 32)); + __m64 v5 = ldq_u ((__m64 *)(s + 40)); + __m64 v6 = ldq_u ((__m64 *)(s + 48)); + __m64 v7 = ldq_u ((__m64 *)(s + 56)); + *(__m64 *)(d + 0) = v0; + *(__m64 *)(d + 8) = v1; + *(__m64 *)(d + 16) = v2; + *(__m64 *)(d + 24) = v3; + *(__m64 *)(d + 32) = v4; + *(__m64 *)(d + 40) = v5; + *(__m64 *)(d + 48) = v6; + *(__m64 *)(d + 56) = v7; +#endif + + w -= 64; + s += 64; + d += 64; + } + while (w >= 4) + { + *(uint32_t *)d = ldl_u ((uint32_t *)s); + + w -= 4; + s += 4; + d += 4; + } + if (w >= 2) + { + *(uint16_t *)d = *(uint16_t *)s; + w -= 2; + s += 2; + d += 2; + } + } + + _mm_empty (); + + return TRUE; +} + +static void +mmx_composite_copy_area (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + + mmx_blt (imp, src_image->bits.bits, + dest_image->bits.bits, + src_image->bits.rowstride, + dest_image->bits.rowstride, + PIXMAN_FORMAT_BPP (src_image->bits.format), + PIXMAN_FORMAT_BPP (dest_image->bits.format), + src_x, src_y, dest_x, dest_y, width, height); +} + +static void +mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *src, *src_line; + uint32_t *dst, *dst_line; + uint8_t *mask, *mask_line; + int src_stride, mask_stride, dst_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + w = width; + + while (w--) + { + uint64_t m = *mask; + + if (m) + { + uint32_t ssrc = *src | 0xff000000; + __m64 s = load8888 (&ssrc); + + if (m == 0xff) + { + store8888 (dst, s); + } + else + { + __m64 sa = expand_alpha (s); + __m64 vm = expand_alpha_rev (to_m64 (m)); + __m64 vdest = in_over (s, sa, vm, load8888 (dst)); + + store8888 (dst, vdest); + } + } + + mask++; + dst++; + src++; + } + } + + _mm_empty (); +} + +static void +mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint32_t *dst_line, *dst; + int32_t w; + int dst_stride; + __m64 vsrc; + + CHECKPOINT (); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + vsrc = load8888 (&src); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + w = width; + + CHECKPOINT (); + + while (w && (uintptr_t)dst & 7) + { + __m64 vdest = load8888 (dst); + + store8888 (dst, over (vdest, expand_alpha (vdest), vsrc)); + + w--; + dst++; + } + + while (w >= 2) + { + __m64 vdest = *(__m64 *)dst; + __m64 dest0 = expand8888 (vdest, 0); + __m64 dest1 = expand8888 (vdest, 1); + + + dest0 = over (dest0, expand_alpha (dest0), vsrc); + dest1 = over (dest1, expand_alpha (dest1), vsrc); + + *(__m64 *)dst = pack8888 (dest0, dest1); + + dst += 2; + w -= 2; + } + + CHECKPOINT (); + + if (w) + { + __m64 vdest = load8888 (dst); + + store8888 (dst, over (vdest, expand_alpha (vdest), vsrc)); + } + } + + _mm_empty (); +} + +#define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS)) +#define BMSK (BSHIFT - 1) + +#define BILINEAR_DECLARE_VARIABLES \ + const __m64 mm_wt = _mm_set_pi16 (wt, wt, wt, wt); \ + const __m64 mm_wb = _mm_set_pi16 (wb, wb, wb, wb); \ + const __m64 mm_addc7 = _mm_set_pi16 (0, 1, 0, 1); \ + const __m64 mm_xorc7 = _mm_set_pi16 (0, BMSK, 0, BMSK); \ + const __m64 mm_ux = _mm_set_pi16 (unit_x, unit_x, unit_x, unit_x); \ + const __m64 mm_zero = _mm_setzero_si64 (); \ + __m64 mm_x = _mm_set_pi16 (vx, vx, vx, vx) + +#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \ +do { \ + /* fetch 2x2 pixel block into 2 mmx registers */ \ + __m64 t = ldq_u ((__m64 *)&src_top [pixman_fixed_to_int (vx)]); \ + __m64 b = ldq_u ((__m64 *)&src_bottom [pixman_fixed_to_int (vx)]); \ + /* vertical interpolation */ \ + __m64 t_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (t, mm_zero), mm_wt); \ + __m64 t_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (t, mm_zero), mm_wt); \ + __m64 b_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (b, mm_zero), mm_wb); \ + __m64 b_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (b, mm_zero), mm_wb); \ + __m64 hi = _mm_add_pi16 (t_hi, b_hi); \ + __m64 lo = _mm_add_pi16 (t_lo, b_lo); \ + /* calculate horizontal weights */ \ + __m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7, \ + _mm_srli_pi16 (mm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS))); \ + /* horizontal interpolation */ \ + __m64 p = _mm_unpacklo_pi16 (lo, hi); \ + __m64 q = _mm_unpackhi_pi16 (lo, hi); \ + vx += unit_x; \ + lo = _mm_madd_pi16 (p, mm_wh); \ + hi = _mm_madd_pi16 (q, mm_wh); \ + mm_x = _mm_add_pi16 (mm_x, mm_ux); \ + /* shift and pack the result */ \ + hi = _mm_srli_pi32 (hi, BILINEAR_INTERPOLATION_BITS * 2); \ + lo = _mm_srli_pi32 (lo, BILINEAR_INTERPOLATION_BITS * 2); \ + lo = _mm_packs_pi32 (lo, hi); \ + lo = _mm_packs_pu16 (lo, lo); \ + pix = lo; \ +} while (0) + +#define BILINEAR_SKIP_ONE_PIXEL() \ +do { \ + vx += unit_x; \ + mm_x = _mm_add_pi16 (mm_x, mm_ux); \ +} while(0) + +static force_inline void +scaled_bilinear_scanline_mmx_8888_8888_SRC (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + __m64 pix; + + while (w--) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix); + store (dst, pix); + dst++; + } + + _mm_empty (); +} + +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_SRC, + scaled_bilinear_scanline_mmx_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_SRC, + scaled_bilinear_scanline_mmx_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_SRC, + scaled_bilinear_scanline_mmx_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_SRC, + scaled_bilinear_scanline_mmx_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_NONE) + +static force_inline void +scaled_bilinear_scanline_mmx_8888_8888_OVER (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + __m64 pix1, pix2; + + while (w) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + + if (!is_zero (pix1)) + { + pix2 = load (dst); + store8888 (dst, core_combine_over_u_pixel_mmx (pix1, pix2)); + } + + w--; + dst++; + } + + _mm_empty (); +} + +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_OVER, + scaled_bilinear_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_OVER, + scaled_bilinear_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_OVER, + scaled_bilinear_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_OVER, + scaled_bilinear_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_NONE) + +static force_inline void +scaled_bilinear_scanline_mmx_8888_8_8888_OVER (uint32_t * dst, + const uint8_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + __m64 pix1, pix2; + uint32_t m; + + while (w) + { + m = (uint32_t) *mask++; + + if (m) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + + if (m == 0xff && is_opaque (pix1)) + { + store (dst, pix1); + } + else + { + __m64 ms, md, ma, msa; + + pix2 = load (dst); + ma = expand_alpha_rev (to_m64 (m)); + ms = _mm_unpacklo_pi8 (pix1, _mm_setzero_si64 ()); + md = _mm_unpacklo_pi8 (pix2, _mm_setzero_si64 ()); + + msa = expand_alpha (ms); + + store8888 (dst, (in_over (ms, msa, ma, md))); + } + } + else + { + BILINEAR_SKIP_ONE_PIXEL (); + } + + w--; + dst++; + } + + _mm_empty (); +} + +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_cover_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + COVER, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_pad_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + PAD, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_none_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + NONE, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_normal_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + NORMAL, FLAG_HAVE_NON_SOLID_MASK) + +static uint32_t * +mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + uint32_t *dst = iter->buffer; + uint32_t *src = (uint32_t *)iter->bits; + + iter->bits += iter->stride; + + while (w && ((uintptr_t)dst) & 7) + { + *dst++ = (*src++) | 0xff000000; + w--; + } + + while (w >= 8) + { + __m64 vsrc1 = ldq_u ((__m64 *)(src + 0)); + __m64 vsrc2 = ldq_u ((__m64 *)(src + 2)); + __m64 vsrc3 = ldq_u ((__m64 *)(src + 4)); + __m64 vsrc4 = ldq_u ((__m64 *)(src + 6)); + + *(__m64 *)(dst + 0) = _mm_or_si64 (vsrc1, MC (ff000000)); + *(__m64 *)(dst + 2) = _mm_or_si64 (vsrc2, MC (ff000000)); + *(__m64 *)(dst + 4) = _mm_or_si64 (vsrc3, MC (ff000000)); + *(__m64 *)(dst + 6) = _mm_or_si64 (vsrc4, MC (ff000000)); + + dst += 8; + src += 8; + w -= 8; + } + + while (w) + { + *dst++ = (*src++) | 0xff000000; + w--; + } + + _mm_empty (); + return iter->buffer; +} + +static uint32_t * +mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + uint32_t *dst = iter->buffer; + uint16_t *src = (uint16_t *)iter->bits; + + iter->bits += iter->stride; + + while (w && ((uintptr_t)dst) & 0x0f) + { + uint16_t s = *src++; + + *dst++ = convert_0565_to_8888 (s); + w--; + } + + while (w >= 4) + { + __m64 vsrc = ldq_u ((__m64 *)src); + __m64 mm0, mm1; + + expand_4xpacked565 (vsrc, &mm0, &mm1, 1); + + *(__m64 *)(dst + 0) = mm0; + *(__m64 *)(dst + 2) = mm1; + + dst += 4; + src += 4; + w -= 4; + } + + while (w) + { + uint16_t s = *src++; + + *dst++ = convert_0565_to_8888 (s); + w--; + } + + _mm_empty (); + return iter->buffer; +} + +static uint32_t * +mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + uint32_t *dst = iter->buffer; + uint8_t *src = iter->bits; + + iter->bits += iter->stride; + + while (w && (((uintptr_t)dst) & 15)) + { + *dst++ = *(src++) << 24; + w--; + } + + while (w >= 8) + { + __m64 mm0 = ldq_u ((__m64 *)src); + + __m64 mm1 = _mm_unpacklo_pi8 (_mm_setzero_si64(), mm0); + __m64 mm2 = _mm_unpackhi_pi8 (_mm_setzero_si64(), mm0); + __m64 mm3 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm1); + __m64 mm4 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm1); + __m64 mm5 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm2); + __m64 mm6 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm2); + + *(__m64 *)(dst + 0) = mm3; + *(__m64 *)(dst + 2) = mm4; + *(__m64 *)(dst + 4) = mm5; + *(__m64 *)(dst + 6) = mm6; + + dst += 8; + src += 8; + w -= 8; + } + + while (w) + { + *dst++ = *(src++) << 24; + w--; + } + + _mm_empty (); + return iter->buffer; +} + +#define IMAGE_FLAGS \ + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + +static const pixman_iter_info_t mmx_iters[] = +{ + { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL + }, + { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, mmx_fetch_r5g6b5, NULL + }, + { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, mmx_fetch_a8, NULL + }, + { PIXMAN_null }, +}; + +static const pixman_fast_path_t mmx_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mmx_composite_over_n_8_0565 ), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mmx_composite_over_n_8_0565 ), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, mmx_composite_over_n_8_8888 ), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, mmx_composite_over_n_8_8888 ), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, mmx_composite_over_n_8_8888 ), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, mmx_composite_over_n_8_8888 ), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, mmx_composite_over_n_8888_8888_ca ), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, mmx_composite_over_n_8888_8888_ca ), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, mmx_composite_over_n_8888_0565_ca ), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, mmx_composite_over_n_8888_8888_ca ), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mmx_composite_over_n_8888_8888_ca ), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, mmx_composite_over_n_8888_0565_ca ), + PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, mmx_composite_over_pixbuf_8888 ), + PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, mmx_composite_over_pixbuf_8888 ), + PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, mmx_composite_over_pixbuf_0565 ), + PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, mmx_composite_over_pixbuf_8888 ), + PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, mmx_composite_over_pixbuf_8888 ), + PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, mmx_composite_over_pixbuf_0565 ), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, mmx_composite_over_x888_n_8888 ), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, mmx_composite_over_x888_n_8888 ), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, mmx_composite_over_x888_n_8888 ), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, mmx_composite_over_x888_n_8888 ), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, mmx_composite_over_8888_n_8888 ), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, mmx_composite_over_8888_n_8888 ), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, mmx_composite_over_8888_n_8888 ), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, mmx_composite_over_8888_n_8888 ), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, mmx_composite_over_x888_8_8888 ), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, mmx_composite_over_x888_8_8888 ), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, mmx_composite_over_x888_8_8888 ), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, mmx_composite_over_x888_8_8888 ), + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, mmx_composite_over_n_8888 ), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, mmx_composite_over_n_8888 ), + PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, mmx_composite_over_n_0565 ), + PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, mmx_composite_over_n_0565 ), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, mmx_composite_copy_area ), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, mmx_composite_copy_area ), + + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, mmx_composite_over_8888_8888 ), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, mmx_composite_over_8888_8888 ), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, mmx_composite_over_8888_0565 ), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, mmx_composite_over_8888_8888 ), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, mmx_composite_over_8888_8888 ), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, mmx_composite_over_8888_0565 ), + + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, mmx_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mmx_composite_over_reverse_n_8888), + + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, mmx_composite_add_0565_0565 ), + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, mmx_composite_add_0565_0565 ), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, mmx_composite_add_8888_8888 ), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, mmx_composite_add_8888_8888 ), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, mmx_composite_add_8_8 ), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, mmx_composite_add_n_8_8 ), + + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, mmx_composite_src_x888_0565 ), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, mmx_composite_src_x888_0565 ), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, mmx_composite_src_x888_0565 ), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, mmx_composite_src_x888_0565 ), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mmx_composite_src_n_8_8888 ), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mmx_composite_src_n_8_8888 ), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mmx_composite_src_n_8_8888 ), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, mmx_composite_src_n_8_8888 ), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, mmx_composite_copy_area ), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, mmx_composite_copy_area ), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, mmx_composite_copy_area ), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, mmx_composite_copy_area ), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, mmx_composite_copy_area ), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, mmx_composite_copy_area ), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, mmx_composite_copy_area ), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, mmx_composite_copy_area ), + + PIXMAN_STD_FAST_PATH (IN, a8, null, a8, mmx_composite_in_8_8 ), + PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, mmx_composite_in_n_8_8 ), + + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, mmx_8888_8888 ), + + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8_8888 ), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8_8888 ), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8_8888 ), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8_8888 ), + + { PIXMAN_OP_NONE }, +}; + +pixman_implementation_t * +_pixman_implementation_create_mmx (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = _pixman_implementation_create (fallback, mmx_fast_paths); + + imp->combine_32[PIXMAN_OP_OVER] = mmx_combine_over_u; + imp->combine_32[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_u; + imp->combine_32[PIXMAN_OP_IN] = mmx_combine_in_u; + imp->combine_32[PIXMAN_OP_IN_REVERSE] = mmx_combine_in_reverse_u; + imp->combine_32[PIXMAN_OP_OUT] = mmx_combine_out_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = mmx_combine_out_reverse_u; + imp->combine_32[PIXMAN_OP_ATOP] = mmx_combine_atop_u; + imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = mmx_combine_atop_reverse_u; + imp->combine_32[PIXMAN_OP_XOR] = mmx_combine_xor_u; + imp->combine_32[PIXMAN_OP_ADD] = mmx_combine_add_u; + imp->combine_32[PIXMAN_OP_SATURATE] = mmx_combine_saturate_u; + + imp->combine_32_ca[PIXMAN_OP_SRC] = mmx_combine_src_ca; + imp->combine_32_ca[PIXMAN_OP_OVER] = mmx_combine_over_ca; + imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_IN] = mmx_combine_in_ca; + imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = mmx_combine_in_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_OUT] = mmx_combine_out_ca; + imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = mmx_combine_out_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP] = mmx_combine_atop_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = mmx_combine_atop_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_XOR] = mmx_combine_xor_ca; + imp->combine_32_ca[PIXMAN_OP_ADD] = mmx_combine_add_ca; + + imp->blt = mmx_blt; + imp->fill = mmx_fill; + + imp->iter_info = mmx_iters; + + return imp; +} + +#endif /* USE_X86_MMX || USE_ARM_IWMMXT || USE_LOONGSON_MMI */ diff --git a/src/pixman/pixman/pixman-noop.c b/src/pixman/pixman/pixman-noop.c new file mode 100644 index 0000000..e598904 --- /dev/null +++ b/src/pixman/pixman/pixman-noop.c @@ -0,0 +1,161 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2011 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include <string.h> +#include <stdlib.h> +#include "pixman-private.h" +#include "pixman-combine32.h" +#include "pixman-inlines.h" + +static void +noop_composite (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + return; +} + +static uint32_t * +noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask) +{ + uint32_t *result = iter->buffer; + + iter->buffer += iter->image->bits.rowstride; + + return result; +} + +static void +noop_init_solid_narrow (pixman_iter_t *iter, + const pixman_iter_info_t *info) +{ + pixman_image_t *image = iter->image; + uint32_t *buffer = iter->buffer; + uint32_t *end = buffer + iter->width; + uint32_t color; + + if (iter->image->type == SOLID) + color = image->solid.color_32; + else + color = image->bits.fetch_pixel_32 (&image->bits, 0, 0); + + while (buffer < end) + *(buffer++) = color; +} + +static void +noop_init_solid_wide (pixman_iter_t *iter, + const pixman_iter_info_t *info) +{ + pixman_image_t *image = iter->image; + argb_t *buffer = (argb_t *)iter->buffer; + argb_t *end = buffer + iter->width; + argb_t color; + + if (iter->image->type == SOLID) + color = image->solid.color_float; + else + color = image->bits.fetch_pixel_float (&image->bits, 0, 0); + + while (buffer < end) + *(buffer++) = color; +} + +static void +noop_init_direct_buffer (pixman_iter_t *iter, const pixman_iter_info_t *info) +{ + pixman_image_t *image = iter->image; + + iter->buffer = + image->bits.bits + iter->y * image->bits.rowstride + iter->x; +} + +static void +dest_write_back_direct (pixman_iter_t *iter) +{ + iter->buffer += iter->image->bits.rowstride; +} + +static const pixman_iter_info_t noop_iters[] = +{ + /* Source iters */ + { PIXMAN_any, + 0, ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_SRC, + NULL, + _pixman_iter_get_scanline_noop, + NULL + }, + { PIXMAN_solid, + FAST_PATH_NO_ALPHA_MAP, ITER_NARROW | ITER_SRC, + noop_init_solid_narrow, + _pixman_iter_get_scanline_noop, + NULL, + }, + { PIXMAN_solid, + FAST_PATH_NO_ALPHA_MAP, ITER_WIDE | ITER_SRC, + noop_init_solid_wide, + _pixman_iter_get_scanline_noop, + NULL + }, + { PIXMAN_a8r8g8b8, + FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, + ITER_NARROW | ITER_SRC, + noop_init_direct_buffer, + noop_get_scanline, + NULL + }, + /* Dest iters */ + { PIXMAN_a8r8g8b8, + FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST, + noop_init_direct_buffer, + _pixman_iter_get_scanline_noop, + dest_write_back_direct + }, + { PIXMAN_x8r8g8b8, + FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST | ITER_LOCALIZED_ALPHA, + noop_init_direct_buffer, + _pixman_iter_get_scanline_noop, + dest_write_back_direct + }, + { PIXMAN_null }, +}; + +static const pixman_fast_path_t noop_fast_paths[] = +{ + { PIXMAN_OP_DST, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, noop_composite }, + { PIXMAN_OP_NONE }, +}; + +pixman_implementation_t * +_pixman_implementation_create_noop (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = + _pixman_implementation_create (fallback, noop_fast_paths); + + imp->iter_info = noop_iters; + + return imp; +} diff --git a/src/pixman/pixman/pixman-ppc.c b/src/pixman/pixman/pixman-ppc.c new file mode 100644 index 0000000..a6e7bb0 --- /dev/null +++ b/src/pixman/pixman/pixman-ppc.c @@ -0,0 +1,155 @@ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "pixman-private.h" + +#ifdef USE_VMX + +/* The CPU detection code needs to be in a file not compiled with + * "-maltivec -mabi=altivec", as gcc would try to save vector register + * across function calls causing SIGILL on cpus without Altivec/vmx. + */ +#ifdef __APPLE__ +#include <sys/sysctl.h> + +static pixman_bool_t +pixman_have_vmx (void) +{ + int error, have_vmx; + size_t length = sizeof(have_vmx); + + error = sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0); + + if (error) + return FALSE; + + return have_vmx; +} + +#elif defined (__OpenBSD__) +#include <sys/param.h> +#include <sys/sysctl.h> +#include <machine/cpu.h> + +static pixman_bool_t +pixman_have_vmx (void) +{ + int error, have_vmx; + int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC }; + size_t length = sizeof(have_vmx); + + error = sysctl (mib, 2, &have_vmx, &length, NULL, 0); + + if (error != 0) + return FALSE; + + return have_vmx; +} + +#elif defined (__linux__) + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <linux/auxvec.h> +#include <asm/cputable.h> + +static pixman_bool_t +pixman_have_vmx (void) +{ + int have_vmx = FALSE; + int fd; + struct + { + unsigned long type; + unsigned long value; + } aux; + + fd = open ("/proc/self/auxv", O_RDONLY); + if (fd >= 0) + { + while (read (fd, &aux, sizeof (aux)) == sizeof (aux)) + { + if (aux.type == AT_HWCAP && (aux.value & PPC_FEATURE_HAS_ALTIVEC)) + { + have_vmx = TRUE; + break; + } + } + + close (fd); + } + + return have_vmx; +} + +#else /* !__APPLE__ && !__OpenBSD__ && !__linux__ */ +#include <signal.h> +#include <setjmp.h> + +static jmp_buf jump_env; + +static void +vmx_test (int sig, + siginfo_t *si, + void * unused) +{ + longjmp (jump_env, 1); +} + +static pixman_bool_t +pixman_have_vmx (void) +{ + struct sigaction sa, osa; + int jmp_result; + + sa.sa_flags = SA_SIGINFO; + sigemptyset (&sa.sa_mask); + sa.sa_sigaction = vmx_test; + sigaction (SIGILL, &sa, &osa); + jmp_result = setjmp (jump_env); + if (jmp_result == 0) + { + asm volatile ( "vor 0, 0, 0" ); + } + sigaction (SIGILL, &osa, NULL); + return (jmp_result == 0); +} + +#endif /* __APPLE__ */ +#endif /* USE_VMX */ + +pixman_implementation_t * +_pixman_ppc_get_implementations (pixman_implementation_t *imp) +{ +#ifdef USE_VMX + if (!_pixman_disabled ("vmx") && pixman_have_vmx ()) + imp = _pixman_implementation_create_vmx (imp); +#endif + + return imp; +} diff --git a/src/pixman/pixman/pixman-private.h b/src/pixman/pixman/pixman-private.h new file mode 100644 index 0000000..6ca13b2 --- /dev/null +++ b/src/pixman/pixman/pixman-private.h @@ -0,0 +1,1153 @@ +#include <float.h> + +#ifndef PIXMAN_PRIVATE_H +#define PIXMAN_PRIVATE_H + +/* + * The defines which are shared between C and assembly code + */ + +/* bilinear interpolation precision (must be <= 8) */ +#define BILINEAR_INTERPOLATION_BITS 7 +#define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS) + +/* + * C specific part + */ + +#ifndef __ASSEMBLER__ + +#ifndef PACKAGE +# error config.h must be included before pixman-private.h +#endif + +#define PIXMAN_DISABLE_DEPRECATED +#define PIXMAN_USE_INTERNAL_API + +#include "pixman.h" +#include <time.h> +#include <assert.h> +#include <stdio.h> +#include <string.h> +#include <stddef.h> + +#include "pixman-compiler.h" + +/* + * Images + */ +typedef struct image_common image_common_t; +typedef struct solid_fill solid_fill_t; +typedef struct gradient gradient_t; +typedef struct linear_gradient linear_gradient_t; +typedef struct horizontal_gradient horizontal_gradient_t; +typedef struct vertical_gradient vertical_gradient_t; +typedef struct conical_gradient conical_gradient_t; +typedef struct radial_gradient radial_gradient_t; +typedef struct bits_image bits_image_t; +typedef struct circle circle_t; + +typedef struct argb_t argb_t; + +struct argb_t +{ + float a; + float r; + float g; + float b; +}; + +typedef void (*fetch_scanline_t) (bits_image_t *image, + int x, + int y, + int width, + uint32_t *buffer, + const uint32_t *mask); + +typedef uint32_t (*fetch_pixel_32_t) (bits_image_t *image, + int x, + int y); + +typedef argb_t (*fetch_pixel_float_t) (bits_image_t *image, + int x, + int y); + +typedef void (*store_scanline_t) (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values); + +typedef enum +{ + BITS, + LINEAR, + CONICAL, + RADIAL, + SOLID +} image_type_t; + +typedef void (*property_changed_func_t) (pixman_image_t *image); + +struct image_common +{ + image_type_t type; + int32_t ref_count; + pixman_region32_t clip_region; + int32_t alpha_count; /* How many times this image is being used as an alpha map */ + pixman_bool_t have_clip_region; /* FALSE if there is no clip */ + pixman_bool_t client_clip; /* Whether the source clip was + set by a client */ + pixman_bool_t clip_sources; /* Whether the clip applies when + * the image is used as a source + */ + pixman_bool_t dirty; + pixman_transform_t * transform; + pixman_repeat_t repeat; + pixman_filter_t filter; + pixman_fixed_t * filter_params; + int n_filter_params; + bits_image_t * alpha_map; + int alpha_origin_x; + int alpha_origin_y; + pixman_bool_t component_alpha; + property_changed_func_t property_changed; + + pixman_image_destroy_func_t destroy_func; + void * destroy_data; + + uint32_t flags; + pixman_format_code_t extended_format_code; +}; + +struct solid_fill +{ + image_common_t common; + pixman_color_t color; + + uint32_t color_32; + argb_t color_float; +}; + +struct gradient +{ + image_common_t common; + int n_stops; + pixman_gradient_stop_t *stops; +}; + +struct linear_gradient +{ + gradient_t common; + pixman_point_fixed_t p1; + pixman_point_fixed_t p2; +}; + +struct circle +{ + pixman_fixed_t x; + pixman_fixed_t y; + pixman_fixed_t radius; +}; + +struct radial_gradient +{ + gradient_t common; + + circle_t c1; + circle_t c2; + + circle_t delta; + double a; + double inva; + double mindr; +}; + +struct conical_gradient +{ + gradient_t common; + pixman_point_fixed_t center; + double angle; +}; + +struct bits_image +{ + image_common_t common; + pixman_format_code_t format; + const pixman_indexed_t * indexed; + int width; + int height; + uint32_t * bits; + uint32_t * free_me; + int rowstride; /* in number of uint32_t's */ + + fetch_scanline_t fetch_scanline_32; + fetch_pixel_32_t fetch_pixel_32; + store_scanline_t store_scanline_32; + + fetch_scanline_t fetch_scanline_float; + fetch_pixel_float_t fetch_pixel_float; + store_scanline_t store_scanline_float; + + /* Used for indirect access to the bits */ + pixman_read_memory_func_t read_func; + pixman_write_memory_func_t write_func; +}; + +union pixman_image +{ + image_type_t type; + image_common_t common; + bits_image_t bits; + gradient_t gradient; + linear_gradient_t linear; + conical_gradient_t conical; + radial_gradient_t radial; + solid_fill_t solid; +}; + +typedef struct pixman_iter_t pixman_iter_t; +typedef uint32_t *(* pixman_iter_get_scanline_t) (pixman_iter_t *iter, const uint32_t *mask); +typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter); +typedef void (* pixman_iter_fini_t) (pixman_iter_t *iter); + +typedef enum +{ + ITER_NARROW = (1 << 0), + ITER_WIDE = (1 << 1), + + /* "Localized alpha" is when the alpha channel is used only to compute + * the alpha value of the destination. This means that the computation + * of the RGB values of the result is independent of the alpha value. + * + * For example, the OVER operator has localized alpha for the + * destination, because the RGB values of the result can be computed + * without knowing the destination alpha. Similarly, ADD has localized + * alpha for both source and destination because the RGB values of the + * result can be computed without knowing the alpha value of source or + * destination. + * + * When he destination is xRGB, this is useful knowledge, because then + * we can treat it as if it were ARGB, which means in some cases we can + * avoid copying it to a temporary buffer. + */ + ITER_LOCALIZED_ALPHA = (1 << 2), + ITER_IGNORE_ALPHA = (1 << 3), + ITER_IGNORE_RGB = (1 << 4), + + /* These indicate whether the iterator is for a source + * or a destination image + */ + ITER_SRC = (1 << 5), + ITER_DEST = (1 << 6) +} iter_flags_t; + +struct pixman_iter_t +{ + /* These are initialized by _pixman_implementation_{src,dest}_init */ + pixman_image_t * image; + uint32_t * buffer; + int x, y; + int width; + int height; + iter_flags_t iter_flags; + uint32_t image_flags; + + /* These function pointers are initialized by the implementation */ + pixman_iter_get_scanline_t get_scanline; + pixman_iter_write_back_t write_back; + pixman_iter_fini_t fini; + + /* These fields are scratch data that implementations can use */ + void * data; + uint8_t * bits; + int stride; +}; + +typedef struct pixman_iter_info_t pixman_iter_info_t; +typedef void (* pixman_iter_initializer_t) (pixman_iter_t *iter, + const pixman_iter_info_t *info); +struct pixman_iter_info_t +{ + pixman_format_code_t format; + uint32_t image_flags; + iter_flags_t iter_flags; + pixman_iter_initializer_t initializer; + pixman_iter_get_scanline_t get_scanline; + pixman_iter_write_back_t write_back; +}; + +void +_pixman_bits_image_setup_accessors (bits_image_t *image); + +void +_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter); + +void +_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter); + +void +_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter); + +void +_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter); + +void +_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter); + +void +_pixman_image_init (pixman_image_t *image); + +pixman_bool_t +_pixman_bits_image_init (pixman_image_t * image, + pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride, + pixman_bool_t clear); +pixman_bool_t +_pixman_image_fini (pixman_image_t *image); + +pixman_image_t * +_pixman_image_allocate (void); + +pixman_bool_t +_pixman_init_gradient (gradient_t * gradient, + const pixman_gradient_stop_t *stops, + int n_stops); +void +_pixman_image_reset_clip_region (pixman_image_t *image); + +void +_pixman_image_validate (pixman_image_t *image); + +#define PIXMAN_IMAGE_GET_LINE(image, x, y, type, out_stride, line, mul) \ + do \ + { \ + uint32_t *__bits__; \ + int __stride__; \ + \ + __bits__ = image->bits.bits; \ + __stride__ = image->bits.rowstride; \ + (out_stride) = \ + __stride__ * (int) sizeof (uint32_t) / (int) sizeof (type); \ + (line) = \ + ((type *) __bits__) + (out_stride) * (y) + (mul) * (x); \ + } while (0) + +/* + * Gradient walker + */ +typedef struct +{ + float a_s, a_b; + float r_s, r_b; + float g_s, g_b; + float b_s, b_b; + pixman_fixed_t left_x; + pixman_fixed_t right_x; + + pixman_gradient_stop_t *stops; + int num_stops; + pixman_repeat_t repeat; + + pixman_bool_t need_reset; +} pixman_gradient_walker_t; + +void +_pixman_gradient_walker_init (pixman_gradient_walker_t *walker, + gradient_t * gradient, + pixman_repeat_t repeat); + +void +_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t pos); + +uint32_t +_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t x); + +/* + * Edges + */ + +#define MAX_ALPHA(n) ((1 << (n)) - 1) +#define N_Y_FRAC(n) ((n) == 1 ? 1 : (1 << ((n) / 2)) - 1) +#define N_X_FRAC(n) ((n) == 1 ? 1 : (1 << ((n) / 2)) + 1) + +#define STEP_Y_SMALL(n) (pixman_fixed_1 / N_Y_FRAC (n)) +#define STEP_Y_BIG(n) (pixman_fixed_1 - (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n)) + +#define Y_FRAC_FIRST(n) (STEP_Y_BIG (n) / 2) +#define Y_FRAC_LAST(n) (Y_FRAC_FIRST (n) + (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n)) + +#define STEP_X_SMALL(n) (pixman_fixed_1 / N_X_FRAC (n)) +#define STEP_X_BIG(n) (pixman_fixed_1 - (N_X_FRAC (n) - 1) * STEP_X_SMALL (n)) + +#define X_FRAC_FIRST(n) (STEP_X_BIG (n) / 2) +#define X_FRAC_LAST(n) (X_FRAC_FIRST (n) + (N_X_FRAC (n) - 1) * STEP_X_SMALL (n)) + +#define RENDER_SAMPLES_X(x, n) \ + ((n) == 1? 0 : (pixman_fixed_frac (x) + \ + X_FRAC_FIRST (n)) / STEP_X_SMALL (n)) + +void +pixman_rasterize_edges_accessors (pixman_image_t *image, + pixman_edge_t * l, + pixman_edge_t * r, + pixman_fixed_t t, + pixman_fixed_t b); + +/* + * Implementations + */ +typedef struct pixman_implementation_t pixman_implementation_t; + +typedef struct +{ + pixman_op_t op; + pixman_image_t * src_image; + pixman_image_t * mask_image; + pixman_image_t * dest_image; + int32_t src_x; + int32_t src_y; + int32_t mask_x; + int32_t mask_y; + int32_t dest_x; + int32_t dest_y; + int32_t width; + int32_t height; + + uint32_t src_flags; + uint32_t mask_flags; + uint32_t dest_flags; +} pixman_composite_info_t; + +#define PIXMAN_COMPOSITE_ARGS(info) \ + MAYBE_UNUSED pixman_op_t op = info->op; \ + MAYBE_UNUSED pixman_image_t * src_image = info->src_image; \ + MAYBE_UNUSED pixman_image_t * mask_image = info->mask_image; \ + MAYBE_UNUSED pixman_image_t * dest_image = info->dest_image; \ + MAYBE_UNUSED int32_t src_x = info->src_x; \ + MAYBE_UNUSED int32_t src_y = info->src_y; \ + MAYBE_UNUSED int32_t mask_x = info->mask_x; \ + MAYBE_UNUSED int32_t mask_y = info->mask_y; \ + MAYBE_UNUSED int32_t dest_x = info->dest_x; \ + MAYBE_UNUSED int32_t dest_y = info->dest_y; \ + MAYBE_UNUSED int32_t width = info->width; \ + MAYBE_UNUSED int32_t height = info->height + +typedef void (*pixman_combine_32_func_t) (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width); + +typedef void (*pixman_combine_float_func_t) (pixman_implementation_t *imp, + pixman_op_t op, + float * dest, + const float * src, + const float * mask, + int n_pixels); + +typedef void (*pixman_composite_func_t) (pixman_implementation_t *imp, + pixman_composite_info_t *info); +typedef pixman_bool_t (*pixman_blt_func_t) (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height); +typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t filler); + +void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp); +void _pixman_setup_combiner_functions_float (pixman_implementation_t *imp); + +typedef struct +{ + pixman_op_t op; + pixman_format_code_t src_format; + uint32_t src_flags; + pixman_format_code_t mask_format; + uint32_t mask_flags; + pixman_format_code_t dest_format; + uint32_t dest_flags; + pixman_composite_func_t func; +} pixman_fast_path_t; + +struct pixman_implementation_t +{ + pixman_implementation_t * toplevel; + pixman_implementation_t * fallback; + const pixman_fast_path_t * fast_paths; + const pixman_iter_info_t * iter_info; + + pixman_blt_func_t blt; + pixman_fill_func_t fill; + + pixman_combine_32_func_t combine_32[PIXMAN_N_OPERATORS]; + pixman_combine_32_func_t combine_32_ca[PIXMAN_N_OPERATORS]; + pixman_combine_float_func_t combine_float[PIXMAN_N_OPERATORS]; + pixman_combine_float_func_t combine_float_ca[PIXMAN_N_OPERATORS]; +}; + +uint32_t +_pixman_image_get_solid (pixman_implementation_t *imp, + pixman_image_t * image, + pixman_format_code_t format); + +pixman_implementation_t * +_pixman_implementation_create (pixman_implementation_t *fallback, + const pixman_fast_path_t *fast_paths); + +void +_pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, + pixman_op_t op, + pixman_format_code_t src_format, + uint32_t src_flags, + pixman_format_code_t mask_format, + uint32_t mask_flags, + pixman_format_code_t dest_format, + uint32_t dest_flags, + pixman_implementation_t **out_imp, + pixman_composite_func_t *out_func); + +pixman_combine_32_func_t +_pixman_implementation_lookup_combiner (pixman_implementation_t *imp, + pixman_op_t op, + pixman_bool_t component_alpha, + pixman_bool_t wide); + +pixman_bool_t +_pixman_implementation_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height); + +pixman_bool_t +_pixman_implementation_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t filler); + +void +_pixman_implementation_iter_init (pixman_implementation_t *imp, + pixman_iter_t *iter, + pixman_image_t *image, + int x, + int y, + int width, + int height, + uint8_t *buffer, + iter_flags_t flags, + uint32_t image_flags); + +/* Specific implementations */ +pixman_implementation_t * +_pixman_implementation_create_general (void); + +pixman_implementation_t * +_pixman_implementation_create_fast_path (pixman_implementation_t *fallback); + +pixman_implementation_t * +_pixman_implementation_create_noop (pixman_implementation_t *fallback); + +#if defined USE_X86_MMX || defined USE_ARM_IWMMXT || defined USE_LOONGSON_MMI +pixman_implementation_t * +_pixman_implementation_create_mmx (pixman_implementation_t *fallback); +#endif + +#ifdef USE_SSE2 +pixman_implementation_t * +_pixman_implementation_create_sse2 (pixman_implementation_t *fallback); +#endif + +#ifdef USE_SSSE3 +pixman_implementation_t * +_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback); +#endif + +#ifdef USE_ARM_SIMD +pixman_implementation_t * +_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback); +#endif + +#ifdef USE_ARM_NEON +pixman_implementation_t * +_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback); +#endif + +#ifdef USE_MIPS_DSPR2 +pixman_implementation_t * +_pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback); +#endif + +#ifdef USE_VMX +pixman_implementation_t * +_pixman_implementation_create_vmx (pixman_implementation_t *fallback); +#endif + +pixman_bool_t +_pixman_implementation_disabled (const char *name); + +pixman_implementation_t * +_pixman_x86_get_implementations (pixman_implementation_t *imp); + +pixman_implementation_t * +_pixman_arm_get_implementations (pixman_implementation_t *imp); + +pixman_implementation_t * +_pixman_ppc_get_implementations (pixman_implementation_t *imp); + +pixman_implementation_t * +_pixman_mips_get_implementations (pixman_implementation_t *imp); + +pixman_implementation_t * +_pixman_choose_implementation (void); + +pixman_bool_t +_pixman_disabled (const char *name); + + +/* + * Utilities + */ +pixman_bool_t +_pixman_compute_composite_region32 (pixman_region32_t * region, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dest_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height); +uint32_t * +_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask); + +void +_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info); + +/* These "formats" all have depth 0, so they + * will never clash with any real ones + */ +#define PIXMAN_null PIXMAN_FORMAT (0, 0, 0, 0, 0, 0) +#define PIXMAN_solid PIXMAN_FORMAT (0, 1, 0, 0, 0, 0) +#define PIXMAN_pixbuf PIXMAN_FORMAT (0, 2, 0, 0, 0, 0) +#define PIXMAN_rpixbuf PIXMAN_FORMAT (0, 3, 0, 0, 0, 0) +#define PIXMAN_unknown PIXMAN_FORMAT (0, 4, 0, 0, 0, 0) +#define PIXMAN_any PIXMAN_FORMAT (0, 5, 0, 0, 0, 0) + +#define PIXMAN_OP_any (PIXMAN_N_OPERATORS + 1) + +#define FAST_PATH_ID_TRANSFORM (1 << 0) +#define FAST_PATH_NO_ALPHA_MAP (1 << 1) +#define FAST_PATH_NO_CONVOLUTION_FILTER (1 << 2) +#define FAST_PATH_NO_PAD_REPEAT (1 << 3) +#define FAST_PATH_NO_REFLECT_REPEAT (1 << 4) +#define FAST_PATH_NO_ACCESSORS (1 << 5) +#define FAST_PATH_NARROW_FORMAT (1 << 6) +#define FAST_PATH_COMPONENT_ALPHA (1 << 8) +#define FAST_PATH_SAMPLES_OPAQUE (1 << 7) +#define FAST_PATH_UNIFIED_ALPHA (1 << 9) +#define FAST_PATH_SCALE_TRANSFORM (1 << 10) +#define FAST_PATH_NEAREST_FILTER (1 << 11) +#define FAST_PATH_HAS_TRANSFORM (1 << 12) +#define FAST_PATH_IS_OPAQUE (1 << 13) +#define FAST_PATH_NO_NORMAL_REPEAT (1 << 14) +#define FAST_PATH_NO_NONE_REPEAT (1 << 15) +#define FAST_PATH_X_UNIT_POSITIVE (1 << 16) +#define FAST_PATH_AFFINE_TRANSFORM (1 << 17) +#define FAST_PATH_Y_UNIT_ZERO (1 << 18) +#define FAST_PATH_BILINEAR_FILTER (1 << 19) +#define FAST_PATH_ROTATE_90_TRANSFORM (1 << 20) +#define FAST_PATH_ROTATE_180_TRANSFORM (1 << 21) +#define FAST_PATH_ROTATE_270_TRANSFORM (1 << 22) +#define FAST_PATH_SAMPLES_COVER_CLIP_NEAREST (1 << 23) +#define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR (1 << 24) +#define FAST_PATH_BITS_IMAGE (1 << 25) +#define FAST_PATH_SEPARABLE_CONVOLUTION_FILTER (1 << 26) + +#define FAST_PATH_PAD_REPEAT \ + (FAST_PATH_NO_NONE_REPEAT | \ + FAST_PATH_NO_NORMAL_REPEAT | \ + FAST_PATH_NO_REFLECT_REPEAT) + +#define FAST_PATH_NORMAL_REPEAT \ + (FAST_PATH_NO_NONE_REPEAT | \ + FAST_PATH_NO_PAD_REPEAT | \ + FAST_PATH_NO_REFLECT_REPEAT) + +#define FAST_PATH_NONE_REPEAT \ + (FAST_PATH_NO_NORMAL_REPEAT | \ + FAST_PATH_NO_PAD_REPEAT | \ + FAST_PATH_NO_REFLECT_REPEAT) + +#define FAST_PATH_REFLECT_REPEAT \ + (FAST_PATH_NO_NONE_REPEAT | \ + FAST_PATH_NO_NORMAL_REPEAT | \ + FAST_PATH_NO_PAD_REPEAT) + +#define FAST_PATH_STANDARD_FLAGS \ + (FAST_PATH_NO_CONVOLUTION_FILTER | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NARROW_FORMAT) + +#define FAST_PATH_STD_DEST_FLAGS \ + (FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NARROW_FORMAT) + +#define SOURCE_FLAGS(format) \ + (FAST_PATH_STANDARD_FLAGS | \ + ((PIXMAN_ ## format == PIXMAN_solid) ? \ + 0 : (FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | FAST_PATH_NEAREST_FILTER | FAST_PATH_ID_TRANSFORM))) + +#define MASK_FLAGS(format, extra) \ + ((PIXMAN_ ## format == PIXMAN_null) ? 0 : (SOURCE_FLAGS (format) | extra)) + +#define FAST_PATH(op, src, src_flags, mask, mask_flags, dest, dest_flags, func) \ + PIXMAN_OP_ ## op, \ + PIXMAN_ ## src, \ + src_flags, \ + PIXMAN_ ## mask, \ + mask_flags, \ + PIXMAN_ ## dest, \ + dest_flags, \ + func + +#define PIXMAN_STD_FAST_PATH(op, src, mask, dest, func) \ + { FAST_PATH ( \ + op, \ + src, SOURCE_FLAGS (src), \ + mask, MASK_FLAGS (mask, FAST_PATH_UNIFIED_ALPHA), \ + dest, FAST_PATH_STD_DEST_FLAGS, \ + func) } + +#define PIXMAN_STD_FAST_PATH_CA(op, src, mask, dest, func) \ + { FAST_PATH ( \ + op, \ + src, SOURCE_FLAGS (src), \ + mask, MASK_FLAGS (mask, FAST_PATH_COMPONENT_ALPHA), \ + dest, FAST_PATH_STD_DEST_FLAGS, \ + func) } + +extern pixman_implementation_t *global_implementation; + +static force_inline pixman_implementation_t * +get_implementation (void) +{ +#ifndef TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR + if (!global_implementation) + global_implementation = _pixman_choose_implementation (); +#endif + return global_implementation; +} + +/* This function is exported for the sake of the test suite and not part + * of the ABI. + */ +PIXMAN_EXPORT pixman_implementation_t * +_pixman_internal_only_get_implementation (void); + +/* Memory allocation helpers */ +void * +pixman_malloc_ab (unsigned int n, unsigned int b); + +void * +pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c); + +void * +pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c); + +pixman_bool_t +_pixman_multiply_overflows_size (size_t a, size_t b); + +pixman_bool_t +_pixman_multiply_overflows_int (unsigned int a, unsigned int b); + +pixman_bool_t +_pixman_addition_overflows_int (unsigned int a, unsigned int b); + +/* Compositing utilities */ +void +pixman_expand_to_float (argb_t *dst, + const uint32_t *src, + pixman_format_code_t format, + int width); + +void +pixman_contract_from_float (uint32_t *dst, + const argb_t *src, + int width); + +/* Region Helpers */ +pixman_bool_t +pixman_region32_copy_from_region16 (pixman_region32_t *dst, + pixman_region16_t *src); + +pixman_bool_t +pixman_region16_copy_from_region32 (pixman_region16_t *dst, + pixman_region32_t *src); + +/* Doubly linked lists */ +typedef struct pixman_link_t pixman_link_t; +struct pixman_link_t +{ + pixman_link_t *next; + pixman_link_t *prev; +}; + +typedef struct pixman_list_t pixman_list_t; +struct pixman_list_t +{ + pixman_link_t *head; + pixman_link_t *tail; +}; + +static force_inline void +pixman_list_init (pixman_list_t *list) +{ + list->head = (pixman_link_t *)list; + list->tail = (pixman_link_t *)list; +} + +static force_inline void +pixman_list_prepend (pixman_list_t *list, pixman_link_t *link) +{ + link->next = list->head; + link->prev = (pixman_link_t *)list; + list->head->prev = link; + list->head = link; +} + +static force_inline void +pixman_list_unlink (pixman_link_t *link) +{ + link->prev->next = link->next; + link->next->prev = link->prev; +} + +static force_inline void +pixman_list_move_to_front (pixman_list_t *list, pixman_link_t *link) +{ + pixman_list_unlink (link); + pixman_list_prepend (list, link); +} + +/* Misc macros */ + +#ifndef FALSE +# define FALSE 0 +#endif + +#ifndef TRUE +# define TRUE 1 +#endif + +#ifndef MIN +# define MIN(a, b) ((a < b) ? a : b) +#endif + +#ifndef MAX +# define MAX(a, b) ((a > b) ? a : b) +#endif + +/* Integer division that rounds towards -infinity */ +#define DIV(a, b) \ + ((((a) < 0) == ((b) < 0)) ? (a) / (b) : \ + ((a) - (b) + 1 - (((b) < 0) << 1)) / (b)) + +/* Modulus that produces the remainder wrt. DIV */ +#define MOD(a, b) ((a) < 0 ? ((b) - ((-(a) - 1) % (b))) - 1 : (a) % (b)) + +#define CLIP(v, low, high) ((v) < (low) ? (low) : ((v) > (high) ? (high) : (v))) + +#define FLOAT_IS_ZERO(f) (-FLT_MIN < (f) && (f) < FLT_MIN) + +/* Conversion between 8888 and 0565 */ + +static force_inline uint16_t +convert_8888_to_0565 (uint32_t s) +{ + /* The following code can be compiled into just 4 instructions on ARM */ + uint32_t a, b; + a = (s >> 3) & 0x1F001F; + b = s & 0xFC00; + a |= a >> 5; + a |= b >> 5; + return (uint16_t)a; +} + +static force_inline uint32_t +convert_0565_to_0888 (uint16_t s) +{ + return (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) | + ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) | + ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000))); +} + +static force_inline uint32_t +convert_0565_to_8888 (uint16_t s) +{ + return convert_0565_to_0888 (s) | 0xff000000; +} + +/* Trivial versions that are useful in macros */ + +static force_inline uint32_t +convert_8888_to_8888 (uint32_t s) +{ + return s; +} + +static force_inline uint32_t +convert_x888_to_8888 (uint32_t s) +{ + return s | 0xff000000; +} + +static force_inline uint16_t +convert_0565_to_0565 (uint16_t s) +{ + return s; +} + +#define PIXMAN_FORMAT_IS_WIDE(f) \ + (PIXMAN_FORMAT_A (f) > 8 || \ + PIXMAN_FORMAT_R (f) > 8 || \ + PIXMAN_FORMAT_G (f) > 8 || \ + PIXMAN_FORMAT_B (f) > 8 || \ + PIXMAN_FORMAT_TYPE (f) == PIXMAN_TYPE_ARGB_SRGB) + +#ifdef WORDS_BIGENDIAN +# define SCREEN_SHIFT_LEFT(x,n) ((x) << (n)) +# define SCREEN_SHIFT_RIGHT(x,n) ((x) >> (n)) +#else +# define SCREEN_SHIFT_LEFT(x,n) ((x) >> (n)) +# define SCREEN_SHIFT_RIGHT(x,n) ((x) << (n)) +#endif + +static force_inline uint32_t +unorm_to_unorm (uint32_t val, int from_bits, int to_bits) +{ + uint32_t result; + + if (from_bits == 0) + return 0; + + /* Delete any extra bits */ + val &= ((1 << from_bits) - 1); + + if (from_bits >= to_bits) + return val >> (from_bits - to_bits); + + /* Start out with the high bit of val in the high bit of result. */ + result = val << (to_bits - from_bits); + + /* Copy the bits in result, doubling the number of bits each time, until + * we fill all to_bits. Unrolled manually because from_bits and to_bits + * are usually known statically, so the compiler can turn all of this + * into a few shifts. + */ +#define REPLICATE() \ + do \ + { \ + if (from_bits < to_bits) \ + { \ + result |= result >> from_bits; \ + \ + from_bits *= 2; \ + } \ + } \ + while (0) + + REPLICATE(); + REPLICATE(); + REPLICATE(); + REPLICATE(); + REPLICATE(); + + return result; +} + +uint16_t pixman_float_to_unorm (float f, int n_bits); +float pixman_unorm_to_float (uint16_t u, int n_bits); + +/* + * Various debugging code + */ + +#undef DEBUG + +#define COMPILE_TIME_ASSERT(x) \ + do { typedef int compile_time_assertion [(x)?1:-1]; } while (0) + +/* Turn on debugging depending on what type of release this is + */ +#if (((PIXMAN_VERSION_MICRO % 2) == 0) && ((PIXMAN_VERSION_MINOR % 2) == 1)) + +/* Debugging gets turned on for development releases because these + * are the things that end up in bleeding edge distributions such + * as Rawhide etc. + * + * For performance reasons we don't turn it on for stable releases or + * random git checkouts. (Random git checkouts are often used for + * performance work). + */ + +# define DEBUG + +#endif + +void +_pixman_log_error (const char *function, const char *message); + +#define return_if_fail(expr) \ + do \ + { \ + if (unlikely (!(expr))) \ + { \ + _pixman_log_error (FUNC, "The expression " # expr " was false"); \ + return; \ + } \ + } \ + while (0) + +#define return_val_if_fail(expr, retval) \ + do \ + { \ + if (unlikely (!(expr))) \ + { \ + _pixman_log_error (FUNC, "The expression " # expr " was false"); \ + return (retval); \ + } \ + } \ + while (0) + +#define critical_if_fail(expr) \ + do \ + { \ + if (unlikely (!(expr))) \ + _pixman_log_error (FUNC, "The expression " # expr " was false"); \ + } \ + while (0) + +/* + * Matrix + */ + +typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t; + +pixman_bool_t +pixman_transform_point_31_16 (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); + +void +pixman_transform_point_31_16_3d (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); + +void +pixman_transform_point_31_16_affine (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); + +/* + * Timers + */ + +#ifdef PIXMAN_TIMERS + +static inline uint64_t +oil_profile_stamp_rdtsc (void) +{ + uint32_t hi, lo; + + __asm__ __volatile__ ("rdtsc\n" : "=a" (lo), "=d" (hi)); + + return lo | (((uint64_t)hi) << 32); +} + +#define OIL_STAMP oil_profile_stamp_rdtsc + +typedef struct pixman_timer_t pixman_timer_t; + +struct pixman_timer_t +{ + int initialized; + const char * name; + uint64_t n_times; + uint64_t total; + pixman_timer_t *next; +}; + +extern int timer_defined; + +void pixman_timer_register (pixman_timer_t *timer); + +#define TIMER_BEGIN(tname) \ + { \ + static pixman_timer_t timer ## tname; \ + uint64_t begin ## tname; \ + \ + if (!timer ## tname.initialized) \ + { \ + timer ## tname.initialized = 1; \ + timer ## tname.name = # tname; \ + pixman_timer_register (&timer ## tname); \ + } \ + \ + timer ## tname.n_times++; \ + begin ## tname = OIL_STAMP (); + +#define TIMER_END(tname) \ + timer ## tname.total += OIL_STAMP () - begin ## tname; \ + } + +#else + +#define TIMER_BEGIN(tname) +#define TIMER_END(tname) + +#endif /* PIXMAN_TIMERS */ + +#endif /* __ASSEMBLER__ */ + +#endif /* PIXMAN_PRIVATE_H */ diff --git a/src/pixman/pixman/pixman-radial-gradient.c b/src/pixman/pixman/pixman-radial-gradient.c new file mode 100644 index 0000000..6a21796 --- /dev/null +++ b/src/pixman/pixman/pixman-radial-gradient.c @@ -0,0 +1,471 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * + * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. + * Copyright © 2000 SuSE, Inc. + * 2005 Lars Knoll & Zack Rusin, Trolltech + * Copyright © 2007 Red Hat, Inc. + * + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include <stdlib.h> +#include <math.h> +#include "pixman-private.h" + +static inline pixman_fixed_32_32_t +dot (pixman_fixed_48_16_t x1, + pixman_fixed_48_16_t y1, + pixman_fixed_48_16_t z1, + pixman_fixed_48_16_t x2, + pixman_fixed_48_16_t y2, + pixman_fixed_48_16_t z2) +{ + /* + * Exact computation, assuming that the input values can + * be represented as pixman_fixed_16_16_t + */ + return x1 * x2 + y1 * y2 + z1 * z2; +} + +static inline double +fdot (double x1, + double y1, + double z1, + double x2, + double y2, + double z2) +{ + /* + * Error can be unbound in some special cases. + * Using clever dot product algorithms (for example compensated + * dot product) would improve this but make the code much less + * obvious + */ + return x1 * x2 + y1 * y2 + z1 * z2; +} + +static uint32_t +radial_compute_color (double a, + double b, + double c, + double inva, + double dr, + double mindr, + pixman_gradient_walker_t *walker, + pixman_repeat_t repeat) +{ + /* + * In this function error propagation can lead to bad results: + * - discr can have an unbound error (if b*b-a*c is very small), + * potentially making it the opposite sign of what it should have been + * (thus clearing a pixel that would have been colored or vice-versa) + * or propagating the error to sqrtdiscr; + * if discr has the wrong sign or b is very small, this can lead to bad + * results + * + * - the algorithm used to compute the solutions of the quadratic + * equation is not numerically stable (but saves one division compared + * to the numerically stable one); + * this can be a problem if a*c is much smaller than b*b + * + * - the above problems are worse if a is small (as inva becomes bigger) + */ + double discr; + + if (a == 0) + { + double t; + + if (b == 0) + return 0; + + t = pixman_fixed_1 / 2 * c / b; + if (repeat == PIXMAN_REPEAT_NONE) + { + if (0 <= t && t <= pixman_fixed_1) + return _pixman_gradient_walker_pixel (walker, t); + } + else + { + if (t * dr >= mindr) + return _pixman_gradient_walker_pixel (walker, t); + } + + return 0; + } + + discr = fdot (b, a, 0, b, -c, 0); + if (discr >= 0) + { + double sqrtdiscr, t0, t1; + + sqrtdiscr = sqrt (discr); + t0 = (b + sqrtdiscr) * inva; + t1 = (b - sqrtdiscr) * inva; + + /* + * The root that must be used is the biggest one that belongs + * to the valid range ([0,1] for PIXMAN_REPEAT_NONE, any + * solution that results in a positive radius otherwise). + * + * If a > 0, t0 is the biggest solution, so if it is valid, it + * is the correct result. + * + * If a < 0, only one of the solutions can be valid, so the + * order in which they are tested is not important. + */ + if (repeat == PIXMAN_REPEAT_NONE) + { + if (0 <= t0 && t0 <= pixman_fixed_1) + return _pixman_gradient_walker_pixel (walker, t0); + else if (0 <= t1 && t1 <= pixman_fixed_1) + return _pixman_gradient_walker_pixel (walker, t1); + } + else + { + if (t0 * dr >= mindr) + return _pixman_gradient_walker_pixel (walker, t0); + else if (t1 * dr >= mindr) + return _pixman_gradient_walker_pixel (walker, t1); + } + } + + return 0; +} + +static uint32_t * +radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) +{ + /* + * Implementation of radial gradients following the PDF specification. + * See section 8.7.4.5.4 Type 3 (Radial) Shadings of the PDF Reference + * Manual (PDF 32000-1:2008 at the time of this writing). + * + * In the radial gradient problem we are given two circles (c₁,r₁) and + * (c₂,r₂) that define the gradient itself. + * + * Mathematically the gradient can be defined as the family of circles + * + * ((1-t)·c₁ + t·(c₂), (1-t)·r₁ + t·r₂) + * + * excluding those circles whose radius would be < 0. When a point + * belongs to more than one circle, the one with a bigger t is the only + * one that contributes to its color. When a point does not belong + * to any of the circles, it is transparent black, i.e. RGBA (0, 0, 0, 0). + * Further limitations on the range of values for t are imposed when + * the gradient is not repeated, namely t must belong to [0,1]. + * + * The graphical result is the same as drawing the valid (radius > 0) + * circles with increasing t in [-inf, +inf] (or in [0,1] if the gradient + * is not repeated) using SOURCE operator composition. + * + * It looks like a cone pointing towards the viewer if the ending circle + * is smaller than the starting one, a cone pointing inside the page if + * the starting circle is the smaller one and like a cylinder if they + * have the same radius. + * + * What we actually do is, given the point whose color we are interested + * in, compute the t values for that point, solving for t in: + * + * length((1-t)·c₁ + t·(c₂) - p) = (1-t)·r₁ + t·r₂ + * + * Let's rewrite it in a simpler way, by defining some auxiliary + * variables: + * + * cd = c₂ - c₁ + * pd = p - c₁ + * dr = r₂ - r₁ + * length(t·cd - pd) = r₁ + t·dr + * + * which actually means + * + * hypot(t·cdx - pdx, t·cdy - pdy) = r₁ + t·dr + * + * or + * + * ⎷((t·cdx - pdx)² + (t·cdy - pdy)²) = r₁ + t·dr. + * + * If we impose (as stated earlier) that r₁ + t·dr >= 0, it becomes: + * + * (t·cdx - pdx)² + (t·cdy - pdy)² = (r₁ + t·dr)² + * + * where we can actually expand the squares and solve for t: + * + * t²cdx² - 2t·cdx·pdx + pdx² + t²cdy² - 2t·cdy·pdy + pdy² = + * = r₁² + 2·r₁·t·dr + t²·dr² + * + * (cdx² + cdy² - dr²)t² - 2(cdx·pdx + cdy·pdy + r₁·dr)t + + * (pdx² + pdy² - r₁²) = 0 + * + * A = cdx² + cdy² - dr² + * B = pdx·cdx + pdy·cdy + r₁·dr + * C = pdx² + pdy² - r₁² + * At² - 2Bt + C = 0 + * + * The solutions (unless the equation degenerates because of A = 0) are: + * + * t = (B ± ⎷(B² - A·C)) / A + * + * The solution we are going to prefer is the bigger one, unless the + * radius associated to it is negative (or it falls outside the valid t + * range). + * + * Additional observations (useful for optimizations): + * A does not depend on p + * + * A < 0 <=> one of the two circles completely contains the other one + * <=> for every p, the radiuses associated with the two t solutions + * have opposite sign + */ + pixman_image_t *image = iter->image; + int x = iter->x; + int y = iter->y; + int width = iter->width; + uint32_t *buffer = iter->buffer; + + gradient_t *gradient = (gradient_t *)image; + radial_gradient_t *radial = (radial_gradient_t *)image; + uint32_t *end = buffer + width; + pixman_gradient_walker_t walker; + pixman_vector_t v, unit; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + _pixman_gradient_walker_init (&walker, gradient, image->common.repeat); + + if (image->common.transform) + { + if (!pixman_transform_point_3d (image->common.transform, &v)) + return iter->buffer; + + unit.vector[0] = image->common.transform->matrix[0][0]; + unit.vector[1] = image->common.transform->matrix[1][0]; + unit.vector[2] = image->common.transform->matrix[2][0]; + } + else + { + unit.vector[0] = pixman_fixed_1; + unit.vector[1] = 0; + unit.vector[2] = 0; + } + + if (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1) + { + /* + * Given: + * + * t = (B ± ⎷(B² - A·C)) / A + * + * where + * + * A = cdx² + cdy² - dr² + * B = pdx·cdx + pdy·cdy + r₁·dr + * C = pdx² + pdy² - r₁² + * det = B² - A·C + * + * Since we have an affine transformation, we know that (pdx, pdy) + * increase linearly with each pixel, + * + * pdx = pdx₀ + n·ux, + * pdy = pdy₀ + n·uy, + * + * we can then express B, C and det through multiple differentiation. + */ + pixman_fixed_32_32_t b, db, c, dc, ddc; + + /* warning: this computation may overflow */ + v.vector[0] -= radial->c1.x; + v.vector[1] -= radial->c1.y; + + /* + * B and C are computed and updated exactly. + * If fdot was used instead of dot, in the worst case it would + * lose 11 bits of precision in each of the multiplication and + * summing up would zero out all the bit that were preserved, + * thus making the result 0 instead of the correct one. + * This would mean a worst case of unbound relative error or + * about 2^10 absolute error + */ + b = dot (v.vector[0], v.vector[1], radial->c1.radius, + radial->delta.x, radial->delta.y, radial->delta.radius); + db = dot (unit.vector[0], unit.vector[1], 0, + radial->delta.x, radial->delta.y, 0); + + c = dot (v.vector[0], v.vector[1], + -((pixman_fixed_48_16_t) radial->c1.radius), + v.vector[0], v.vector[1], radial->c1.radius); + dc = dot (2 * (pixman_fixed_48_16_t) v.vector[0] + unit.vector[0], + 2 * (pixman_fixed_48_16_t) v.vector[1] + unit.vector[1], + 0, + unit.vector[0], unit.vector[1], 0); + ddc = 2 * dot (unit.vector[0], unit.vector[1], 0, + unit.vector[0], unit.vector[1], 0); + + while (buffer < end) + { + if (!mask || *mask++) + { + *buffer = radial_compute_color (radial->a, b, c, + radial->inva, + radial->delta.radius, + radial->mindr, + &walker, + image->common.repeat); + } + + b += db; + c += dc; + dc += ddc; + ++buffer; + } + } + else + { + /* projective */ + /* Warning: + * error propagation guarantees are much looser than in the affine case + */ + while (buffer < end) + { + if (!mask || *mask++) + { + if (v.vector[2] != 0) + { + double pdx, pdy, invv2, b, c; + + invv2 = 1. * pixman_fixed_1 / v.vector[2]; + + pdx = v.vector[0] * invv2 - radial->c1.x; + /* / pixman_fixed_1 */ + + pdy = v.vector[1] * invv2 - radial->c1.y; + /* / pixman_fixed_1 */ + + b = fdot (pdx, pdy, radial->c1.radius, + radial->delta.x, radial->delta.y, + radial->delta.radius); + /* / pixman_fixed_1 / pixman_fixed_1 */ + + c = fdot (pdx, pdy, -radial->c1.radius, + pdx, pdy, radial->c1.radius); + /* / pixman_fixed_1 / pixman_fixed_1 */ + + *buffer = radial_compute_color (radial->a, b, c, + radial->inva, + radial->delta.radius, + radial->mindr, + &walker, + image->common.repeat); + } + else + { + *buffer = 0; + } + } + + ++buffer; + + v.vector[0] += unit.vector[0]; + v.vector[1] += unit.vector[1]; + v.vector[2] += unit.vector[2]; + } + } + + iter->y++; + return iter->buffer; +} + +static uint32_t * +radial_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) +{ + uint32_t *buffer = radial_get_scanline_narrow (iter, NULL); + + pixman_expand_to_float ( + (argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width); + + return buffer; +} + +void +_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter) +{ + if (iter->iter_flags & ITER_NARROW) + iter->get_scanline = radial_get_scanline_narrow; + else + iter->get_scanline = radial_get_scanline_wide; +} + +PIXMAN_EXPORT pixman_image_t * +pixman_image_create_radial_gradient (const pixman_point_fixed_t * inner, + const pixman_point_fixed_t * outer, + pixman_fixed_t inner_radius, + pixman_fixed_t outer_radius, + const pixman_gradient_stop_t *stops, + int n_stops) +{ + pixman_image_t *image; + radial_gradient_t *radial; + + image = _pixman_image_allocate (); + + if (!image) + return NULL; + + radial = &image->radial; + + if (!_pixman_init_gradient (&radial->common, stops, n_stops)) + { + free (image); + return NULL; + } + + image->type = RADIAL; + + radial->c1.x = inner->x; + radial->c1.y = inner->y; + radial->c1.radius = inner_radius; + radial->c2.x = outer->x; + radial->c2.y = outer->y; + radial->c2.radius = outer_radius; + + /* warning: this computations may overflow */ + radial->delta.x = radial->c2.x - radial->c1.x; + radial->delta.y = radial->c2.y - radial->c1.y; + radial->delta.radius = radial->c2.radius - radial->c1.radius; + + /* computed exactly, then cast to double -> every bit of the double + representation is correct (53 bits) */ + radial->a = dot (radial->delta.x, radial->delta.y, -radial->delta.radius, + radial->delta.x, radial->delta.y, radial->delta.radius); + if (radial->a != 0) + radial->inva = 1. * pixman_fixed_1 / radial->a; + + radial->mindr = -1. * pixman_fixed_1 * radial->c1.radius; + + return image; +} diff --git a/src/pixman/pixman/pixman-region.c b/src/pixman/pixman/pixman-region.c new file mode 100644 index 0000000..59bc9c7 --- /dev/null +++ b/src/pixman/pixman/pixman-region.c @@ -0,0 +1,2792 @@ +/* + * Copyright 1987, 1988, 1989, 1998 The Open Group + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation. + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of The Open Group shall not be + * used in advertising or otherwise to promote the sale, use or other dealings + * in this Software without prior written authorization from The Open Group. + * + * Copyright 1987, 1988, 1989 by + * Digital Equipment Corporation, Maynard, Massachusetts. + * + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose and without fee is hereby granted, + * provided that the above copyright notice appear in all copies and that + * both that copyright notice and this permission notice appear in + * supporting documentation, and that the name of Digital not be + * used in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. + * + * DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING + * ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL + * DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR + * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Copyright © 1998 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#include <stdlib.h> +#include <limits.h> +#include <string.h> +#include <stdio.h> +#include "pixman-private.h" + +#define PIXREGION_NIL(reg) ((reg)->data && !(reg)->data->numRects) +/* not a region */ +#define PIXREGION_NAR(reg) ((reg)->data == pixman_broken_data) +#define PIXREGION_NUMRECTS(reg) ((reg)->data ? (reg)->data->numRects : 1) +#define PIXREGION_SIZE(reg) ((reg)->data ? (reg)->data->size : 0) +#define PIXREGION_RECTS(reg) \ + ((reg)->data ? (box_type_t *)((reg)->data + 1) \ + : &(reg)->extents) +#define PIXREGION_BOXPTR(reg) ((box_type_t *)((reg)->data + 1)) +#define PIXREGION_BOX(reg, i) (&PIXREGION_BOXPTR (reg)[i]) +#define PIXREGION_TOP(reg) PIXREGION_BOX (reg, (reg)->data->numRects) +#define PIXREGION_END(reg) PIXREGION_BOX (reg, (reg)->data->numRects - 1) + +#define GOOD_RECT(rect) ((rect)->x1 < (rect)->x2 && (rect)->y1 < (rect)->y2) +#define BAD_RECT(rect) ((rect)->x1 > (rect)->x2 || (rect)->y1 > (rect)->y2) + +#ifdef DEBUG + +#define GOOD(reg) \ + do \ + { \ + if (!PREFIX (_selfcheck (reg))) \ + _pixman_log_error (FUNC, "Malformed region " # reg); \ + } while (0) + +#else + +#define GOOD(reg) + +#endif + +static const box_type_t PREFIX (_empty_box_) = { 0, 0, 0, 0 }; +static const region_data_type_t PREFIX (_empty_data_) = { 0, 0 }; +#if defined (__llvm__) && !defined (__clang__) +static const volatile region_data_type_t PREFIX (_broken_data_) = { 0, 0 }; +#else +static const region_data_type_t PREFIX (_broken_data_) = { 0, 0 }; +#endif + +static box_type_t *pixman_region_empty_box = + (box_type_t *)&PREFIX (_empty_box_); +static region_data_type_t *pixman_region_empty_data = + (region_data_type_t *)&PREFIX (_empty_data_); +static region_data_type_t *pixman_broken_data = + (region_data_type_t *)&PREFIX (_broken_data_); + +static pixman_bool_t +pixman_break (region_type_t *region); + +/* + * The functions in this file implement the Region abstraction used extensively + * throughout the X11 sample server. A Region is simply a set of disjoint + * (non-overlapping) rectangles, plus an "extent" rectangle which is the + * smallest single rectangle that contains all the non-overlapping rectangles. + * + * A Region is implemented as a "y-x-banded" array of rectangles. This array + * imposes two degrees of order. First, all rectangles are sorted by top side + * y coordinate first (y1), and then by left side x coordinate (x1). + * + * Furthermore, the rectangles are grouped into "bands". Each rectangle in a + * band has the same top y coordinate (y1), and each has the same bottom y + * coordinate (y2). Thus all rectangles in a band differ only in their left + * and right side (x1 and x2). Bands are implicit in the array of rectangles: + * there is no separate list of band start pointers. + * + * The y-x band representation does not minimize rectangles. In particular, + * if a rectangle vertically crosses a band (the rectangle has scanlines in + * the y1 to y2 area spanned by the band), then the rectangle may be broken + * down into two or more smaller rectangles stacked one atop the other. + * + * ----------- ----------- + * | | | | band 0 + * | | -------- ----------- -------- + * | | | | in y-x banded | | | | band 1 + * | | | | form is | | | | + * ----------- | | ----------- -------- + * | | | | band 2 + * -------- -------- + * + * An added constraint on the rectangles is that they must cover as much + * horizontal area as possible: no two rectangles within a band are allowed + * to touch. + * + * Whenever possible, bands will be merged together to cover a greater vertical + * distance (and thus reduce the number of rectangles). Two bands can be merged + * only if the bottom of one touches the top of the other and they have + * rectangles in the same places (of the same width, of course). + * + * Adam de Boor wrote most of the original region code. Joel McCormack + * substantially modified or rewrote most of the core arithmetic routines, and + * added pixman_region_validate in order to support several speed improvements + * to pixman_region_validate_tree. Bob Scheifler changed the representation + * to be more compact when empty or a single rectangle, and did a bunch of + * gratuitous reformatting. Carl Worth did further gratuitous reformatting + * while re-merging the server and client region code into libpixregion. + * Soren Sandmann did even more gratuitous reformatting. + */ + +/* true iff two Boxes overlap */ +#define EXTENTCHECK(r1, r2) \ + (!( ((r1)->x2 <= (r2)->x1) || \ + ((r1)->x1 >= (r2)->x2) || \ + ((r1)->y2 <= (r2)->y1) || \ + ((r1)->y1 >= (r2)->y2) ) ) + +/* true iff (x,y) is in Box */ +#define INBOX(r, x, y) \ + ( ((r)->x2 > x) && \ + ((r)->x1 <= x) && \ + ((r)->y2 > y) && \ + ((r)->y1 <= y) ) + +/* true iff Box r1 contains Box r2 */ +#define SUBSUMES(r1, r2) \ + ( ((r1)->x1 <= (r2)->x1) && \ + ((r1)->x2 >= (r2)->x2) && \ + ((r1)->y1 <= (r2)->y1) && \ + ((r1)->y2 >= (r2)->y2) ) + +static size_t +PIXREGION_SZOF (size_t n) +{ + size_t size = n * sizeof(box_type_t); + + if (n > UINT32_MAX / sizeof(box_type_t)) + return 0; + + if (sizeof(region_data_type_t) > UINT32_MAX - size) + return 0; + + return size + sizeof(region_data_type_t); +} + +static region_data_type_t * +alloc_data (size_t n) +{ + size_t sz = PIXREGION_SZOF (n); + + if (!sz) + return NULL; + + return malloc (sz); +} + +#define FREE_DATA(reg) if ((reg)->data && (reg)->data->size) free ((reg)->data) + +#define RECTALLOC_BAIL(region, n, bail) \ + do \ + { \ + if (!(region)->data || \ + (((region)->data->numRects + (n)) > (region)->data->size)) \ + { \ + if (!pixman_rect_alloc (region, n)) \ + goto bail; \ + } \ + } while (0) + +#define RECTALLOC(region, n) \ + do \ + { \ + if (!(region)->data || \ + (((region)->data->numRects + (n)) > (region)->data->size)) \ + { \ + if (!pixman_rect_alloc (region, n)) { \ + return FALSE; \ + } \ + } \ + } while (0) + +#define ADDRECT(next_rect, nx1, ny1, nx2, ny2) \ + do \ + { \ + next_rect->x1 = nx1; \ + next_rect->y1 = ny1; \ + next_rect->x2 = nx2; \ + next_rect->y2 = ny2; \ + next_rect++; \ + } \ + while (0) + +#define NEWRECT(region, next_rect, nx1, ny1, nx2, ny2) \ + do \ + { \ + if (!(region)->data || \ + ((region)->data->numRects == (region)->data->size)) \ + { \ + if (!pixman_rect_alloc (region, 1)) \ + return FALSE; \ + next_rect = PIXREGION_TOP (region); \ + } \ + ADDRECT (next_rect, nx1, ny1, nx2, ny2); \ + region->data->numRects++; \ + critical_if_fail (region->data->numRects <= region->data->size); \ + } while (0) + +#define DOWNSIZE(reg, numRects) \ + do \ + { \ + if (((numRects) < ((reg)->data->size >> 1)) && \ + ((reg)->data->size > 50)) \ + { \ + region_data_type_t * new_data; \ + size_t data_size = PIXREGION_SZOF (numRects); \ + \ + if (!data_size) \ + { \ + new_data = NULL; \ + } \ + else \ + { \ + new_data = (region_data_type_t *) \ + realloc ((reg)->data, data_size); \ + } \ + \ + if (new_data) \ + { \ + new_data->size = (numRects); \ + (reg)->data = new_data; \ + } \ + } \ + } while (0) + +PIXMAN_EXPORT pixman_bool_t +PREFIX (_equal) (region_type_t *reg1, region_type_t *reg2) +{ + int i; + box_type_t *rects1; + box_type_t *rects2; + + if (reg1->extents.x1 != reg2->extents.x1) + return FALSE; + + if (reg1->extents.x2 != reg2->extents.x2) + return FALSE; + + if (reg1->extents.y1 != reg2->extents.y1) + return FALSE; + + if (reg1->extents.y2 != reg2->extents.y2) + return FALSE; + + if (PIXREGION_NUMRECTS (reg1) != PIXREGION_NUMRECTS (reg2)) + return FALSE; + + rects1 = PIXREGION_RECTS (reg1); + rects2 = PIXREGION_RECTS (reg2); + + for (i = 0; i != PIXREGION_NUMRECTS (reg1); i++) + { + if (rects1[i].x1 != rects2[i].x1) + return FALSE; + + if (rects1[i].x2 != rects2[i].x2) + return FALSE; + + if (rects1[i].y1 != rects2[i].y1) + return FALSE; + + if (rects1[i].y2 != rects2[i].y2) + return FALSE; + } + + return TRUE; +} + +int +PREFIX (_print) (region_type_t *rgn) +{ + int num, size; + int i; + box_type_t * rects; + + num = PIXREGION_NUMRECTS (rgn); + size = PIXREGION_SIZE (rgn); + rects = PIXREGION_RECTS (rgn); + + fprintf (stderr, "num: %d size: %d\n", num, size); + fprintf (stderr, "extents: %d %d %d %d\n", + rgn->extents.x1, + rgn->extents.y1, + rgn->extents.x2, + rgn->extents.y2); + + for (i = 0; i < num; i++) + { + fprintf (stderr, "%d %d %d %d \n", + rects[i].x1, rects[i].y1, rects[i].x2, rects[i].y2); + } + + fprintf (stderr, "\n"); + + return(num); +} + + +PIXMAN_EXPORT void +PREFIX (_init) (region_type_t *region) +{ + region->extents = *pixman_region_empty_box; + region->data = pixman_region_empty_data; +} + +PIXMAN_EXPORT void +PREFIX (_init_rect) (region_type_t * region, + int x, + int y, + unsigned int width, + unsigned int height) +{ + region->extents.x1 = x; + region->extents.y1 = y; + region->extents.x2 = x + width; + region->extents.y2 = y + height; + + if (!GOOD_RECT (®ion->extents)) + { + if (BAD_RECT (®ion->extents)) + _pixman_log_error (FUNC, "Invalid rectangle passed"); + PREFIX (_init) (region); + return; + } + + region->data = NULL; +} + +PIXMAN_EXPORT void +PREFIX (_init_with_extents) (region_type_t *region, box_type_t *extents) +{ + if (!GOOD_RECT (extents)) + { + if (BAD_RECT (extents)) + _pixman_log_error (FUNC, "Invalid rectangle passed"); + PREFIX (_init) (region); + return; + } + region->extents = *extents; + + region->data = NULL; +} + +PIXMAN_EXPORT void +PREFIX (_fini) (region_type_t *region) +{ + GOOD (region); + FREE_DATA (region); +} + +PIXMAN_EXPORT int +PREFIX (_n_rects) (region_type_t *region) +{ + return PIXREGION_NUMRECTS (region); +} + +PIXMAN_EXPORT box_type_t * +PREFIX (_rectangles) (region_type_t *region, + int *n_rects) +{ + if (n_rects) + *n_rects = PIXREGION_NUMRECTS (region); + + return PIXREGION_RECTS (region); +} + +static pixman_bool_t +pixman_break (region_type_t *region) +{ + FREE_DATA (region); + + region->extents = *pixman_region_empty_box; + region->data = pixman_broken_data; + + return FALSE; +} + +static pixman_bool_t +pixman_rect_alloc (region_type_t * region, + int n) +{ + region_data_type_t *data; + + if (!region->data) + { + n++; + region->data = alloc_data (n); + + if (!region->data) + return pixman_break (region); + + region->data->numRects = 1; + *PIXREGION_BOXPTR (region) = region->extents; + } + else if (!region->data->size) + { + region->data = alloc_data (n); + + if (!region->data) + return pixman_break (region); + + region->data->numRects = 0; + } + else + { + size_t data_size; + + if (n == 1) + { + n = region->data->numRects; + if (n > 500) /* XXX pick numbers out of a hat */ + n = 250; + } + + n += region->data->numRects; + data_size = PIXREGION_SZOF (n); + + if (!data_size) + { + data = NULL; + } + else + { + data = (region_data_type_t *) + realloc (region->data, PIXREGION_SZOF (n)); + } + + if (!data) + return pixman_break (region); + + region->data = data; + } + + region->data->size = n; + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +PREFIX (_copy) (region_type_t *dst, region_type_t *src) +{ + GOOD (dst); + GOOD (src); + + if (dst == src) + return TRUE; + + dst->extents = src->extents; + + if (!src->data || !src->data->size) + { + FREE_DATA (dst); + dst->data = src->data; + return TRUE; + } + + if (!dst->data || (dst->data->size < src->data->numRects)) + { + FREE_DATA (dst); + + dst->data = alloc_data (src->data->numRects); + + if (!dst->data) + return pixman_break (dst); + + dst->data->size = src->data->numRects; + } + + dst->data->numRects = src->data->numRects; + + memmove ((char *)PIXREGION_BOXPTR (dst), (char *)PIXREGION_BOXPTR (src), + dst->data->numRects * sizeof(box_type_t)); + + return TRUE; +} + +/*====================================================================== + * Generic Region Operator + *====================================================================*/ + +/*- + *----------------------------------------------------------------------- + * pixman_coalesce -- + * Attempt to merge the boxes in the current band with those in the + * previous one. We are guaranteed that the current band extends to + * the end of the rects array. Used only by pixman_op. + * + * Results: + * The new index for the previous band. + * + * Side Effects: + * If coalescing takes place: + * - rectangles in the previous band will have their y2 fields + * altered. + * - region->data->numRects will be decreased. + * + *----------------------------------------------------------------------- + */ +static inline int +pixman_coalesce (region_type_t * region, /* Region to coalesce */ + int prev_start, /* Index of start of previous band */ + int cur_start) /* Index of start of current band */ +{ + box_type_t *prev_box; /* Current box in previous band */ + box_type_t *cur_box; /* Current box in current band */ + int numRects; /* Number rectangles in both bands */ + int y2; /* Bottom of current band */ + + /* + * Figure out how many rectangles are in the band. + */ + numRects = cur_start - prev_start; + critical_if_fail (numRects == region->data->numRects - cur_start); + + if (!numRects) return cur_start; + + /* + * The bands may only be coalesced if the bottom of the previous + * matches the top scanline of the current. + */ + prev_box = PIXREGION_BOX (region, prev_start); + cur_box = PIXREGION_BOX (region, cur_start); + if (prev_box->y2 != cur_box->y1) return cur_start; + + /* + * Make sure the bands have boxes in the same places. This + * assumes that boxes have been added in such a way that they + * cover the most area possible. I.e. two boxes in a band must + * have some horizontal space between them. + */ + y2 = cur_box->y2; + + do + { + if ((prev_box->x1 != cur_box->x1) || (prev_box->x2 != cur_box->x2)) + return (cur_start); + + prev_box++; + cur_box++; + numRects--; + } + while (numRects); + + /* + * The bands may be merged, so set the bottom y of each box + * in the previous band to the bottom y of the current band. + */ + numRects = cur_start - prev_start; + region->data->numRects -= numRects; + + do + { + prev_box--; + prev_box->y2 = y2; + numRects--; + } + while (numRects); + + return prev_start; +} + +/* Quicky macro to avoid trivial reject procedure calls to pixman_coalesce */ + +#define COALESCE(new_reg, prev_band, cur_band) \ + do \ + { \ + if (cur_band - prev_band == new_reg->data->numRects - cur_band) \ + prev_band = pixman_coalesce (new_reg, prev_band, cur_band); \ + else \ + prev_band = cur_band; \ + } while (0) + +/*- + *----------------------------------------------------------------------- + * pixman_region_append_non_o -- + * Handle a non-overlapping band for the union and subtract operations. + * Just adds the (top/bottom-clipped) rectangles into the region. + * Doesn't have to check for subsumption or anything. + * + * Results: + * None. + * + * Side Effects: + * region->data->numRects is incremented and the rectangles overwritten + * with the rectangles we're passed. + * + *----------------------------------------------------------------------- + */ +static inline pixman_bool_t +pixman_region_append_non_o (region_type_t * region, + box_type_t * r, + box_type_t * r_end, + int y1, + int y2) +{ + box_type_t *next_rect; + int new_rects; + + new_rects = r_end - r; + + critical_if_fail (y1 < y2); + critical_if_fail (new_rects != 0); + + /* Make sure we have enough space for all rectangles to be added */ + RECTALLOC (region, new_rects); + next_rect = PIXREGION_TOP (region); + region->data->numRects += new_rects; + + do + { + critical_if_fail (r->x1 < r->x2); + ADDRECT (next_rect, r->x1, y1, r->x2, y2); + r++; + } + while (r != r_end); + + return TRUE; +} + +#define FIND_BAND(r, r_band_end, r_end, ry1) \ + do \ + { \ + ry1 = r->y1; \ + r_band_end = r + 1; \ + while ((r_band_end != r_end) && (r_band_end->y1 == ry1)) { \ + r_band_end++; \ + } \ + } while (0) + +#define APPEND_REGIONS(new_reg, r, r_end) \ + do \ + { \ + int new_rects; \ + if ((new_rects = r_end - r)) { \ + RECTALLOC_BAIL (new_reg, new_rects, bail); \ + memmove ((char *)PIXREGION_TOP (new_reg), (char *)r, \ + new_rects * sizeof(box_type_t)); \ + new_reg->data->numRects += new_rects; \ + } \ + } while (0) + +/*- + *----------------------------------------------------------------------- + * pixman_op -- + * Apply an operation to two regions. Called by pixman_region_union, pixman_region_inverse, + * pixman_region_subtract, pixman_region_intersect.... Both regions MUST have at least one + * rectangle, and cannot be the same object. + * + * Results: + * TRUE if successful. + * + * Side Effects: + * The new region is overwritten. + * overlap set to TRUE if overlap_func ever returns TRUE. + * + * Notes: + * The idea behind this function is to view the two regions as sets. + * Together they cover a rectangle of area that this function divides + * into horizontal bands where points are covered only by one region + * or by both. For the first case, the non_overlap_func is called with + * each the band and the band's upper and lower extents. For the + * second, the overlap_func is called to process the entire band. It + * is responsible for clipping the rectangles in the band, though + * this function provides the boundaries. + * At the end of each band, the new region is coalesced, if possible, + * to reduce the number of rectangles in the region. + * + *----------------------------------------------------------------------- + */ + +typedef pixman_bool_t (*overlap_proc_ptr) (region_type_t *region, + box_type_t * r1, + box_type_t * r1_end, + box_type_t * r2, + box_type_t * r2_end, + int y1, + int y2); + +static pixman_bool_t +pixman_op (region_type_t * new_reg, /* Place to store result */ + region_type_t * reg1, /* First region in operation */ + region_type_t * reg2, /* 2d region in operation */ + overlap_proc_ptr overlap_func, /* Function to call for over- + * lapping bands */ + int append_non1, /* Append non-overlapping bands + * in region 1 ? + */ + int append_non2 /* Append non-overlapping bands + * in region 2 ? + */ + ) +{ + box_type_t *r1; /* Pointer into first region */ + box_type_t *r2; /* Pointer into 2d region */ + box_type_t *r1_end; /* End of 1st region */ + box_type_t *r2_end; /* End of 2d region */ + int ybot; /* Bottom of intersection */ + int ytop; /* Top of intersection */ + region_data_type_t *old_data; /* Old data for new_reg */ + int prev_band; /* Index of start of + * previous band in new_reg */ + int cur_band; /* Index of start of current + * band in new_reg */ + box_type_t * r1_band_end; /* End of current band in r1 */ + box_type_t * r2_band_end; /* End of current band in r2 */ + int top; /* Top of non-overlapping band */ + int bot; /* Bottom of non-overlapping band*/ + int r1y1; /* Temps for r1->y1 and r2->y1 */ + int r2y1; + int new_size; + int numRects; + + /* + * Break any region computed from a broken region + */ + if (PIXREGION_NAR (reg1) || PIXREGION_NAR (reg2)) + return pixman_break (new_reg); + + /* + * Initialization: + * set r1, r2, r1_end and r2_end appropriately, save the rectangles + * of the destination region until the end in case it's one of + * the two source regions, then mark the "new" region empty, allocating + * another array of rectangles for it to use. + */ + + r1 = PIXREGION_RECTS (reg1); + new_size = PIXREGION_NUMRECTS (reg1); + r1_end = r1 + new_size; + + numRects = PIXREGION_NUMRECTS (reg2); + r2 = PIXREGION_RECTS (reg2); + r2_end = r2 + numRects; + + critical_if_fail (r1 != r1_end); + critical_if_fail (r2 != r2_end); + + old_data = (region_data_type_t *)NULL; + + if (((new_reg == reg1) && (new_size > 1)) || + ((new_reg == reg2) && (numRects > 1))) + { + old_data = new_reg->data; + new_reg->data = pixman_region_empty_data; + } + + /* guess at new size */ + if (numRects > new_size) + new_size = numRects; + + new_size <<= 1; + + if (!new_reg->data) + new_reg->data = pixman_region_empty_data; + else if (new_reg->data->size) + new_reg->data->numRects = 0; + + if (new_size > new_reg->data->size) + { + if (!pixman_rect_alloc (new_reg, new_size)) + { + free (old_data); + return FALSE; + } + } + + /* + * Initialize ybot. + * In the upcoming loop, ybot and ytop serve different functions depending + * on whether the band being handled is an overlapping or non-overlapping + * band. + * In the case of a non-overlapping band (only one of the regions + * has points in the band), ybot is the bottom of the most recent + * intersection and thus clips the top of the rectangles in that band. + * ytop is the top of the next intersection between the two regions and + * serves to clip the bottom of the rectangles in the current band. + * For an overlapping band (where the two regions intersect), ytop clips + * the top of the rectangles of both regions and ybot clips the bottoms. + */ + + ybot = MIN (r1->y1, r2->y1); + + /* + * prev_band serves to mark the start of the previous band so rectangles + * can be coalesced into larger rectangles. qv. pixman_coalesce, above. + * In the beginning, there is no previous band, so prev_band == cur_band + * (cur_band is set later on, of course, but the first band will always + * start at index 0). prev_band and cur_band must be indices because of + * the possible expansion, and resultant moving, of the new region's + * array of rectangles. + */ + prev_band = 0; + + do + { + /* + * This algorithm proceeds one source-band (as opposed to a + * destination band, which is determined by where the two regions + * intersect) at a time. r1_band_end and r2_band_end serve to mark the + * rectangle after the last one in the current band for their + * respective regions. + */ + critical_if_fail (r1 != r1_end); + critical_if_fail (r2 != r2_end); + + FIND_BAND (r1, r1_band_end, r1_end, r1y1); + FIND_BAND (r2, r2_band_end, r2_end, r2y1); + + /* + * First handle the band that doesn't intersect, if any. + * + * Note that attention is restricted to one band in the + * non-intersecting region at once, so if a region has n + * bands between the current position and the next place it overlaps + * the other, this entire loop will be passed through n times. + */ + if (r1y1 < r2y1) + { + if (append_non1) + { + top = MAX (r1y1, ybot); + bot = MIN (r1->y2, r2y1); + if (top != bot) + { + cur_band = new_reg->data->numRects; + if (!pixman_region_append_non_o (new_reg, r1, r1_band_end, top, bot)) + goto bail; + COALESCE (new_reg, prev_band, cur_band); + } + } + ytop = r2y1; + } + else if (r2y1 < r1y1) + { + if (append_non2) + { + top = MAX (r2y1, ybot); + bot = MIN (r2->y2, r1y1); + + if (top != bot) + { + cur_band = new_reg->data->numRects; + + if (!pixman_region_append_non_o (new_reg, r2, r2_band_end, top, bot)) + goto bail; + + COALESCE (new_reg, prev_band, cur_band); + } + } + ytop = r1y1; + } + else + { + ytop = r1y1; + } + + /* + * Now see if we've hit an intersecting band. The two bands only + * intersect if ybot > ytop + */ + ybot = MIN (r1->y2, r2->y2); + if (ybot > ytop) + { + cur_band = new_reg->data->numRects; + + if (!(*overlap_func)(new_reg, + r1, r1_band_end, + r2, r2_band_end, + ytop, ybot)) + { + goto bail; + } + + COALESCE (new_reg, prev_band, cur_band); + } + + /* + * If we've finished with a band (y2 == ybot) we skip forward + * in the region to the next band. + */ + if (r1->y2 == ybot) + r1 = r1_band_end; + + if (r2->y2 == ybot) + r2 = r2_band_end; + + } + while (r1 != r1_end && r2 != r2_end); + + /* + * Deal with whichever region (if any) still has rectangles left. + * + * We only need to worry about banding and coalescing for the very first + * band left. After that, we can just group all remaining boxes, + * regardless of how many bands, into one final append to the list. + */ + + if ((r1 != r1_end) && append_non1) + { + /* Do first non_overlap1Func call, which may be able to coalesce */ + FIND_BAND (r1, r1_band_end, r1_end, r1y1); + + cur_band = new_reg->data->numRects; + + if (!pixman_region_append_non_o (new_reg, + r1, r1_band_end, + MAX (r1y1, ybot), r1->y2)) + { + goto bail; + } + + COALESCE (new_reg, prev_band, cur_band); + + /* Just append the rest of the boxes */ + APPEND_REGIONS (new_reg, r1_band_end, r1_end); + } + else if ((r2 != r2_end) && append_non2) + { + /* Do first non_overlap2Func call, which may be able to coalesce */ + FIND_BAND (r2, r2_band_end, r2_end, r2y1); + + cur_band = new_reg->data->numRects; + + if (!pixman_region_append_non_o (new_reg, + r2, r2_band_end, + MAX (r2y1, ybot), r2->y2)) + { + goto bail; + } + + COALESCE (new_reg, prev_band, cur_band); + + /* Append rest of boxes */ + APPEND_REGIONS (new_reg, r2_band_end, r2_end); + } + + free (old_data); + + if (!(numRects = new_reg->data->numRects)) + { + FREE_DATA (new_reg); + new_reg->data = pixman_region_empty_data; + } + else if (numRects == 1) + { + new_reg->extents = *PIXREGION_BOXPTR (new_reg); + FREE_DATA (new_reg); + new_reg->data = (region_data_type_t *)NULL; + } + else + { + DOWNSIZE (new_reg, numRects); + } + + return TRUE; + +bail: + free (old_data); + + return pixman_break (new_reg); +} + +/*- + *----------------------------------------------------------------------- + * pixman_set_extents -- + * Reset the extents of a region to what they should be. Called by + * pixman_region_subtract and pixman_region_intersect as they can't + * figure it out along the way or do so easily, as pixman_region_union can. + * + * Results: + * None. + * + * Side Effects: + * The region's 'extents' structure is overwritten. + * + *----------------------------------------------------------------------- + */ +static void +pixman_set_extents (region_type_t *region) +{ + box_type_t *box, *box_end; + + if (!region->data) + return; + + if (!region->data->size) + { + region->extents.x2 = region->extents.x1; + region->extents.y2 = region->extents.y1; + return; + } + + box = PIXREGION_BOXPTR (region); + box_end = PIXREGION_END (region); + + /* + * Since box is the first rectangle in the region, it must have the + * smallest y1 and since box_end is the last rectangle in the region, + * it must have the largest y2, because of banding. Initialize x1 and + * x2 from box and box_end, resp., as good things to initialize them + * to... + */ + region->extents.x1 = box->x1; + region->extents.y1 = box->y1; + region->extents.x2 = box_end->x2; + region->extents.y2 = box_end->y2; + + critical_if_fail (region->extents.y1 < region->extents.y2); + + while (box <= box_end) + { + if (box->x1 < region->extents.x1) + region->extents.x1 = box->x1; + if (box->x2 > region->extents.x2) + region->extents.x2 = box->x2; + box++; + } + + critical_if_fail (region->extents.x1 < region->extents.x2); +} + +/*====================================================================== + * Region Intersection + *====================================================================*/ +/*- + *----------------------------------------------------------------------- + * pixman_region_intersect_o -- + * Handle an overlapping band for pixman_region_intersect. + * + * Results: + * TRUE if successful. + * + * Side Effects: + * Rectangles may be added to the region. + * + *----------------------------------------------------------------------- + */ +/*ARGSUSED*/ +static pixman_bool_t +pixman_region_intersect_o (region_type_t *region, + box_type_t * r1, + box_type_t * r1_end, + box_type_t * r2, + box_type_t * r2_end, + int y1, + int y2) +{ + int x1; + int x2; + box_type_t * next_rect; + + next_rect = PIXREGION_TOP (region); + + critical_if_fail (y1 < y2); + critical_if_fail (r1 != r1_end && r2 != r2_end); + + do + { + x1 = MAX (r1->x1, r2->x1); + x2 = MIN (r1->x2, r2->x2); + + /* + * If there's any overlap between the two rectangles, add that + * overlap to the new region. + */ + if (x1 < x2) + NEWRECT (region, next_rect, x1, y1, x2, y2); + + /* + * Advance the pointer(s) with the leftmost right side, since the next + * rectangle on that list may still overlap the other region's + * current rectangle. + */ + if (r1->x2 == x2) + { + r1++; + } + if (r2->x2 == x2) + { + r2++; + } + } + while ((r1 != r1_end) && (r2 != r2_end)); + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +PREFIX (_intersect) (region_type_t * new_reg, + region_type_t * reg1, + region_type_t * reg2) +{ + GOOD (reg1); + GOOD (reg2); + GOOD (new_reg); + + /* check for trivial reject */ + if (PIXREGION_NIL (reg1) || PIXREGION_NIL (reg2) || + !EXTENTCHECK (®1->extents, ®2->extents)) + { + /* Covers about 20% of all cases */ + FREE_DATA (new_reg); + new_reg->extents.x2 = new_reg->extents.x1; + new_reg->extents.y2 = new_reg->extents.y1; + if (PIXREGION_NAR (reg1) || PIXREGION_NAR (reg2)) + { + new_reg->data = pixman_broken_data; + return FALSE; + } + else + { + new_reg->data = pixman_region_empty_data; + } + } + else if (!reg1->data && !reg2->data) + { + /* Covers about 80% of cases that aren't trivially rejected */ + new_reg->extents.x1 = MAX (reg1->extents.x1, reg2->extents.x1); + new_reg->extents.y1 = MAX (reg1->extents.y1, reg2->extents.y1); + new_reg->extents.x2 = MIN (reg1->extents.x2, reg2->extents.x2); + new_reg->extents.y2 = MIN (reg1->extents.y2, reg2->extents.y2); + + FREE_DATA (new_reg); + + new_reg->data = (region_data_type_t *)NULL; + } + else if (!reg2->data && SUBSUMES (®2->extents, ®1->extents)) + { + return PREFIX (_copy) (new_reg, reg1); + } + else if (!reg1->data && SUBSUMES (®1->extents, ®2->extents)) + { + return PREFIX (_copy) (new_reg, reg2); + } + else if (reg1 == reg2) + { + return PREFIX (_copy) (new_reg, reg1); + } + else + { + /* General purpose intersection */ + + if (!pixman_op (new_reg, reg1, reg2, pixman_region_intersect_o, FALSE, FALSE)) + return FALSE; + + pixman_set_extents (new_reg); + } + + GOOD (new_reg); + return(TRUE); +} + +#define MERGERECT(r) \ + do \ + { \ + if (r->x1 <= x2) \ + { \ + /* Merge with current rectangle */ \ + if (x2 < r->x2) \ + x2 = r->x2; \ + } \ + else \ + { \ + /* Add current rectangle, start new one */ \ + NEWRECT (region, next_rect, x1, y1, x2, y2); \ + x1 = r->x1; \ + x2 = r->x2; \ + } \ + r++; \ + } while (0) + +/*====================================================================== + * Region Union + *====================================================================*/ + +/*- + *----------------------------------------------------------------------- + * pixman_region_union_o -- + * Handle an overlapping band for the union operation. Picks the + * left-most rectangle each time and merges it into the region. + * + * Results: + * TRUE if successful. + * + * Side Effects: + * region is overwritten. + * overlap is set to TRUE if any boxes overlap. + * + *----------------------------------------------------------------------- + */ +static pixman_bool_t +pixman_region_union_o (region_type_t *region, + box_type_t * r1, + box_type_t * r1_end, + box_type_t * r2, + box_type_t * r2_end, + int y1, + int y2) +{ + box_type_t *next_rect; + int x1; /* left and right side of current union */ + int x2; + + critical_if_fail (y1 < y2); + critical_if_fail (r1 != r1_end && r2 != r2_end); + + next_rect = PIXREGION_TOP (region); + + /* Start off current rectangle */ + if (r1->x1 < r2->x1) + { + x1 = r1->x1; + x2 = r1->x2; + r1++; + } + else + { + x1 = r2->x1; + x2 = r2->x2; + r2++; + } + while (r1 != r1_end && r2 != r2_end) + { + if (r1->x1 < r2->x1) + MERGERECT (r1); + else + MERGERECT (r2); + } + + /* Finish off whoever (if any) is left */ + if (r1 != r1_end) + { + do + { + MERGERECT (r1); + } + while (r1 != r1_end); + } + else if (r2 != r2_end) + { + do + { + MERGERECT (r2); + } + while (r2 != r2_end); + } + + /* Add current rectangle */ + NEWRECT (region, next_rect, x1, y1, x2, y2); + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +PREFIX(_intersect_rect) (region_type_t *dest, + region_type_t *source, + int x, int y, + unsigned int width, + unsigned int height) +{ + region_type_t region; + + region.data = NULL; + region.extents.x1 = x; + region.extents.y1 = y; + region.extents.x2 = x + width; + region.extents.y2 = y + height; + + return PREFIX(_intersect) (dest, source, ®ion); +} + +/* Convenience function for performing union of region with a + * single rectangle + */ +PIXMAN_EXPORT pixman_bool_t +PREFIX (_union_rect) (region_type_t *dest, + region_type_t *source, + int x, + int y, + unsigned int width, + unsigned int height) +{ + region_type_t region; + + region.extents.x1 = x; + region.extents.y1 = y; + region.extents.x2 = x + width; + region.extents.y2 = y + height; + + if (!GOOD_RECT (®ion.extents)) + { + if (BAD_RECT (®ion.extents)) + _pixman_log_error (FUNC, "Invalid rectangle passed"); + return PREFIX (_copy) (dest, source); + } + + region.data = NULL; + + return PREFIX (_union) (dest, source, ®ion); +} + +PIXMAN_EXPORT pixman_bool_t +PREFIX (_union) (region_type_t *new_reg, + region_type_t *reg1, + region_type_t *reg2) +{ + /* Return TRUE if some overlap + * between reg1, reg2 + */ + GOOD (reg1); + GOOD (reg2); + GOOD (new_reg); + + /* checks all the simple cases */ + + /* + * Region 1 and 2 are the same + */ + if (reg1 == reg2) + return PREFIX (_copy) (new_reg, reg1); + + /* + * Region 1 is empty + */ + if (PIXREGION_NIL (reg1)) + { + if (PIXREGION_NAR (reg1)) + return pixman_break (new_reg); + + if (new_reg != reg2) + return PREFIX (_copy) (new_reg, reg2); + + return TRUE; + } + + /* + * Region 2 is empty + */ + if (PIXREGION_NIL (reg2)) + { + if (PIXREGION_NAR (reg2)) + return pixman_break (new_reg); + + if (new_reg != reg1) + return PREFIX (_copy) (new_reg, reg1); + + return TRUE; + } + + /* + * Region 1 completely subsumes region 2 + */ + if (!reg1->data && SUBSUMES (®1->extents, ®2->extents)) + { + if (new_reg != reg1) + return PREFIX (_copy) (new_reg, reg1); + + return TRUE; + } + + /* + * Region 2 completely subsumes region 1 + */ + if (!reg2->data && SUBSUMES (®2->extents, ®1->extents)) + { + if (new_reg != reg2) + return PREFIX (_copy) (new_reg, reg2); + + return TRUE; + } + + if (!pixman_op (new_reg, reg1, reg2, pixman_region_union_o, TRUE, TRUE)) + return FALSE; + + new_reg->extents.x1 = MIN (reg1->extents.x1, reg2->extents.x1); + new_reg->extents.y1 = MIN (reg1->extents.y1, reg2->extents.y1); + new_reg->extents.x2 = MAX (reg1->extents.x2, reg2->extents.x2); + new_reg->extents.y2 = MAX (reg1->extents.y2, reg2->extents.y2); + + GOOD (new_reg); + + return TRUE; +} + +/*====================================================================== + * Batch Rectangle Union + *====================================================================*/ + +#define EXCHANGE_RECTS(a, b) \ + { \ + box_type_t t; \ + t = rects[a]; \ + rects[a] = rects[b]; \ + rects[b] = t; \ + } + +static void +quick_sort_rects ( + box_type_t rects[], + int numRects) +{ + int y1; + int x1; + int i, j; + box_type_t *r; + + /* Always called with numRects > 1 */ + + do + { + if (numRects == 2) + { + if (rects[0].y1 > rects[1].y1 || + (rects[0].y1 == rects[1].y1 && rects[0].x1 > rects[1].x1)) + { + EXCHANGE_RECTS (0, 1); + } + + return; + } + + /* Choose partition element, stick in location 0 */ + EXCHANGE_RECTS (0, numRects >> 1); + y1 = rects[0].y1; + x1 = rects[0].x1; + + /* Partition array */ + i = 0; + j = numRects; + + do + { + r = &(rects[i]); + do + { + r++; + i++; + } + while (i != numRects && (r->y1 < y1 || (r->y1 == y1 && r->x1 < x1))); + + r = &(rects[j]); + do + { + r--; + j--; + } + while (y1 < r->y1 || (y1 == r->y1 && x1 < r->x1)); + + if (i < j) + EXCHANGE_RECTS (i, j); + } + while (i < j); + + /* Move partition element back to middle */ + EXCHANGE_RECTS (0, j); + + /* Recurse */ + if (numRects - j - 1 > 1) + quick_sort_rects (&rects[j + 1], numRects - j - 1); + + numRects = j; + } + while (numRects > 1); +} + +/*- + *----------------------------------------------------------------------- + * pixman_region_validate -- + * + * Take a ``region'' which is a non-y-x-banded random collection of + * rectangles, and compute a nice region which is the union of all the + * rectangles. + * + * Results: + * TRUE if successful. + * + * Side Effects: + * The passed-in ``region'' may be modified. + * overlap set to TRUE if any retangles overlapped, + * else FALSE; + * + * Strategy: + * Step 1. Sort the rectangles into ascending order with primary key y1 + * and secondary key x1. + * + * Step 2. Split the rectangles into the minimum number of proper y-x + * banded regions. This may require horizontally merging + * rectangles, and vertically coalescing bands. With any luck, + * this step in an identity transformation (ala the Box widget), + * or a coalescing into 1 box (ala Menus). + * + * Step 3. Merge the separate regions down to a single region by calling + * pixman_region_union. Maximize the work each pixman_region_union call does by using + * a binary merge. + * + *----------------------------------------------------------------------- + */ + +static pixman_bool_t +validate (region_type_t * badreg) +{ + /* Descriptor for regions under construction in Step 2. */ + typedef struct + { + region_type_t reg; + int prev_band; + int cur_band; + } region_info_t; + + region_info_t stack_regions[64]; + + int numRects; /* Original numRects for badreg */ + region_info_t *ri; /* Array of current regions */ + int num_ri; /* Number of entries used in ri */ + int size_ri; /* Number of entries available in ri */ + int i; /* Index into rects */ + int j; /* Index into ri */ + region_info_t *rit; /* &ri[j] */ + region_type_t *reg; /* ri[j].reg */ + box_type_t *box; /* Current box in rects */ + box_type_t *ri_box; /* Last box in ri[j].reg */ + region_type_t *hreg; /* ri[j_half].reg */ + pixman_bool_t ret = TRUE; + + if (!badreg->data) + { + GOOD (badreg); + return TRUE; + } + + numRects = badreg->data->numRects; + if (!numRects) + { + if (PIXREGION_NAR (badreg)) + return FALSE; + GOOD (badreg); + return TRUE; + } + + if (badreg->extents.x1 < badreg->extents.x2) + { + if ((numRects) == 1) + { + FREE_DATA (badreg); + badreg->data = (region_data_type_t *) NULL; + } + else + { + DOWNSIZE (badreg, numRects); + } + + GOOD (badreg); + + return TRUE; + } + + /* Step 1: Sort the rects array into ascending (y1, x1) order */ + quick_sort_rects (PIXREGION_BOXPTR (badreg), numRects); + + /* Step 2: Scatter the sorted array into the minimum number of regions */ + + /* Set up the first region to be the first rectangle in badreg */ + /* Note that step 2 code will never overflow the ri[0].reg rects array */ + ri = stack_regions; + size_ri = sizeof (stack_regions) / sizeof (stack_regions[0]); + num_ri = 1; + ri[0].prev_band = 0; + ri[0].cur_band = 0; + ri[0].reg = *badreg; + box = PIXREGION_BOXPTR (&ri[0].reg); + ri[0].reg.extents = *box; + ri[0].reg.data->numRects = 1; + badreg->extents = *pixman_region_empty_box; + badreg->data = pixman_region_empty_data; + + /* Now scatter rectangles into the minimum set of valid regions. If the + * next rectangle to be added to a region would force an existing rectangle + * in the region to be split up in order to maintain y-x banding, just + * forget it. Try the next region. If it doesn't fit cleanly into any + * region, make a new one. + */ + + for (i = numRects; --i > 0;) + { + box++; + /* Look for a region to append box to */ + for (j = num_ri, rit = ri; --j >= 0; rit++) + { + reg = &rit->reg; + ri_box = PIXREGION_END (reg); + + if (box->y1 == ri_box->y1 && box->y2 == ri_box->y2) + { + /* box is in same band as ri_box. Merge or append it */ + if (box->x1 <= ri_box->x2) + { + /* Merge it with ri_box */ + if (box->x2 > ri_box->x2) + ri_box->x2 = box->x2; + } + else + { + RECTALLOC_BAIL (reg, 1, bail); + *PIXREGION_TOP (reg) = *box; + reg->data->numRects++; + } + + goto next_rect; /* So sue me */ + } + else if (box->y1 >= ri_box->y2) + { + /* Put box into new band */ + if (reg->extents.x2 < ri_box->x2) + reg->extents.x2 = ri_box->x2; + + if (reg->extents.x1 > box->x1) + reg->extents.x1 = box->x1; + + COALESCE (reg, rit->prev_band, rit->cur_band); + rit->cur_band = reg->data->numRects; + RECTALLOC_BAIL (reg, 1, bail); + *PIXREGION_TOP (reg) = *box; + reg->data->numRects++; + + goto next_rect; + } + /* Well, this region was inappropriate. Try the next one. */ + } /* for j */ + + /* Uh-oh. No regions were appropriate. Create a new one. */ + if (size_ri == num_ri) + { + size_t data_size; + + /* Oops, allocate space for new region information */ + size_ri <<= 1; + + data_size = size_ri * sizeof(region_info_t); + if (data_size / size_ri != sizeof(region_info_t)) + goto bail; + + if (ri == stack_regions) + { + rit = malloc (data_size); + if (!rit) + goto bail; + memcpy (rit, ri, num_ri * sizeof (region_info_t)); + } + else + { + rit = (region_info_t *) realloc (ri, data_size); + if (!rit) + goto bail; + } + ri = rit; + rit = &ri[num_ri]; + } + num_ri++; + rit->prev_band = 0; + rit->cur_band = 0; + rit->reg.extents = *box; + rit->reg.data = (region_data_type_t *)NULL; + + /* MUST force allocation */ + if (!pixman_rect_alloc (&rit->reg, (i + num_ri) / num_ri)) + goto bail; + + next_rect: ; + } /* for i */ + + /* Make a final pass over each region in order to COALESCE and set + * extents.x2 and extents.y2 + */ + for (j = num_ri, rit = ri; --j >= 0; rit++) + { + reg = &rit->reg; + ri_box = PIXREGION_END (reg); + reg->extents.y2 = ri_box->y2; + + if (reg->extents.x2 < ri_box->x2) + reg->extents.x2 = ri_box->x2; + + COALESCE (reg, rit->prev_band, rit->cur_band); + + if (reg->data->numRects == 1) /* keep unions happy below */ + { + FREE_DATA (reg); + reg->data = (region_data_type_t *)NULL; + } + } + + /* Step 3: Union all regions into a single region */ + while (num_ri > 1) + { + int half = num_ri / 2; + for (j = num_ri & 1; j < (half + (num_ri & 1)); j++) + { + reg = &ri[j].reg; + hreg = &ri[j + half].reg; + + if (!pixman_op (reg, reg, hreg, pixman_region_union_o, TRUE, TRUE)) + ret = FALSE; + + if (hreg->extents.x1 < reg->extents.x1) + reg->extents.x1 = hreg->extents.x1; + + if (hreg->extents.y1 < reg->extents.y1) + reg->extents.y1 = hreg->extents.y1; + + if (hreg->extents.x2 > reg->extents.x2) + reg->extents.x2 = hreg->extents.x2; + + if (hreg->extents.y2 > reg->extents.y2) + reg->extents.y2 = hreg->extents.y2; + + FREE_DATA (hreg); + } + + num_ri -= half; + + if (!ret) + goto bail; + } + + *badreg = ri[0].reg; + + if (ri != stack_regions) + free (ri); + + GOOD (badreg); + return ret; + +bail: + for (i = 0; i < num_ri; i++) + FREE_DATA (&ri[i].reg); + + if (ri != stack_regions) + free (ri); + + return pixman_break (badreg); +} + +/*====================================================================== + * Region Subtraction + *====================================================================*/ + +/*- + *----------------------------------------------------------------------- + * pixman_region_subtract_o -- + * Overlapping band subtraction. x1 is the left-most point not yet + * checked. + * + * Results: + * TRUE if successful. + * + * Side Effects: + * region may have rectangles added to it. + * + *----------------------------------------------------------------------- + */ +/*ARGSUSED*/ +static pixman_bool_t +pixman_region_subtract_o (region_type_t * region, + box_type_t * r1, + box_type_t * r1_end, + box_type_t * r2, + box_type_t * r2_end, + int y1, + int y2) +{ + box_type_t * next_rect; + int x1; + + x1 = r1->x1; + + critical_if_fail (y1 < y2); + critical_if_fail (r1 != r1_end && r2 != r2_end); + + next_rect = PIXREGION_TOP (region); + + do + { + if (r2->x2 <= x1) + { + /* + * Subtrahend entirely to left of minuend: go to next subtrahend. + */ + r2++; + } + else if (r2->x1 <= x1) + { + /* + * Subtrahend precedes minuend: nuke left edge of minuend. + */ + x1 = r2->x2; + if (x1 >= r1->x2) + { + /* + * Minuend completely covered: advance to next minuend and + * reset left fence to edge of new minuend. + */ + r1++; + if (r1 != r1_end) + x1 = r1->x1; + } + else + { + /* + * Subtrahend now used up since it doesn't extend beyond + * minuend + */ + r2++; + } + } + else if (r2->x1 < r1->x2) + { + /* + * Left part of subtrahend covers part of minuend: add uncovered + * part of minuend to region and skip to next subtrahend. + */ + critical_if_fail (x1 < r2->x1); + NEWRECT (region, next_rect, x1, y1, r2->x1, y2); + + x1 = r2->x2; + if (x1 >= r1->x2) + { + /* + * Minuend used up: advance to new... + */ + r1++; + if (r1 != r1_end) + x1 = r1->x1; + } + else + { + /* + * Subtrahend used up + */ + r2++; + } + } + else + { + /* + * Minuend used up: add any remaining piece before advancing. + */ + if (r1->x2 > x1) + NEWRECT (region, next_rect, x1, y1, r1->x2, y2); + + r1++; + + if (r1 != r1_end) + x1 = r1->x1; + } + } + while ((r1 != r1_end) && (r2 != r2_end)); + + /* + * Add remaining minuend rectangles to region. + */ + while (r1 != r1_end) + { + critical_if_fail (x1 < r1->x2); + + NEWRECT (region, next_rect, x1, y1, r1->x2, y2); + + r1++; + if (r1 != r1_end) + x1 = r1->x1; + } + return TRUE; +} + +/*- + *----------------------------------------------------------------------- + * pixman_region_subtract -- + * Subtract reg_s from reg_m and leave the result in reg_d. + * S stands for subtrahend, M for minuend and D for difference. + * + * Results: + * TRUE if successful. + * + * Side Effects: + * reg_d is overwritten. + * + *----------------------------------------------------------------------- + */ +PIXMAN_EXPORT pixman_bool_t +PREFIX (_subtract) (region_type_t *reg_d, + region_type_t *reg_m, + region_type_t *reg_s) +{ + GOOD (reg_m); + GOOD (reg_s); + GOOD (reg_d); + + /* check for trivial rejects */ + if (PIXREGION_NIL (reg_m) || PIXREGION_NIL (reg_s) || + !EXTENTCHECK (®_m->extents, ®_s->extents)) + { + if (PIXREGION_NAR (reg_s)) + return pixman_break (reg_d); + + return PREFIX (_copy) (reg_d, reg_m); + } + else if (reg_m == reg_s) + { + FREE_DATA (reg_d); + reg_d->extents.x2 = reg_d->extents.x1; + reg_d->extents.y2 = reg_d->extents.y1; + reg_d->data = pixman_region_empty_data; + + return TRUE; + } + + /* Add those rectangles in region 1 that aren't in region 2, + do yucky subtraction for overlaps, and + just throw away rectangles in region 2 that aren't in region 1 */ + if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE)) + return FALSE; + + /* + * Can't alter reg_d's extents before we call pixman_op because + * it might be one of the source regions and pixman_op depends + * on the extents of those regions being unaltered. Besides, this + * way there's no checking against rectangles that will be nuked + * due to coalescing, so we have to examine fewer rectangles. + */ + pixman_set_extents (reg_d); + GOOD (reg_d); + return TRUE; +} + +/*====================================================================== + * Region Inversion + *====================================================================*/ + +/*- + *----------------------------------------------------------------------- + * pixman_region_inverse -- + * Take a region and a box and return a region that is everything + * in the box but not in the region. The careful reader will note + * that this is the same as subtracting the region from the box... + * + * Results: + * TRUE. + * + * Side Effects: + * new_reg is overwritten. + * + *----------------------------------------------------------------------- + */ +PIXMAN_EXPORT pixman_bool_t +PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */ + region_type_t *reg1, /* Region to invert */ + box_type_t * inv_rect) /* Bounding box for inversion */ +{ + region_type_t inv_reg; /* Quick and dirty region made from the + * bounding box */ + GOOD (reg1); + GOOD (new_reg); + + /* check for trivial rejects */ + if (PIXREGION_NIL (reg1) || !EXTENTCHECK (inv_rect, ®1->extents)) + { + if (PIXREGION_NAR (reg1)) + return pixman_break (new_reg); + + new_reg->extents = *inv_rect; + FREE_DATA (new_reg); + new_reg->data = (region_data_type_t *)NULL; + + return TRUE; + } + + /* Add those rectangles in region 1 that aren't in region 2, + * do yucky subtraction for overlaps, and + * just throw away rectangles in region 2 that aren't in region 1 + */ + inv_reg.extents = *inv_rect; + inv_reg.data = (region_data_type_t *)NULL; + if (!pixman_op (new_reg, &inv_reg, reg1, pixman_region_subtract_o, TRUE, FALSE)) + return FALSE; + + /* + * Can't alter new_reg's extents before we call pixman_op because + * it might be one of the source regions and pixman_op depends + * on the extents of those regions being unaltered. Besides, this + * way there's no checking against rectangles that will be nuked + * due to coalescing, so we have to examine fewer rectangles. + */ + pixman_set_extents (new_reg); + GOOD (new_reg); + return TRUE; +} + +/* In time O(log n), locate the first box whose y2 is greater than y. + * Return @end if no such box exists. + */ +static box_type_t * +find_box_for_y (box_type_t *begin, box_type_t *end, int y) +{ + box_type_t *mid; + + if (end == begin) + return end; + + if (end - begin == 1) + { + if (begin->y2 > y) + return begin; + else + return end; + } + + mid = begin + (end - begin) / 2; + if (mid->y2 > y) + { + /* If no box is found in [begin, mid], the function + * will return @mid, which is then known to be the + * correct answer. + */ + return find_box_for_y (begin, mid, y); + } + else + { + return find_box_for_y (mid, end, y); + } +} + +/* + * rect_in(region, rect) + * This routine takes a pointer to a region and a pointer to a box + * and determines if the box is outside/inside/partly inside the region. + * + * The idea is to travel through the list of rectangles trying to cover the + * passed box with them. Anytime a piece of the rectangle isn't covered + * by a band of rectangles, part_out is set TRUE. Any time a rectangle in + * the region covers part of the box, part_in is set TRUE. The process ends + * when either the box has been completely covered (we reached a band that + * doesn't overlap the box, part_in is TRUE and part_out is false), the + * box has been partially covered (part_in == part_out == TRUE -- because of + * the banding, the first time this is true we know the box is only + * partially in the region) or is outside the region (we reached a band + * that doesn't overlap the box at all and part_in is false) + */ +PIXMAN_EXPORT pixman_region_overlap_t +PREFIX (_contains_rectangle) (region_type_t * region, + box_type_t * prect) +{ + box_type_t * pbox; + box_type_t * pbox_end; + int part_in, part_out; + int numRects; + int x, y; + + GOOD (region); + + numRects = PIXREGION_NUMRECTS (region); + + /* useful optimization */ + if (!numRects || !EXTENTCHECK (®ion->extents, prect)) + return(PIXMAN_REGION_OUT); + + if (numRects == 1) + { + /* We know that it must be PIXMAN_REGION_IN or PIXMAN_REGION_PART */ + if (SUBSUMES (®ion->extents, prect)) + return(PIXMAN_REGION_IN); + else + return(PIXMAN_REGION_PART); + } + + part_out = FALSE; + part_in = FALSE; + + /* (x,y) starts at upper left of rect, moving to the right and down */ + x = prect->x1; + y = prect->y1; + + /* can stop when both part_out and part_in are TRUE, or we reach prect->y2 */ + for (pbox = PIXREGION_BOXPTR (region), pbox_end = pbox + numRects; + pbox != pbox_end; + pbox++) + { + /* getting up to speed or skipping remainder of band */ + if (pbox->y2 <= y) + { + if ((pbox = find_box_for_y (pbox, pbox_end, y)) == pbox_end) + break; + } + + if (pbox->y1 > y) + { + part_out = TRUE; /* missed part of rectangle above */ + if (part_in || (pbox->y1 >= prect->y2)) + break; + y = pbox->y1; /* x guaranteed to be == prect->x1 */ + } + + if (pbox->x2 <= x) + continue; /* not far enough over yet */ + + if (pbox->x1 > x) + { + part_out = TRUE; /* missed part of rectangle to left */ + if (part_in) + break; + } + + if (pbox->x1 < prect->x2) + { + part_in = TRUE; /* definitely overlap */ + if (part_out) + break; + } + + if (pbox->x2 >= prect->x2) + { + y = pbox->y2; /* finished with this band */ + if (y >= prect->y2) + break; + x = prect->x1; /* reset x out to left again */ + } + else + { + /* + * Because boxes in a band are maximal width, if the first box + * to overlap the rectangle doesn't completely cover it in that + * band, the rectangle must be partially out, since some of it + * will be uncovered in that band. part_in will have been set true + * by now... + */ + part_out = TRUE; + break; + } + } + + if (part_in) + { + if (y < prect->y2) + return PIXMAN_REGION_PART; + else + return PIXMAN_REGION_IN; + } + else + { + return PIXMAN_REGION_OUT; + } +} + +/* PREFIX(_translate) (region, x, y) + * translates in place + */ + +PIXMAN_EXPORT void +PREFIX (_translate) (region_type_t *region, int x, int y) +{ + overflow_int_t x1, x2, y1, y2; + int nbox; + box_type_t * pbox; + + GOOD (region); + region->extents.x1 = x1 = region->extents.x1 + x; + region->extents.y1 = y1 = region->extents.y1 + y; + region->extents.x2 = x2 = region->extents.x2 + x; + region->extents.y2 = y2 = region->extents.y2 + y; + + if (((x1 - PIXMAN_REGION_MIN) | (y1 - PIXMAN_REGION_MIN) | (PIXMAN_REGION_MAX - x2) | (PIXMAN_REGION_MAX - y2)) >= 0) + { + if (region->data && (nbox = region->data->numRects)) + { + for (pbox = PIXREGION_BOXPTR (region); nbox--; pbox++) + { + pbox->x1 += x; + pbox->y1 += y; + pbox->x2 += x; + pbox->y2 += y; + } + } + return; + } + + if (((x2 - PIXMAN_REGION_MIN) | (y2 - PIXMAN_REGION_MIN) | (PIXMAN_REGION_MAX - x1) | (PIXMAN_REGION_MAX - y1)) <= 0) + { + region->extents.x2 = region->extents.x1; + region->extents.y2 = region->extents.y1; + FREE_DATA (region); + region->data = pixman_region_empty_data; + return; + } + + if (x1 < PIXMAN_REGION_MIN) + region->extents.x1 = PIXMAN_REGION_MIN; + else if (x2 > PIXMAN_REGION_MAX) + region->extents.x2 = PIXMAN_REGION_MAX; + + if (y1 < PIXMAN_REGION_MIN) + region->extents.y1 = PIXMAN_REGION_MIN; + else if (y2 > PIXMAN_REGION_MAX) + region->extents.y2 = PIXMAN_REGION_MAX; + + if (region->data && (nbox = region->data->numRects)) + { + box_type_t * pbox_out; + + for (pbox_out = pbox = PIXREGION_BOXPTR (region); nbox--; pbox++) + { + pbox_out->x1 = x1 = pbox->x1 + x; + pbox_out->y1 = y1 = pbox->y1 + y; + pbox_out->x2 = x2 = pbox->x2 + x; + pbox_out->y2 = y2 = pbox->y2 + y; + + if (((x2 - PIXMAN_REGION_MIN) | (y2 - PIXMAN_REGION_MIN) | + (PIXMAN_REGION_MAX - x1) | (PIXMAN_REGION_MAX - y1)) <= 0) + { + region->data->numRects--; + continue; + } + + if (x1 < PIXMAN_REGION_MIN) + pbox_out->x1 = PIXMAN_REGION_MIN; + else if (x2 > PIXMAN_REGION_MAX) + pbox_out->x2 = PIXMAN_REGION_MAX; + + if (y1 < PIXMAN_REGION_MIN) + pbox_out->y1 = PIXMAN_REGION_MIN; + else if (y2 > PIXMAN_REGION_MAX) + pbox_out->y2 = PIXMAN_REGION_MAX; + + pbox_out++; + } + + if (pbox_out != pbox) + { + if (region->data->numRects == 1) + { + region->extents = *PIXREGION_BOXPTR (region); + FREE_DATA (region); + region->data = (region_data_type_t *)NULL; + } + else + { + pixman_set_extents (region); + } + } + } + + GOOD (region); +} + +PIXMAN_EXPORT void +PREFIX (_reset) (region_type_t *region, box_type_t *box) +{ + GOOD (region); + + critical_if_fail (GOOD_RECT (box)); + + region->extents = *box; + + FREE_DATA (region); + + region->data = NULL; +} + +PIXMAN_EXPORT void +PREFIX (_clear) (region_type_t *region) +{ + GOOD (region); + FREE_DATA (region); + + region->extents = *pixman_region_empty_box; + region->data = pixman_region_empty_data; +} + +/* box is "return" value */ +PIXMAN_EXPORT int +PREFIX (_contains_point) (region_type_t * region, + int x, int y, + box_type_t * box) +{ + box_type_t *pbox, *pbox_end; + int numRects; + + GOOD (region); + numRects = PIXREGION_NUMRECTS (region); + + if (!numRects || !INBOX (®ion->extents, x, y)) + return(FALSE); + + if (numRects == 1) + { + if (box) + *box = region->extents; + + return(TRUE); + } + + pbox = PIXREGION_BOXPTR (region); + pbox_end = pbox + numRects; + + pbox = find_box_for_y (pbox, pbox_end, y); + + for (;pbox != pbox_end; pbox++) + { + if ((y < pbox->y1) || (x < pbox->x1)) + break; /* missed it */ + + if (x >= pbox->x2) + continue; /* not there yet */ + + if (box) + *box = *pbox; + + return(TRUE); + } + + return(FALSE); +} + +PIXMAN_EXPORT int +PREFIX (_not_empty) (region_type_t * region) +{ + GOOD (region); + + return(!PIXREGION_NIL (region)); +} + +PIXMAN_EXPORT box_type_t * +PREFIX (_extents) (region_type_t * region) +{ + GOOD (region); + + return(®ion->extents); +} + +/* + * Clip a list of scanlines to a region. The caller has allocated the + * space. FSorted is non-zero if the scanline origins are in ascending order. + * + * returns the number of new, clipped scanlines. + */ + +PIXMAN_EXPORT pixman_bool_t +PREFIX (_selfcheck) (region_type_t *reg) +{ + int i, numRects; + + if ((reg->extents.x1 > reg->extents.x2) || + (reg->extents.y1 > reg->extents.y2)) + { + return FALSE; + } + + numRects = PIXREGION_NUMRECTS (reg); + if (!numRects) + { + return ((reg->extents.x1 == reg->extents.x2) && + (reg->extents.y1 == reg->extents.y2) && + (reg->data->size || (reg->data == pixman_region_empty_data))); + } + else if (numRects == 1) + { + return (!reg->data); + } + else + { + box_type_t * pbox_p, * pbox_n; + box_type_t box; + + pbox_p = PIXREGION_RECTS (reg); + box = *pbox_p; + box.y2 = pbox_p[numRects - 1].y2; + pbox_n = pbox_p + 1; + + for (i = numRects; --i > 0; pbox_p++, pbox_n++) + { + if ((pbox_n->x1 >= pbox_n->x2) || + (pbox_n->y1 >= pbox_n->y2)) + { + return FALSE; + } + + if (pbox_n->x1 < box.x1) + box.x1 = pbox_n->x1; + + if (pbox_n->x2 > box.x2) + box.x2 = pbox_n->x2; + + if ((pbox_n->y1 < pbox_p->y1) || + ((pbox_n->y1 == pbox_p->y1) && + ((pbox_n->x1 < pbox_p->x2) || (pbox_n->y2 != pbox_p->y2)))) + { + return FALSE; + } + } + + return ((box.x1 == reg->extents.x1) && + (box.x2 == reg->extents.x2) && + (box.y1 == reg->extents.y1) && + (box.y2 == reg->extents.y2)); + } +} + +PIXMAN_EXPORT pixman_bool_t +PREFIX (_init_rects) (region_type_t *region, + const box_type_t *boxes, int count) +{ + box_type_t *rects; + int displacement; + int i; + + /* if it's 1, then we just want to set the extents, so call + * the existing method. */ + if (count == 1) + { + PREFIX (_init_rect) (region, + boxes[0].x1, + boxes[0].y1, + boxes[0].x2 - boxes[0].x1, + boxes[0].y2 - boxes[0].y1); + return TRUE; + } + + PREFIX (_init) (region); + + /* if it's 0, don't call pixman_rect_alloc -- 0 rectangles is + * a special case, and causing pixman_rect_alloc would cause + * us to leak memory (because the 0-rect case should be the + * static pixman_region_empty_data data). + */ + if (count == 0) + return TRUE; + + if (!pixman_rect_alloc (region, count)) + return FALSE; + + rects = PIXREGION_RECTS (region); + + /* Copy in the rects */ + memcpy (rects, boxes, sizeof(box_type_t) * count); + region->data->numRects = count; + + /* Eliminate empty and malformed rectangles */ + displacement = 0; + + for (i = 0; i < count; ++i) + { + box_type_t *box = &rects[i]; + + if (box->x1 >= box->x2 || box->y1 >= box->y2) + displacement++; + else if (displacement) + rects[i - displacement] = rects[i]; + } + + region->data->numRects -= displacement; + + /* If eliminating empty rectangles caused there + * to be only 0 or 1 rectangles, deal with that. + */ + if (region->data->numRects == 0) + { + FREE_DATA (region); + PREFIX (_init) (region); + + return TRUE; + } + + if (region->data->numRects == 1) + { + region->extents = rects[0]; + + FREE_DATA (region); + region->data = NULL; + + GOOD (region); + + return TRUE; + } + + /* Validate */ + region->extents.x1 = region->extents.x2 = 0; + + return validate (region); +} + +#define READ(_ptr) (*(_ptr)) + +static inline box_type_t * +bitmap_addrect (region_type_t *reg, + box_type_t *r, + box_type_t **first_rect, + int rx1, int ry1, + int rx2, int ry2) +{ + if ((rx1 < rx2) && (ry1 < ry2) && + (!(reg->data->numRects && + ((r-1)->y1 == ry1) && ((r-1)->y2 == ry2) && + ((r-1)->x1 <= rx1) && ((r-1)->x2 >= rx2)))) + { + if (reg->data->numRects == reg->data->size) + { + if (!pixman_rect_alloc (reg, 1)) + return NULL; + *first_rect = PIXREGION_BOXPTR(reg); + r = *first_rect + reg->data->numRects; + } + r->x1 = rx1; + r->y1 = ry1; + r->x2 = rx2; + r->y2 = ry2; + reg->data->numRects++; + if (r->x1 < reg->extents.x1) + reg->extents.x1 = r->x1; + if (r->x2 > reg->extents.x2) + reg->extents.x2 = r->x2; + r++; + } + return r; +} + +/* Convert bitmap clip mask into clipping region. + * First, goes through each line and makes boxes by noting the transitions + * from 0 to 1 and 1 to 0. + * Then it coalesces the current line with the previous if they have boxes + * at the same X coordinates. + * Stride is in number of uint32_t per line. + */ +PIXMAN_EXPORT void +PREFIX (_init_from_image) (region_type_t *region, + pixman_image_t *image) +{ + uint32_t mask0 = 0xffffffff & ~SCREEN_SHIFT_RIGHT(0xffffffff, 1); + box_type_t *first_rect, *rects, *prect_line_start; + box_type_t *old_rect, *new_rect; + uint32_t *pw, w, *pw_line, *pw_line_end; + int irect_prev_start, irect_line_start; + int h, base, rx1 = 0, crects; + int ib; + pixman_bool_t in_box, same; + int width, height, stride; + + PREFIX(_init) (region); + + critical_if_fail (region->data); + + return_if_fail (image->type == BITS); + return_if_fail (image->bits.format == PIXMAN_a1); + + pw_line = pixman_image_get_data (image); + width = pixman_image_get_width (image); + height = pixman_image_get_height (image); + stride = pixman_image_get_stride (image) / 4; + + first_rect = PIXREGION_BOXPTR(region); + rects = first_rect; + + region->extents.x1 = width - 1; + region->extents.x2 = 0; + irect_prev_start = -1; + for (h = 0; h < height; h++) + { + pw = pw_line; + pw_line += stride; + irect_line_start = rects - first_rect; + + /* If the Screen left most bit of the word is set, we're starting in + * a box */ + if (READ(pw) & mask0) + { + in_box = TRUE; + rx1 = 0; + } + else + { + in_box = FALSE; + } + + /* Process all words which are fully in the pixmap */ + pw_line_end = pw + (width >> 5); + for (base = 0; pw < pw_line_end; base += 32) + { + w = READ(pw++); + if (in_box) + { + if (!~w) + continue; + } + else + { + if (!w) + continue; + } + for (ib = 0; ib < 32; ib++) + { + /* If the Screen left most bit of the word is set, we're + * starting a box */ + if (w & mask0) + { + if (!in_box) + { + rx1 = base + ib; + /* start new box */ + in_box = TRUE; + } + } + else + { + if (in_box) + { + /* end box */ + rects = bitmap_addrect (region, rects, &first_rect, + rx1, h, base + ib, h + 1); + if (rects == NULL) + goto error; + in_box = FALSE; + } + } + /* Shift the word VISUALLY left one. */ + w = SCREEN_SHIFT_LEFT(w, 1); + } + } + + if (width & 31) + { + /* Process final partial word on line */ + w = READ(pw++); + for (ib = 0; ib < (width & 31); ib++) + { + /* If the Screen left most bit of the word is set, we're + * starting a box */ + if (w & mask0) + { + if (!in_box) + { + rx1 = base + ib; + /* start new box */ + in_box = TRUE; + } + } + else + { + if (in_box) + { + /* end box */ + rects = bitmap_addrect(region, rects, &first_rect, + rx1, h, base + ib, h + 1); + if (rects == NULL) + goto error; + in_box = FALSE; + } + } + /* Shift the word VISUALLY left one. */ + w = SCREEN_SHIFT_LEFT(w, 1); + } + } + /* If scanline ended with last bit set, end the box */ + if (in_box) + { + rects = bitmap_addrect(region, rects, &first_rect, + rx1, h, base + (width & 31), h + 1); + if (rects == NULL) + goto error; + } + /* if all rectangles on this line have the same x-coords as + * those on the previous line, then add 1 to all the previous y2s and + * throw away all the rectangles from this line + */ + same = FALSE; + if (irect_prev_start != -1) + { + crects = irect_line_start - irect_prev_start; + if (crects != 0 && + crects == ((rects - first_rect) - irect_line_start)) + { + old_rect = first_rect + irect_prev_start; + new_rect = prect_line_start = first_rect + irect_line_start; + same = TRUE; + while (old_rect < prect_line_start) + { + if ((old_rect->x1 != new_rect->x1) || + (old_rect->x2 != new_rect->x2)) + { + same = FALSE; + break; + } + old_rect++; + new_rect++; + } + if (same) + { + old_rect = first_rect + irect_prev_start; + while (old_rect < prect_line_start) + { + old_rect->y2 += 1; + old_rect++; + } + rects -= crects; + region->data->numRects -= crects; + } + } + } + if(!same) + irect_prev_start = irect_line_start; + } + if (!region->data->numRects) + { + region->extents.x1 = region->extents.x2 = 0; + } + else + { + region->extents.y1 = PIXREGION_BOXPTR(region)->y1; + region->extents.y2 = PIXREGION_END(region)->y2; + if (region->data->numRects == 1) + { + free (region->data); + region->data = NULL; + } + } + + error: + return; +} diff --git a/src/pixman/pixman/pixman-region16.c b/src/pixman/pixman/pixman-region16.c new file mode 100644 index 0000000..d88d338 --- /dev/null +++ b/src/pixman/pixman/pixman-region16.c @@ -0,0 +1,67 @@ +/* + * Copyright © 2008 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software + * and its documentation for any purpose is hereby granted without + * fee, provided that the above copyright notice appear in all copies + * and that both that copyright notice and this permission notice + * appear in supporting documentation, and that the name of + * Red Hat, Inc. not be used in advertising or publicity pertaining to + * distribution of the software without specific, written prior + * permission. Red Hat, Inc. makes no representations about the + * suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * RED HAT, INC. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL RED HAT, INC. BE LIABLE FOR ANY SPECIAL, + * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER + * RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR + * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Soren Sandmann <sandmann@redhat.com> + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#undef PIXMAN_DISABLE_DEPRECATED + +#include "pixman-private.h" + +#include <stdlib.h> + +typedef pixman_box16_t box_type_t; +typedef pixman_region16_data_t region_data_type_t; +typedef pixman_region16_t region_type_t; +typedef int32_t overflow_int_t; + +typedef struct { + int x, y; +} point_type_t; + +#define PREFIX(x) pixman_region##x + +#define PIXMAN_REGION_MAX INT16_MAX +#define PIXMAN_REGION_MIN INT16_MIN + +#include "pixman-region.c" + +/* This function exists only to make it possible to preserve the X ABI - + * it should go away at first opportunity. + * + * The problem is that the X ABI exports the three structs and has used + * them through macros. So the X server calls this function with + * the addresses of those structs which makes the existing code continue to + * work. + */ +PIXMAN_EXPORT void +pixman_region_set_static_pointers (pixman_box16_t *empty_box, + pixman_region16_data_t *empty_data, + pixman_region16_data_t *broken_data) +{ + pixman_region_empty_box = empty_box; + pixman_region_empty_data = empty_data; + pixman_broken_data = broken_data; +} diff --git a/src/pixman/pixman/pixman-region32.c b/src/pixman/pixman/pixman-region32.c new file mode 100644 index 0000000..abd6b1a --- /dev/null +++ b/src/pixman/pixman/pixman-region32.c @@ -0,0 +1,47 @@ +/* + * Copyright © 2008 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software + * and its documentation for any purpose is hereby granted without + * fee, provided that the above copyright notice appear in all copies + * and that both that copyright notice and this permission notice + * appear in supporting documentation, and that the name of + * Red Hat, Inc. not be used in advertising or publicity pertaining to + * distribution of the software without specific, written prior + * permission. Red Hat, Inc. makes no representations about the + * suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * RED HAT, INC. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL RED HAT, INC. BE LIABLE FOR ANY SPECIAL, + * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER + * RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR + * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Soren Sandmann <sandmann@redhat.com> + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "pixman-private.h" + +#include <stdlib.h> + +typedef pixman_box32_t box_type_t; +typedef pixman_region32_data_t region_data_type_t; +typedef pixman_region32_t region_type_t; +typedef int64_t overflow_int_t; + +typedef struct { + int x, y; +} point_type_t; + +#define PREFIX(x) pixman_region32##x + +#define PIXMAN_REGION_MAX INT32_MAX +#define PIXMAN_REGION_MIN INT32_MIN + +#include "pixman-region.c" diff --git a/src/pixman/pixman/pixman-solid-fill.c b/src/pixman/pixman/pixman-solid-fill.c new file mode 100644 index 0000000..5f9fef6 --- /dev/null +++ b/src/pixman/pixman/pixman-solid-fill.c @@ -0,0 +1,67 @@ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007, 2009 Red Hat, Inc. + * Copyright © 2009 Soren Sandmann + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include "pixman-private.h" + +static uint32_t +color_to_uint32 (const pixman_color_t *color) +{ + return + (color->alpha >> 8 << 24) | + (color->red >> 8 << 16) | + (color->green & 0xff00) | + (color->blue >> 8); +} + +static argb_t +color_to_float (const pixman_color_t *color) +{ + argb_t result; + + result.a = pixman_unorm_to_float (color->alpha, 16); + result.r = pixman_unorm_to_float (color->red, 16); + result.g = pixman_unorm_to_float (color->green, 16); + result.b = pixman_unorm_to_float (color->blue, 16); + + return result; +} + +PIXMAN_EXPORT pixman_image_t * +pixman_image_create_solid_fill (const pixman_color_t *color) +{ + pixman_image_t *img = _pixman_image_allocate (); + + if (!img) + return NULL; + + img->type = SOLID; + img->solid.color = *color; + img->solid.color_32 = color_to_uint32 (color); + img->solid.color_float = color_to_float (color); + + return img; +} + diff --git a/src/pixman/pixman/pixman-sse2.c b/src/pixman/pixman/pixman-sse2.c new file mode 100644 index 0000000..a6e7808 --- /dev/null +++ b/src/pixman/pixman/pixman-sse2.c @@ -0,0 +1,6540 @@ +/* + * Copyright © 2008 Rodrigo Kumpera + * Copyright © 2008 André Tupinambá + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Red Hat not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. Red Hat makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Rodrigo Kumpera (kumpera@gmail.com) + * André Tupinambá (andrelrt@gmail.com) + * + * Based on work by Owen Taylor and Søren Sandmann + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +/* PSHUFD is slow on a lot of old processors, and new processors have SSSE3 */ +#define PSHUFD_IS_FAST 0 + +#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */ +#include <emmintrin.h> /* for SSE2 intrinsics */ +#include "pixman-private.h" +#include "pixman-combine32.h" +#include "pixman-inlines.h" + +static __m128i mask_0080; +static __m128i mask_00ff; +static __m128i mask_0101; +static __m128i mask_ffff; +static __m128i mask_ff000000; +static __m128i mask_alpha; + +static __m128i mask_565_r; +static __m128i mask_565_g1, mask_565_g2; +static __m128i mask_565_b; +static __m128i mask_red; +static __m128i mask_green; +static __m128i mask_blue; + +static __m128i mask_565_fix_rb; +static __m128i mask_565_fix_g; + +static __m128i mask_565_rb; +static __m128i mask_565_pack_multiplier; + +static force_inline __m128i +unpack_32_1x128 (uint32_t data) +{ + return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ()); +} + +static force_inline void +unpack_128_2x128 (__m128i data, __m128i* data_lo, __m128i* data_hi) +{ + *data_lo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ()); + *data_hi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ()); +} + +static force_inline __m128i +unpack_565_to_8888 (__m128i lo) +{ + __m128i r, g, b, rb, t; + + r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red); + g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green); + b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue); + + rb = _mm_or_si128 (r, b); + t = _mm_and_si128 (rb, mask_565_fix_rb); + t = _mm_srli_epi32 (t, 5); + rb = _mm_or_si128 (rb, t); + + t = _mm_and_si128 (g, mask_565_fix_g); + t = _mm_srli_epi32 (t, 6); + g = _mm_or_si128 (g, t); + + return _mm_or_si128 (rb, g); +} + +static force_inline void +unpack_565_128_4x128 (__m128i data, + __m128i* data0, + __m128i* data1, + __m128i* data2, + __m128i* data3) +{ + __m128i lo, hi; + + lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ()); + hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ()); + + lo = unpack_565_to_8888 (lo); + hi = unpack_565_to_8888 (hi); + + unpack_128_2x128 (lo, data0, data1); + unpack_128_2x128 (hi, data2, data3); +} + +static force_inline uint16_t +pack_565_32_16 (uint32_t pixel) +{ + return (uint16_t) (((pixel >> 8) & 0xf800) | + ((pixel >> 5) & 0x07e0) | + ((pixel >> 3) & 0x001f)); +} + +static force_inline __m128i +pack_2x128_128 (__m128i lo, __m128i hi) +{ + return _mm_packus_epi16 (lo, hi); +} + +static force_inline __m128i +pack_565_2packedx128_128 (__m128i lo, __m128i hi) +{ + __m128i rb0 = _mm_and_si128 (lo, mask_565_rb); + __m128i rb1 = _mm_and_si128 (hi, mask_565_rb); + + __m128i t0 = _mm_madd_epi16 (rb0, mask_565_pack_multiplier); + __m128i t1 = _mm_madd_epi16 (rb1, mask_565_pack_multiplier); + + __m128i g0 = _mm_and_si128 (lo, mask_green); + __m128i g1 = _mm_and_si128 (hi, mask_green); + + t0 = _mm_or_si128 (t0, g0); + t1 = _mm_or_si128 (t1, g1); + + /* Simulates _mm_packus_epi32 */ + t0 = _mm_slli_epi32 (t0, 16 - 5); + t1 = _mm_slli_epi32 (t1, 16 - 5); + t0 = _mm_srai_epi32 (t0, 16); + t1 = _mm_srai_epi32 (t1, 16); + return _mm_packs_epi32 (t0, t1); +} + +static force_inline __m128i +pack_565_2x128_128 (__m128i lo, __m128i hi) +{ + __m128i data; + __m128i r, g1, g2, b; + + data = pack_2x128_128 (lo, hi); + + r = _mm_and_si128 (data, mask_565_r); + g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1); + g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2); + b = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b); + + return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b); +} + +static force_inline __m128i +pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3) +{ + return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1), + pack_565_2x128_128 (*xmm2, *xmm3)); +} + +static force_inline int +is_opaque (__m128i x) +{ + __m128i ffs = _mm_cmpeq_epi8 (x, x); + + return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888; +} + +static force_inline int +is_zero (__m128i x) +{ + return _mm_movemask_epi8 ( + _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff; +} + +static force_inline int +is_transparent (__m128i x) +{ + return (_mm_movemask_epi8 ( + _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888; +} + +static force_inline __m128i +expand_pixel_32_1x128 (uint32_t data) +{ + return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0)); +} + +static force_inline __m128i +expand_alpha_1x128 (__m128i data) +{ + return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data, + _MM_SHUFFLE (3, 3, 3, 3)), + _MM_SHUFFLE (3, 3, 3, 3)); +} + +static force_inline void +expand_alpha_2x128 (__m128i data_lo, + __m128i data_hi, + __m128i* alpha_lo, + __m128i* alpha_hi) +{ + __m128i lo, hi; + + lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3)); + hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3)); + + *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3)); + *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3)); +} + +static force_inline void +expand_alpha_rev_2x128 (__m128i data_lo, + __m128i data_hi, + __m128i* alpha_lo, + __m128i* alpha_hi) +{ + __m128i lo, hi; + + lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0)); + hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0)); + *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0)); + *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0)); +} + +static force_inline void +pix_multiply_2x128 (__m128i* data_lo, + __m128i* data_hi, + __m128i* alpha_lo, + __m128i* alpha_hi, + __m128i* ret_lo, + __m128i* ret_hi) +{ + __m128i lo, hi; + + lo = _mm_mullo_epi16 (*data_lo, *alpha_lo); + hi = _mm_mullo_epi16 (*data_hi, *alpha_hi); + lo = _mm_adds_epu16 (lo, mask_0080); + hi = _mm_adds_epu16 (hi, mask_0080); + *ret_lo = _mm_mulhi_epu16 (lo, mask_0101); + *ret_hi = _mm_mulhi_epu16 (hi, mask_0101); +} + +static force_inline void +pix_add_multiply_2x128 (__m128i* src_lo, + __m128i* src_hi, + __m128i* alpha_dst_lo, + __m128i* alpha_dst_hi, + __m128i* dst_lo, + __m128i* dst_hi, + __m128i* alpha_src_lo, + __m128i* alpha_src_hi, + __m128i* ret_lo, + __m128i* ret_hi) +{ + __m128i t1_lo, t1_hi; + __m128i t2_lo, t2_hi; + + pix_multiply_2x128 (src_lo, src_hi, alpha_dst_lo, alpha_dst_hi, &t1_lo, &t1_hi); + pix_multiply_2x128 (dst_lo, dst_hi, alpha_src_lo, alpha_src_hi, &t2_lo, &t2_hi); + + *ret_lo = _mm_adds_epu8 (t1_lo, t2_lo); + *ret_hi = _mm_adds_epu8 (t1_hi, t2_hi); +} + +static force_inline void +negate_2x128 (__m128i data_lo, + __m128i data_hi, + __m128i* neg_lo, + __m128i* neg_hi) +{ + *neg_lo = _mm_xor_si128 (data_lo, mask_00ff); + *neg_hi = _mm_xor_si128 (data_hi, mask_00ff); +} + +static force_inline void +invert_colors_2x128 (__m128i data_lo, + __m128i data_hi, + __m128i* inv_lo, + __m128i* inv_hi) +{ + __m128i lo, hi; + + lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2)); + hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2)); + *inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2)); + *inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2)); +} + +static force_inline void +over_2x128 (__m128i* src_lo, + __m128i* src_hi, + __m128i* alpha_lo, + __m128i* alpha_hi, + __m128i* dst_lo, + __m128i* dst_hi) +{ + __m128i t1, t2; + + negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2); + + pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi); + + *dst_lo = _mm_adds_epu8 (*src_lo, *dst_lo); + *dst_hi = _mm_adds_epu8 (*src_hi, *dst_hi); +} + +static force_inline void +over_rev_non_pre_2x128 (__m128i src_lo, + __m128i src_hi, + __m128i* dst_lo, + __m128i* dst_hi) +{ + __m128i lo, hi; + __m128i alpha_lo, alpha_hi; + + expand_alpha_2x128 (src_lo, src_hi, &alpha_lo, &alpha_hi); + + lo = _mm_or_si128 (alpha_lo, mask_alpha); + hi = _mm_or_si128 (alpha_hi, mask_alpha); + + invert_colors_2x128 (src_lo, src_hi, &src_lo, &src_hi); + + pix_multiply_2x128 (&src_lo, &src_hi, &lo, &hi, &lo, &hi); + + over_2x128 (&lo, &hi, &alpha_lo, &alpha_hi, dst_lo, dst_hi); +} + +static force_inline void +in_over_2x128 (__m128i* src_lo, + __m128i* src_hi, + __m128i* alpha_lo, + __m128i* alpha_hi, + __m128i* mask_lo, + __m128i* mask_hi, + __m128i* dst_lo, + __m128i* dst_hi) +{ + __m128i s_lo, s_hi; + __m128i a_lo, a_hi; + + pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi); + pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi); + + over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi); +} + +/* load 4 pixels from a 16-byte boundary aligned address */ +static force_inline __m128i +load_128_aligned (__m128i* src) +{ + return _mm_load_si128 (src); +} + +/* load 4 pixels from a unaligned address */ +static force_inline __m128i +load_128_unaligned (const __m128i* src) +{ + return _mm_loadu_si128 (src); +} + +/* save 4 pixels using Write Combining memory on a 16-byte + * boundary aligned address + */ +static force_inline void +save_128_write_combining (__m128i* dst, + __m128i data) +{ + _mm_stream_si128 (dst, data); +} + +/* save 4 pixels on a 16-byte boundary aligned address */ +static force_inline void +save_128_aligned (__m128i* dst, + __m128i data) +{ + _mm_store_si128 (dst, data); +} + +/* save 4 pixels on a unaligned address */ +static force_inline void +save_128_unaligned (__m128i* dst, + __m128i data) +{ + _mm_storeu_si128 (dst, data); +} + +static force_inline __m128i +load_32_1x128 (uint32_t data) +{ + return _mm_cvtsi32_si128 (data); +} + +static force_inline __m128i +expand_alpha_rev_1x128 (__m128i data) +{ + return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0)); +} + +static force_inline __m128i +expand_pixel_8_1x128 (uint8_t data) +{ + return _mm_shufflelo_epi16 ( + unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0)); +} + +static force_inline __m128i +pix_multiply_1x128 (__m128i data, + __m128i alpha) +{ + return _mm_mulhi_epu16 (_mm_adds_epu16 (_mm_mullo_epi16 (data, alpha), + mask_0080), + mask_0101); +} + +static force_inline __m128i +pix_add_multiply_1x128 (__m128i* src, + __m128i* alpha_dst, + __m128i* dst, + __m128i* alpha_src) +{ + __m128i t1 = pix_multiply_1x128 (*src, *alpha_dst); + __m128i t2 = pix_multiply_1x128 (*dst, *alpha_src); + + return _mm_adds_epu8 (t1, t2); +} + +static force_inline __m128i +negate_1x128 (__m128i data) +{ + return _mm_xor_si128 (data, mask_00ff); +} + +static force_inline __m128i +invert_colors_1x128 (__m128i data) +{ + return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2)); +} + +static force_inline __m128i +over_1x128 (__m128i src, __m128i alpha, __m128i dst) +{ + return _mm_adds_epu8 (src, pix_multiply_1x128 (dst, negate_1x128 (alpha))); +} + +static force_inline __m128i +in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst) +{ + return over_1x128 (pix_multiply_1x128 (*src, *mask), + pix_multiply_1x128 (*alpha, *mask), + *dst); +} + +static force_inline __m128i +over_rev_non_pre_1x128 (__m128i src, __m128i dst) +{ + __m128i alpha = expand_alpha_1x128 (src); + + return over_1x128 (pix_multiply_1x128 (invert_colors_1x128 (src), + _mm_or_si128 (alpha, mask_alpha)), + alpha, + dst); +} + +static force_inline uint32_t +pack_1x128_32 (__m128i data) +{ + return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ())); +} + +static force_inline __m128i +expand565_16_1x128 (uint16_t pixel) +{ + __m128i m = _mm_cvtsi32_si128 (pixel); + + m = unpack_565_to_8888 (m); + + return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ()); +} + +static force_inline uint32_t +core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst) +{ + uint8_t a; + __m128i xmms; + + a = src >> 24; + + if (a == 0xff) + { + return src; + } + else if (src) + { + xmms = unpack_32_1x128 (src); + return pack_1x128_32 ( + over_1x128 (xmms, expand_alpha_1x128 (xmms), + unpack_32_1x128 (dst))); + } + + return dst; +} + +static force_inline uint32_t +combine1 (const uint32_t *ps, const uint32_t *pm) +{ + uint32_t s = *ps; + + if (pm) + { + __m128i ms, mm; + + mm = unpack_32_1x128 (*pm); + mm = expand_alpha_1x128 (mm); + + ms = unpack_32_1x128 (s); + ms = pix_multiply_1x128 (ms, mm); + + s = pack_1x128_32 (ms); + } + + return s; +} + +static force_inline __m128i +combine4 (const __m128i *ps, const __m128i *pm) +{ + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_msk_lo, xmm_msk_hi; + __m128i s; + + if (pm) + { + xmm_msk_lo = load_128_unaligned (pm); + + if (is_transparent (xmm_msk_lo)) + return _mm_setzero_si128 (); + } + + s = load_128_unaligned (ps); + + if (pm) + { + unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi); + + expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_msk_lo, &xmm_msk_hi, + &xmm_src_lo, &xmm_src_hi); + + s = pack_2x128_128 (xmm_src_lo, xmm_src_hi); + } + + return s; +} + +static force_inline void +core_combine_over_u_sse2_mask (uint32_t * pd, + const uint32_t* ps, + const uint32_t* pm, + int w) +{ + uint32_t s, d; + + /* Align dst on a 16-byte boundary */ + while (w && ((uintptr_t)pd & 15)) + { + d = *pd; + s = combine1 (ps, pm); + + if (s) + *pd = core_combine_over_u_pixel_sse2 (s, d); + pd++; + ps++; + pm++; + w--; + } + + while (w >= 4) + { + __m128i mask = load_128_unaligned ((__m128i *)pm); + + if (!is_zero (mask)) + { + __m128i src; + __m128i src_hi, src_lo; + __m128i mask_hi, mask_lo; + __m128i alpha_hi, alpha_lo; + + src = load_128_unaligned ((__m128i *)ps); + + if (is_opaque (_mm_and_si128 (src, mask))) + { + save_128_aligned ((__m128i *)pd, src); + } + else + { + __m128i dst = load_128_aligned ((__m128i *)pd); + __m128i dst_hi, dst_lo; + + unpack_128_2x128 (mask, &mask_lo, &mask_hi); + unpack_128_2x128 (src, &src_lo, &src_hi); + + expand_alpha_2x128 (mask_lo, mask_hi, &mask_lo, &mask_hi); + pix_multiply_2x128 (&src_lo, &src_hi, + &mask_lo, &mask_hi, + &src_lo, &src_hi); + + unpack_128_2x128 (dst, &dst_lo, &dst_hi); + + expand_alpha_2x128 (src_lo, src_hi, + &alpha_lo, &alpha_hi); + + over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi, + &dst_lo, &dst_hi); + + save_128_aligned ( + (__m128i *)pd, + pack_2x128_128 (dst_lo, dst_hi)); + } + } + + pm += 4; + ps += 4; + pd += 4; + w -= 4; + } + while (w) + { + d = *pd; + s = combine1 (ps, pm); + + if (s) + *pd = core_combine_over_u_pixel_sse2 (s, d); + pd++; + ps++; + pm++; + + w--; + } +} + +static force_inline void +core_combine_over_u_sse2_no_mask (uint32_t * pd, + const uint32_t* ps, + int w) +{ + uint32_t s, d; + + /* Align dst on a 16-byte boundary */ + while (w && ((uintptr_t)pd & 15)) + { + d = *pd; + s = *ps; + + if (s) + *pd = core_combine_over_u_pixel_sse2 (s, d); + pd++; + ps++; + w--; + } + + while (w >= 4) + { + __m128i src; + __m128i src_hi, src_lo, dst_hi, dst_lo; + __m128i alpha_hi, alpha_lo; + + src = load_128_unaligned ((__m128i *)ps); + + if (!is_zero (src)) + { + if (is_opaque (src)) + { + save_128_aligned ((__m128i *)pd, src); + } + else + { + __m128i dst = load_128_aligned ((__m128i *)pd); + + unpack_128_2x128 (src, &src_lo, &src_hi); + unpack_128_2x128 (dst, &dst_lo, &dst_hi); + + expand_alpha_2x128 (src_lo, src_hi, + &alpha_lo, &alpha_hi); + over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi, + &dst_lo, &dst_hi); + + save_128_aligned ( + (__m128i *)pd, + pack_2x128_128 (dst_lo, dst_hi)); + } + } + + ps += 4; + pd += 4; + w -= 4; + } + while (w) + { + d = *pd; + s = *ps; + + if (s) + *pd = core_combine_over_u_pixel_sse2 (s, d); + pd++; + ps++; + + w--; + } +} + +static force_inline void +sse2_combine_over_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + if (pm) + core_combine_over_u_sse2_mask (pd, ps, pm, w); + else + core_combine_over_u_sse2_no_mask (pd, ps, w); +} + +static void +sse2_combine_over_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, d; + + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + + /* Align dst on a 16-byte boundary */ + while (w && + ((uintptr_t)pd & 15)) + { + d = *pd; + s = combine1 (ps, pm); + + *pd++ = core_combine_over_u_pixel_sse2 (d, s); + w--; + ps++; + if (pm) + pm++; + } + + while (w >= 4) + { + /* I'm loading unaligned because I'm not sure + * about the address alignment. + */ + xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + over_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_src_lo, &xmm_src_hi); + + /* rebuid the 4 pixel data and save*/ + save_128_aligned ((__m128i*)pd, + pack_2x128_128 (xmm_src_lo, xmm_src_hi)); + + w -= 4; + ps += 4; + pd += 4; + + if (pm) + pm += 4; + } + + while (w) + { + d = *pd; + s = combine1 (ps, pm); + + *pd++ = core_combine_over_u_pixel_sse2 (d, s); + ps++; + w--; + if (pm) + pm++; + } +} + +static force_inline uint32_t +core_combine_in_u_pixel_sse2 (uint32_t src, uint32_t dst) +{ + uint32_t maska = src >> 24; + + if (maska == 0) + { + return 0; + } + else if (maska != 0xff) + { + return pack_1x128_32 ( + pix_multiply_1x128 (unpack_32_1x128 (dst), + expand_alpha_1x128 (unpack_32_1x128 (src)))); + } + + return dst; +} + +static void +sse2_combine_in_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + while (w && ((uintptr_t)pd & 15)) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_in_u_pixel_sse2 (d, s); + w--; + ps++; + if (pm) + pm++; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ((__m128i*)pd, + pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + w -= 4; + if (pm) + pm += 4; + } + + while (w) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_in_u_pixel_sse2 (d, s); + w--; + ps++; + if (pm) + pm++; + } +} + +static void +sse2_combine_in_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + while (w && ((uintptr_t)pd & 15)) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_in_u_pixel_sse2 (s, d); + ps++; + w--; + if (pm) + pm++; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_src_lo, &xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + w -= 4; + if (pm) + pm += 4; + } + + while (w) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_in_u_pixel_sse2 (s, d); + w--; + ps++; + if (pm) + pm++; + } +} + +static void +sse2_combine_out_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + while (w && ((uintptr_t)pd & 15)) + { + uint32_t s = combine1 (ps, pm); + uint32_t d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (d), negate_1x128 ( + expand_alpha_1x128 (unpack_32_1x128 (s))))); + + if (pm) + pm++; + ps++; + w--; + } + + while (w >= 4) + { + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + negate_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_src_lo, &xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + if (pm) + pm += 4; + + w -= 4; + } + + while (w) + { + uint32_t s = combine1 (ps, pm); + uint32_t d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (d), negate_1x128 ( + expand_alpha_1x128 (unpack_32_1x128 (s))))); + ps++; + if (pm) + pm++; + w--; + } +} + +static void +sse2_combine_out_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + while (w && ((uintptr_t)pd & 15)) + { + uint32_t s = combine1 (ps, pm); + uint32_t d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (s), negate_1x128 ( + expand_alpha_1x128 (unpack_32_1x128 (d))))); + w--; + ps++; + if (pm) + pm++; + } + + while (w >= 4) + { + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + negate_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + w -= 4; + if (pm) + pm += 4; + } + + while (w) + { + uint32_t s = combine1 (ps, pm); + uint32_t d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (s), negate_1x128 ( + expand_alpha_1x128 (unpack_32_1x128 (d))))); + w--; + ps++; + if (pm) + pm++; + } +} + +static force_inline uint32_t +core_combine_atop_u_pixel_sse2 (uint32_t src, + uint32_t dst) +{ + __m128i s = unpack_32_1x128 (src); + __m128i d = unpack_32_1x128 (dst); + + __m128i sa = negate_1x128 (expand_alpha_1x128 (s)); + __m128i da = expand_alpha_1x128 (d); + + return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa)); +} + +static void +sse2_combine_atop_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; + + while (w && ((uintptr_t)pd & 15)) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_atop_u_pixel_sse2 (s, d); + w--; + ps++; + if (pm) + pm++; + } + + while (w >= 4) + { + xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + + pix_add_multiply_2x128 ( + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + w -= 4; + if (pm) + pm += 4; + } + + while (w) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_atop_u_pixel_sse2 (s, d); + w--; + ps++; + if (pm) + pm++; + } +} + +static force_inline uint32_t +core_combine_reverse_atop_u_pixel_sse2 (uint32_t src, + uint32_t dst) +{ + __m128i s = unpack_32_1x128 (src); + __m128i d = unpack_32_1x128 (dst); + + __m128i sa = expand_alpha_1x128 (s); + __m128i da = negate_1x128 (expand_alpha_1x128 (d)); + + return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa)); +} + +static void +sse2_combine_atop_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; + + while (w && ((uintptr_t)pd & 15)) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d); + ps++; + w--; + if (pm) + pm++; + } + + while (w >= 4) + { + xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_add_multiply_2x128 ( + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + w -= 4; + if (pm) + pm += 4; + } + + while (w) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d); + ps++; + w--; + if (pm) + pm++; + } +} + +static force_inline uint32_t +core_combine_xor_u_pixel_sse2 (uint32_t src, + uint32_t dst) +{ + __m128i s = unpack_32_1x128 (src); + __m128i d = unpack_32_1x128 (dst); + + __m128i neg_d = negate_1x128 (expand_alpha_1x128 (d)); + __m128i neg_s = negate_1x128 (expand_alpha_1x128 (s)); + + return pack_1x128_32 (pix_add_multiply_1x128 (&s, &neg_d, &d, &neg_s)); +} + +static void +sse2_combine_xor_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int w = width; + uint32_t s, d; + uint32_t* pd = dst; + const uint32_t* ps = src; + const uint32_t* pm = mask; + + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; + + while (w && ((uintptr_t)pd & 15)) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_xor_u_pixel_sse2 (s, d); + w--; + ps++; + if (pm) + pm++; + } + + while (w >= 4) + { + xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm); + xmm_dst = load_128_aligned ((__m128i*) pd); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_add_multiply_2x128 ( + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + w -= 4; + if (pm) + pm += 4; + } + + while (w) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_xor_u_pixel_sse2 (s, d); + w--; + ps++; + if (pm) + pm++; + } +} + +static force_inline void +sse2_combine_add_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int w = width; + uint32_t s, d; + uint32_t* pd = dst; + const uint32_t* ps = src; + const uint32_t* pm = mask; + + while (w && (uintptr_t)pd & 15) + { + s = combine1 (ps, pm); + d = *pd; + + ps++; + if (pm) + pm++; + *pd++ = _mm_cvtsi128_si32 ( + _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d))); + w--; + } + + while (w >= 4) + { + __m128i s; + + s = combine4 ((__m128i*)ps, (__m128i*)pm); + + save_128_aligned ( + (__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned ((__m128i*)pd))); + + pd += 4; + ps += 4; + if (pm) + pm += 4; + w -= 4; + } + + while (w--) + { + s = combine1 (ps, pm); + d = *pd; + + ps++; + *pd++ = _mm_cvtsi128_si32 ( + _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d))); + if (pm) + pm++; + } +} + +static force_inline uint32_t +core_combine_saturate_u_pixel_sse2 (uint32_t src, + uint32_t dst) +{ + __m128i ms = unpack_32_1x128 (src); + __m128i md = unpack_32_1x128 (dst); + uint32_t sa = src >> 24; + uint32_t da = ~dst >> 24; + + if (sa > da) + { + ms = pix_multiply_1x128 ( + ms, expand_alpha_1x128 (unpack_32_1x128 (DIV_UN8 (da, sa) << 24))); + } + + return pack_1x128_32 (_mm_adds_epu16 (md, ms)); +} + +static void +sse2_combine_saturate_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, d; + + uint32_t pack_cmp; + __m128i xmm_src, xmm_dst; + + while (w && (uintptr_t)pd & 15) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); + w--; + ps++; + if (pm) + pm++; + } + + while (w >= 4) + { + xmm_dst = load_128_aligned ((__m128i*)pd); + xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm); + + pack_cmp = _mm_movemask_epi8 ( + _mm_cmpgt_epi32 ( + _mm_srli_epi32 (xmm_src, 24), + _mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24))); + + /* if some alpha src is grater than respective ~alpha dst */ + if (pack_cmp) + { + s = combine1 (ps++, pm); + d = *pd; + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); + if (pm) + pm++; + + s = combine1 (ps++, pm); + d = *pd; + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); + if (pm) + pm++; + + s = combine1 (ps++, pm); + d = *pd; + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); + if (pm) + pm++; + + s = combine1 (ps++, pm); + d = *pd; + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); + if (pm) + pm++; + } + else + { + save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src)); + + pd += 4; + ps += 4; + if (pm) + pm += 4; + } + + w -= 4; + } + + while (w--) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); + ps++; + if (pm) + pm++; + } +} + +static void +sse2_combine_src_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m))); + w--; + } + + while (w >= 4) + { + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m))); + w--; + } +} + +static force_inline uint32_t +core_combine_over_ca_pixel_sse2 (uint32_t src, + uint32_t mask, + uint32_t dst) +{ + __m128i s = unpack_32_1x128 (src); + __m128i expAlpha = expand_alpha_1x128 (s); + __m128i unpk_mask = unpack_32_1x128 (mask); + __m128i unpk_dst = unpack_32_1x128 (dst); + + return pack_1x128_32 (in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst)); +} + +static void +sse2_combine_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d); + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d); + w--; + } +} + +static force_inline uint32_t +core_combine_over_reverse_ca_pixel_sse2 (uint32_t src, + uint32_t mask, + uint32_t dst) +{ + __m128i d = unpack_32_1x128 (dst); + + return pack_1x128_32 ( + over_1x128 (d, expand_alpha_1x128 (d), + pix_multiply_1x128 (unpack_32_1x128 (src), + unpack_32_1x128 (mask)))); +} + +static void +sse2_combine_over_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d); + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + over_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask_lo, &xmm_mask_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d); + w--; + } +} + +static void +sse2_combine_in_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)), + expand_alpha_1x128 (unpack_32_1x128 (d)))); + + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + pix_multiply_1x128 ( + unpack_32_1x128 (s), unpack_32_1x128 (m)), + expand_alpha_1x128 (unpack_32_1x128 (d)))); + + w--; + } +} + +static void +sse2_combine_in_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (d), + pix_multiply_1x128 (unpack_32_1x128 (m), + expand_alpha_1x128 (unpack_32_1x128 (s))))); + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (d), + pix_multiply_1x128 (unpack_32_1x128 (m), + expand_alpha_1x128 (unpack_32_1x128 (s))))); + w--; + } +} + +static void +sse2_combine_out_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + pix_multiply_1x128 ( + unpack_32_1x128 (s), unpack_32_1x128 (m)), + negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d))))); + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + negate_2x128 (xmm_alpha_lo, xmm_alpha_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + pix_multiply_1x128 ( + unpack_32_1x128 (s), unpack_32_1x128 (m)), + negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d))))); + + w--; + } +} + +static void +sse2_combine_out_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (d), + negate_1x128 (pix_multiply_1x128 ( + unpack_32_1x128 (m), + expand_alpha_1x128 (unpack_32_1x128 (s)))))); + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask_lo, &xmm_mask_hi); + + negate_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (d), + negate_1x128 (pix_multiply_1x128 ( + unpack_32_1x128 (m), + expand_alpha_1x128 (unpack_32_1x128 (s)))))); + w--; + } +} + +static force_inline uint32_t +core_combine_atop_ca_pixel_sse2 (uint32_t src, + uint32_t mask, + uint32_t dst) +{ + __m128i m = unpack_32_1x128 (mask); + __m128i s = unpack_32_1x128 (src); + __m128i d = unpack_32_1x128 (dst); + __m128i sa = expand_alpha_1x128 (s); + __m128i da = expand_alpha_1x128 (d); + + s = pix_multiply_1x128 (s, m); + m = negate_1x128 (pix_multiply_1x128 (m, sa)); + + return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da)); +} + +static void +sse2_combine_atop_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d); + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi); + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_mask_lo, &xmm_mask_hi); + + negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + pix_add_multiply_2x128 ( + &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d); + w--; + } +} + +static force_inline uint32_t +core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src, + uint32_t mask, + uint32_t dst) +{ + __m128i m = unpack_32_1x128 (mask); + __m128i s = unpack_32_1x128 (src); + __m128i d = unpack_32_1x128 (dst); + + __m128i da = negate_1x128 (expand_alpha_1x128 (d)); + __m128i sa = expand_alpha_1x128 (s); + + s = pix_multiply_1x128 (s, m); + m = pix_multiply_1x128 (m, sa); + + return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da)); +} + +static void +sse2_combine_atop_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d); + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi); + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_mask_lo, &xmm_mask_hi); + + negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_add_multiply_2x128 ( + &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d); + w--; + } +} + +static force_inline uint32_t +core_combine_xor_ca_pixel_sse2 (uint32_t src, + uint32_t mask, + uint32_t dst) +{ + __m128i a = unpack_32_1x128 (mask); + __m128i s = unpack_32_1x128 (src); + __m128i d = unpack_32_1x128 (dst); + + __m128i alpha_dst = negate_1x128 (pix_multiply_1x128 ( + a, expand_alpha_1x128 (s))); + __m128i dest = pix_multiply_1x128 (s, a); + __m128i alpha_src = negate_1x128 (expand_alpha_1x128 (d)); + + return pack_1x128_32 (pix_add_multiply_1x128 (&d, + &alpha_dst, + &dest, + &alpha_src)); +} + +static void +sse2_combine_xor_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d); + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi); + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_mask_lo, &xmm_mask_hi); + + negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + negate_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_add_multiply_2x128 ( + &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d); + w--; + } +} + +static void +sse2_combine_add_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s), + unpack_32_1x128 (m)), + unpack_32_1x128 (d))); + w--; + } + + while (w >= 4) + { + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 ( + _mm_adds_epu8 (xmm_src_lo, xmm_dst_lo), + _mm_adds_epu8 (xmm_src_hi, xmm_dst_hi))); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s), + unpack_32_1x128 (m)), + unpack_32_1x128 (d))); + w--; + } +} + +static force_inline __m128i +create_mask_16_128 (uint16_t mask) +{ + return _mm_set1_epi16 (mask); +} + +/* Work around a code generation bug in Sun Studio 12. */ +#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590) +# define create_mask_2x32_128(mask0, mask1) \ + (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1))) +#else +static force_inline __m128i +create_mask_2x32_128 (uint32_t mask0, + uint32_t mask1) +{ + return _mm_set_epi32 (mask0, mask1, mask0, mask1); +} +#endif + +static void +sse2_composite_over_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint32_t *dst_line, *dst, d; + int32_t w; + int dst_stride; + __m128i xmm_src, xmm_alpha; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + xmm_src = expand_pixel_32_1x128 (src); + xmm_alpha = expand_alpha_1x128 (xmm_src); + + while (height--) + { + dst = dst_line; + + dst_line += dst_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + d = *dst; + *dst++ = pack_1x128_32 (over_1x128 (xmm_src, + xmm_alpha, + unpack_32_1x128 (d))); + w--; + } + + while (w >= 4) + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_dst_lo, &xmm_dst_hi); + + /* rebuid the 4 pixel data and save*/ + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + w -= 4; + dst += 4; + } + + while (w) + { + d = *dst; + *dst++ = pack_1x128_32 (over_1x128 (xmm_src, + xmm_alpha, + unpack_32_1x128 (d))); + w--; + } + + } +} + +static void +sse2_composite_over_n_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint16_t *dst_line, *dst, d; + int32_t w; + int dst_stride; + __m128i xmm_src, xmm_alpha; + __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + xmm_src = expand_pixel_32_1x128 (src); + xmm_alpha = expand_alpha_1x128 (xmm_src); + + while (height--) + { + dst = dst_line; + + dst_line += dst_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + d = *dst; + + *dst++ = pack_565_32_16 ( + pack_1x128_32 (over_1x128 (xmm_src, + xmm_alpha, + expand565_16_1x128 (d)))); + w--; + } + + while (w >= 8) + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_565_128_4x128 (xmm_dst, + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + + over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_dst0, &xmm_dst1); + over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_dst2, &xmm_dst3); + + xmm_dst = pack_565_4x128_128 ( + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + + save_128_aligned ((__m128i*)dst, xmm_dst); + + dst += 8; + w -= 8; + } + + while (w--) + { + d = *dst; + *dst++ = pack_565_32_16 ( + pack_1x128_32 (over_1x128 (xmm_src, xmm_alpha, + expand565_16_1x128 (d)))); + } + } + +} + +static void +sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint32_t *dst_line, d; + uint32_t *mask_line, m; + uint32_t pack_cmp; + int dst_stride, mask_stride; + + __m128i xmm_src; + __m128i xmm_dst; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + __m128i mmx_src, mmx_mask, mmx_dest; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + xmm_src = _mm_unpacklo_epi8 ( + create_mask_2x32_128 (src, src), _mm_setzero_si128 ()); + mmx_src = xmm_src; + + while (height--) + { + int w = width; + const uint32_t *pm = (uint32_t *)mask_line; + uint32_t *pd = (uint32_t *)dst_line; + + dst_line += dst_stride; + mask_line += mask_stride; + + while (w && (uintptr_t)pd & 15) + { + m = *pm++; + + if (m) + { + d = *pd; + + mmx_mask = unpack_32_1x128 (m); + mmx_dest = unpack_32_1x128 (d); + + *pd = pack_1x128_32 ( + _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), + mmx_dest)); + } + + pd++; + w--; + } + + while (w >= 4) + { + xmm_mask = load_128_unaligned ((__m128i*)pm); + + pack_cmp = + _mm_movemask_epi8 ( + _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); + + /* if all bits in mask are zero, pack_cmp are equal to 0xffff */ + if (pack_cmp != 0xffff) + { + xmm_dst = load_128_aligned ((__m128i*)pd); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_src, &xmm_src, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi); + + save_128_aligned ( + (__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst)); + } + + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + m = *pm++; + + if (m) + { + d = *pd; + + mmx_mask = unpack_32_1x128 (m); + mmx_dest = unpack_32_1x128 (d); + + *pd = pack_1x128_32 ( + _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), + mmx_dest)); + } + + pd++; + w--; + } + } + +} + +static void +sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint32_t *dst_line, d; + uint32_t *mask_line, m; + uint32_t pack_cmp; + int dst_stride, mask_stride; + + __m128i xmm_src, xmm_alpha; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + xmm_src = _mm_unpacklo_epi8 ( + create_mask_2x32_128 (src, src), _mm_setzero_si128 ()); + xmm_alpha = expand_alpha_1x128 (xmm_src); + mmx_src = xmm_src; + mmx_alpha = xmm_alpha; + + while (height--) + { + int w = width; + const uint32_t *pm = (uint32_t *)mask_line; + uint32_t *pd = (uint32_t *)dst_line; + + dst_line += dst_stride; + mask_line += mask_stride; + + while (w && (uintptr_t)pd & 15) + { + m = *pm++; + + if (m) + { + d = *pd; + mmx_mask = unpack_32_1x128 (m); + mmx_dest = unpack_32_1x128 (d); + + *pd = pack_1x128_32 (in_over_1x128 (&mmx_src, + &mmx_alpha, + &mmx_mask, + &mmx_dest)); + } + + pd++; + w--; + } + + while (w >= 4) + { + xmm_mask = load_128_unaligned ((__m128i*)pm); + + pack_cmp = + _mm_movemask_epi8 ( + _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); + + /* if all bits in mask are zero, pack_cmp are equal to 0xffff */ + if (pack_cmp != 0xffff) + { + xmm_dst = load_128_aligned ((__m128i*)pd); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + in_over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + m = *pm++; + + if (m) + { + d = *pd; + mmx_mask = unpack_32_1x128 (m); + mmx_dest = unpack_32_1x128 (d); + + *pd = pack_1x128_32 ( + in_over_1x128 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)); + } + + pd++; + w--; + } + } + +} + +static void +sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + uint32_t mask; + int32_t w; + int dst_stride, src_stride; + + __m128i xmm_mask; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); + + xmm_mask = create_mask_16_128 (mask >> 24); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + uint32_t s = *src++; + + if (s) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (s); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i dest = xmm_mask; + __m128i alpha_dst = unpack_32_1x128 (d); + + *dst = pack_1x128_32 ( + in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); + } + dst++; + w--; + } + + while (w >= 4) + { + xmm_src = load_128_unaligned ((__m128i*)src); + + if (!is_zero (xmm_src)) + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask, &xmm_mask, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + dst += 4; + src += 4; + w -= 4; + } + + while (w) + { + uint32_t s = *src++; + + if (s) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (s); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i mask = xmm_mask; + __m128i dest = unpack_32_1x128 (d); + + *dst = pack_1x128_32 ( + in_over_1x128 (&ms, &alpha, &mask, &dest)); + } + + dst++; + w--; + } + } + +} + +static void +sse2_composite_src_x888_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + s = *src++; + *dst = convert_8888_to_0565 (s); + dst++; + w--; + } + + while (w >= 8) + { + __m128i xmm_src0 = load_128_unaligned ((__m128i *)src + 0); + __m128i xmm_src1 = load_128_unaligned ((__m128i *)src + 1); + + save_128_aligned ((__m128i*)dst, pack_565_2packedx128_128 (xmm_src0, xmm_src1)); + + w -= 8; + src += 8; + dst += 8; + } + + while (w) + { + s = *src++; + *dst = convert_8888_to_0565 (s); + dst++; + w--; + } + } +} + +static void +sse2_composite_src_x888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int32_t w; + int dst_stride, src_stride; + + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + *dst++ = *src++ | 0xff000000; + w--; + } + + while (w >= 16) + { + __m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4; + + xmm_src1 = load_128_unaligned ((__m128i*)src + 0); + xmm_src2 = load_128_unaligned ((__m128i*)src + 1); + xmm_src3 = load_128_unaligned ((__m128i*)src + 2); + xmm_src4 = load_128_unaligned ((__m128i*)src + 3); + + save_128_aligned ((__m128i*)dst + 0, _mm_or_si128 (xmm_src1, mask_ff000000)); + save_128_aligned ((__m128i*)dst + 1, _mm_or_si128 (xmm_src2, mask_ff000000)); + save_128_aligned ((__m128i*)dst + 2, _mm_or_si128 (xmm_src3, mask_ff000000)); + save_128_aligned ((__m128i*)dst + 3, _mm_or_si128 (xmm_src4, mask_ff000000)); + + dst += 16; + src += 16; + w -= 16; + } + + while (w) + { + *dst++ = *src++ | 0xff000000; + w--; + } + } + +} + +static void +sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + uint32_t mask; + int dst_stride, src_stride; + int32_t w; + + __m128i xmm_mask, xmm_alpha; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); + + xmm_mask = create_mask_16_128 (mask >> 24); + xmm_alpha = mask_00ff; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + uint32_t s = (*src++) | 0xff000000; + uint32_t d = *dst; + + __m128i src = unpack_32_1x128 (s); + __m128i alpha = xmm_alpha; + __m128i mask = xmm_mask; + __m128i dest = unpack_32_1x128 (d); + + *dst++ = pack_1x128_32 ( + in_over_1x128 (&src, &alpha, &mask, &dest)); + + w--; + } + + while (w >= 4) + { + xmm_src = _mm_or_si128 ( + load_128_unaligned ((__m128i*)src), mask_ff000000); + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha, &xmm_alpha, + &xmm_mask, &xmm_mask, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + dst += 4; + src += 4; + w -= 4; + + } + + while (w) + { + uint32_t s = (*src++) | 0xff000000; + uint32_t d = *dst; + + __m128i src = unpack_32_1x128 (s); + __m128i alpha = xmm_alpha; + __m128i mask = xmm_mask; + __m128i dest = unpack_32_1x128 (d); + + *dst++ = pack_1x128_32 ( + in_over_1x128 (&src, &alpha, &mask, &dest)); + + w--; + } + } + +} + +static void +sse2_composite_over_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + int dst_stride, src_stride; + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + dst = dst_line; + src = src_line; + + while (height--) + { + sse2_combine_over_u (imp, op, dst, src, NULL, width); + + dst += dst_stride; + src += src_stride; + } +} + +static force_inline uint16_t +composite_over_8888_0565pixel (uint32_t src, uint16_t dst) +{ + __m128i ms; + + ms = unpack_32_1x128 (src); + return pack_565_32_16 ( + pack_1x128_32 ( + over_1x128 ( + ms, expand_alpha_1x128 (ms), expand565_16_1x128 (dst)))); +} + +static void +sse2_composite_over_8888_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst, d; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + int32_t w; + + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + src = src_line; + + dst_line += dst_stride; + src_line += src_stride; + w = width; + + /* Align dst on a 16-byte boundary */ + while (w && + ((uintptr_t)dst & 15)) + { + s = *src++; + d = *dst; + + *dst++ = composite_over_8888_0565pixel (s, d); + w--; + } + + /* It's a 8 pixel loop */ + while (w >= 8) + { + /* I'm loading unaligned because I'm not sure + * about the address alignment. + */ + xmm_src = load_128_unaligned ((__m128i*) src); + xmm_dst = load_128_aligned ((__m128i*) dst); + + /* Unpacking */ + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_565_128_4x128 (xmm_dst, + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + /* I'm loading next 4 pixels from memory + * before to optimze the memory read. + */ + xmm_src = load_128_unaligned ((__m128i*) (src + 4)); + + over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst0, &xmm_dst1); + + /* Unpacking */ + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst2, &xmm_dst3); + + save_128_aligned ( + (__m128i*)dst, pack_565_4x128_128 ( + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); + + w -= 8; + dst += 8; + src += 8; + } + + while (w--) + { + s = *src++; + d = *dst; + + *dst++ = composite_over_8888_0565pixel (s, d); + } + } + +} + +static void +sse2_composite_over_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t m, d; + + __m128i xmm_src, xmm_alpha, xmm_def; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + xmm_def = create_mask_2x32_128 (src, src); + xmm_src = expand_pixel_32_1x128 (src); + xmm_alpha = expand_alpha_1x128 (xmm_src); + mmx_src = xmm_src; + mmx_alpha = xmm_alpha; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + uint8_t m = *mask++; + + if (m) + { + d = *dst; + mmx_mask = expand_pixel_8_1x128 (m); + mmx_dest = unpack_32_1x128 (d); + + *dst = pack_1x128_32 (in_over_1x128 (&mmx_src, + &mmx_alpha, + &mmx_mask, + &mmx_dest)); + } + + w--; + dst++; + } + + while (w >= 4) + { + m = *((uint32_t*)mask); + + if (srca == 0xff && m == 0xffffffff) + { + save_128_aligned ((__m128i*)dst, xmm_def); + } + else if (m) + { + xmm_dst = load_128_aligned ((__m128i*) dst); + xmm_mask = unpack_32_1x128 (m); + xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); + + /* Unpacking */ + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + w -= 4; + dst += 4; + mask += 4; + } + + while (w) + { + uint8_t m = *mask++; + + if (m) + { + d = *dst; + mmx_mask = expand_pixel_8_1x128 (m); + mmx_dest = unpack_32_1x128 (d); + + *dst = pack_1x128_32 (in_over_1x128 (&mmx_src, + &mmx_alpha, + &mmx_mask, + &mmx_dest)); + } + + w--; + dst++; + } + } + +} + +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) +__attribute__((__force_align_arg_pointer__)) +#endif +static pixman_bool_t +sse2_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t filler) +{ + uint32_t byte_width; + uint8_t *byte_line; + + __m128i xmm_def; + + if (bpp == 8) + { + uint8_t b; + uint16_t w; + + stride = stride * (int) sizeof (uint32_t) / 1; + byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); + byte_width = width; + stride *= 1; + + b = filler & 0xff; + w = (b << 8) | b; + filler = (w << 16) | w; + } + else if (bpp == 16) + { + stride = stride * (int) sizeof (uint32_t) / 2; + byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); + byte_width = 2 * width; + stride *= 2; + + filler = (filler & 0xffff) * 0x00010001; + } + else if (bpp == 32) + { + stride = stride * (int) sizeof (uint32_t) / 4; + byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); + byte_width = 4 * width; + stride *= 4; + } + else + { + return FALSE; + } + + xmm_def = create_mask_2x32_128 (filler, filler); + + while (height--) + { + int w; + uint8_t *d = byte_line; + byte_line += stride; + w = byte_width; + + if (w >= 1 && ((uintptr_t)d & 1)) + { + *(uint8_t *)d = filler; + w -= 1; + d += 1; + } + + while (w >= 2 && ((uintptr_t)d & 3)) + { + *(uint16_t *)d = filler; + w -= 2; + d += 2; + } + + while (w >= 4 && ((uintptr_t)d & 15)) + { + *(uint32_t *)d = filler; + + w -= 4; + d += 4; + } + + while (w >= 128) + { + save_128_aligned ((__m128i*)(d), xmm_def); + save_128_aligned ((__m128i*)(d + 16), xmm_def); + save_128_aligned ((__m128i*)(d + 32), xmm_def); + save_128_aligned ((__m128i*)(d + 48), xmm_def); + save_128_aligned ((__m128i*)(d + 64), xmm_def); + save_128_aligned ((__m128i*)(d + 80), xmm_def); + save_128_aligned ((__m128i*)(d + 96), xmm_def); + save_128_aligned ((__m128i*)(d + 112), xmm_def); + + d += 128; + w -= 128; + } + + if (w >= 64) + { + save_128_aligned ((__m128i*)(d), xmm_def); + save_128_aligned ((__m128i*)(d + 16), xmm_def); + save_128_aligned ((__m128i*)(d + 32), xmm_def); + save_128_aligned ((__m128i*)(d + 48), xmm_def); + + d += 64; + w -= 64; + } + + if (w >= 32) + { + save_128_aligned ((__m128i*)(d), xmm_def); + save_128_aligned ((__m128i*)(d + 16), xmm_def); + + d += 32; + w -= 32; + } + + if (w >= 16) + { + save_128_aligned ((__m128i*)(d), xmm_def); + + d += 16; + w -= 16; + } + + while (w >= 4) + { + *(uint32_t *)d = filler; + + w -= 4; + d += 4; + } + + if (w >= 2) + { + *(uint16_t *)d = filler; + w -= 2; + d += 2; + } + + if (w >= 1) + { + *(uint8_t *)d = filler; + w -= 1; + d += 1; + } + } + + return TRUE; +} + +static void +sse2_composite_src_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t m; + + __m128i xmm_src, xmm_def; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + if (src == 0) + { + sse2_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride, + PIXMAN_FORMAT_BPP (dest_image->bits.format), + dest_x, dest_y, width, height, 0); + return; + } + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + xmm_def = create_mask_2x32_128 (src, src); + xmm_src = expand_pixel_32_1x128 (src); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + uint8_t m = *mask++; + + if (m) + { + *dst = pack_1x128_32 ( + pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m))); + } + else + { + *dst = 0; + } + + w--; + dst++; + } + + while (w >= 4) + { + m = *((uint32_t*)mask); + + if (srca == 0xff && m == 0xffffffff) + { + save_128_aligned ((__m128i*)dst, xmm_def); + } + else if (m) + { + xmm_mask = unpack_32_1x128 (m); + xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); + + /* Unpacking */ + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_src, &xmm_src, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi)); + } + else + { + save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ()); + } + + w -= 4; + dst += 4; + mask += 4; + } + + while (w) + { + uint8_t m = *mask++; + + if (m) + { + *dst = pack_1x128_32 ( + pix_multiply_1x128 ( + xmm_src, expand_pixel_8_1x128 (m))); + } + else + { + *dst = 0; + } + + w--; + dst++; + } + } + +} + +static void +sse2_composite_over_n_8_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint16_t *dst_line, *dst, d; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t m; + __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; + + __m128i xmm_src, xmm_alpha; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + xmm_src = expand_pixel_32_1x128 (src); + xmm_alpha = expand_alpha_1x128 (xmm_src); + mmx_src = xmm_src; + mmx_alpha = xmm_alpha; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + m = *mask++; + + if (m) + { + d = *dst; + mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); + mmx_dest = expand565_16_1x128 (d); + + *dst = pack_565_32_16 ( + pack_1x128_32 ( + in_over_1x128 ( + &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); + } + + w--; + dst++; + } + + while (w >= 8) + { + xmm_dst = load_128_aligned ((__m128i*) dst); + unpack_565_128_4x128 (xmm_dst, + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + + m = *((uint32_t*)mask); + mask += 4; + + if (m) + { + xmm_mask = unpack_32_1x128 (m); + xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); + + /* Unpacking */ + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst0, &xmm_dst1); + } + + m = *((uint32_t*)mask); + mask += 4; + + if (m) + { + xmm_mask = unpack_32_1x128 (m); + xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); + + /* Unpacking */ + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + in_over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst2, &xmm_dst3); + } + + save_128_aligned ( + (__m128i*)dst, pack_565_4x128_128 ( + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); + + w -= 8; + dst += 8; + } + + while (w) + { + m = *mask++; + + if (m) + { + d = *dst; + mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); + mmx_dest = expand565_16_1x128 (d); + + *dst = pack_565_32_16 ( + pack_1x128_32 ( + in_over_1x128 ( + &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); + } + + w--; + dst++; + } + } + +} + +static void +sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst, d; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + int32_t w; + uint32_t opaque, zero; + + __m128i ms; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + s = *src++; + d = *dst; + + ms = unpack_32_1x128 (s); + + *dst++ = pack_565_32_16 ( + pack_1x128_32 ( + over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d)))); + w--; + } + + while (w >= 8) + { + /* First round */ + xmm_src = load_128_unaligned ((__m128i*)src); + xmm_dst = load_128_aligned ((__m128i*)dst); + + opaque = is_opaque (xmm_src); + zero = is_zero (xmm_src); + + unpack_565_128_4x128 (xmm_dst, + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + + /* preload next round*/ + xmm_src = load_128_unaligned ((__m128i*)(src + 4)); + + if (opaque) + { + invert_colors_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst0, &xmm_dst1); + } + else if (!zero) + { + over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst0, &xmm_dst1); + } + + /* Second round */ + opaque = is_opaque (xmm_src); + zero = is_zero (xmm_src); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + + if (opaque) + { + invert_colors_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst2, &xmm_dst3); + } + else if (!zero) + { + over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst2, &xmm_dst3); + } + + save_128_aligned ( + (__m128i*)dst, pack_565_4x128_128 ( + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); + + w -= 8; + src += 8; + dst += 8; + } + + while (w) + { + s = *src++; + d = *dst; + + ms = unpack_32_1x128 (s); + + *dst++ = pack_565_32_16 ( + pack_1x128_32 ( + over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d)))); + w--; + } + } + +} + +static void +sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst, d; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + int32_t w; + uint32_t opaque, zero; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + s = *src++; + d = *dst; + + *dst++ = pack_1x128_32 ( + over_rev_non_pre_1x128 ( + unpack_32_1x128 (s), unpack_32_1x128 (d))); + + w--; + } + + while (w >= 4) + { + xmm_src_hi = load_128_unaligned ((__m128i*)src); + + opaque = is_opaque (xmm_src_hi); + zero = is_zero (xmm_src_hi); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + + if (opaque) + { + invert_colors_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + else if (!zero) + { + xmm_dst_hi = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + w -= 4; + dst += 4; + src += 4; + } + + while (w) + { + s = *src++; + d = *dst; + + *dst++ = pack_1x128_32 ( + over_rev_non_pre_1x128 ( + unpack_32_1x128 (s), unpack_32_1x128 (d))); + + w--; + } + } + +} + +static void +sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint16_t *dst_line, *dst, d; + uint32_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int w; + uint32_t pack_cmp; + + __m128i xmm_src, xmm_alpha; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; + + __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + xmm_src = expand_pixel_32_1x128 (src); + xmm_alpha = expand_alpha_1x128 (xmm_src); + mmx_src = xmm_src; + mmx_alpha = xmm_alpha; + + while (height--) + { + w = width; + mask = mask_line; + dst = dst_line; + mask_line += mask_stride; + dst_line += dst_stride; + + while (w && ((uintptr_t)dst & 15)) + { + m = *(uint32_t *) mask; + + if (m) + { + d = *dst; + mmx_mask = unpack_32_1x128 (m); + mmx_dest = expand565_16_1x128 (d); + + *dst = pack_565_32_16 ( + pack_1x128_32 ( + in_over_1x128 ( + &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); + } + + w--; + dst++; + mask++; + } + + while (w >= 8) + { + /* First round */ + xmm_mask = load_128_unaligned ((__m128i*)mask); + xmm_dst = load_128_aligned ((__m128i*)dst); + + pack_cmp = _mm_movemask_epi8 ( + _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); + + unpack_565_128_4x128 (xmm_dst, + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + /* preload next round */ + xmm_mask = load_128_unaligned ((__m128i*)(mask + 4)); + + /* preload next round */ + if (pack_cmp != 0xffff) + { + in_over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst0, &xmm_dst1); + } + + /* Second round */ + pack_cmp = _mm_movemask_epi8 ( + _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + if (pack_cmp != 0xffff) + { + in_over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst2, &xmm_dst3); + } + + save_128_aligned ( + (__m128i*)dst, pack_565_4x128_128 ( + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); + + w -= 8; + dst += 8; + mask += 8; + } + + while (w) + { + m = *(uint32_t *) mask; + + if (m) + { + d = *dst; + mmx_mask = unpack_32_1x128 (m); + mmx_dest = expand565_16_1x128 (d); + + *dst = pack_565_32_16 ( + pack_1x128_32 ( + in_over_1x128 ( + &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); + } + + w--; + dst++; + mask++; + } + } + +} + +static void +sse2_composite_in_n_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint32_t d, m; + uint32_t src; + int32_t w; + + __m128i xmm_alpha; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && ((uintptr_t)dst & 15)) + { + m = (uint32_t) *mask++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + pix_multiply_1x128 ( + pix_multiply_1x128 (xmm_alpha, + unpack_32_1x128 (m)), + unpack_32_1x128 (d))); + w--; + } + + while (w >= 16) + { + xmm_mask = load_128_unaligned ((__m128i*)mask); + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + mask += 16; + dst += 16; + w -= 16; + } + + while (w) + { + m = (uint32_t) *mask++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + pix_multiply_1x128 ( + pix_multiply_1x128 ( + xmm_alpha, unpack_32_1x128 (m)), + unpack_32_1x128 (d))); + w--; + } + } + +} + +static void +sse2_composite_in_n_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + int dst_stride; + uint32_t d; + uint32_t src; + int32_t w; + + __m128i xmm_alpha; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); + + src = src >> 24; + + if (src == 0xff) + return; + + if (src == 0x00) + { + pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, + 8, dest_x, dest_y, width, height, src); + + return; + } + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + w = width; + + while (w && ((uintptr_t)dst & 15)) + { + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + pix_multiply_1x128 ( + xmm_alpha, + unpack_32_1x128 (d))); + w--; + } + + while (w >= 16) + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, + &xmm_dst_lo, &xmm_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + dst += 16; + w -= 16; + } + + while (w) + { + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + pix_multiply_1x128 ( + xmm_alpha, + unpack_32_1x128 (d))); + w--; + } + } + +} + +static void +sse2_composite_in_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int src_stride, dst_stride; + int32_t w; + uint32_t s, d; + + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && ((uintptr_t)dst & 15)) + { + s = (uint32_t) *src++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (s), unpack_32_1x128 (d))); + w--; + } + + while (w >= 16) + { + xmm_src = load_128_unaligned ((__m128i*)src); + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + src += 16; + dst += 16; + w -= 16; + } + + while (w) + { + s = (uint32_t) *src++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (d))); + w--; + } + } + +} + +static void +sse2_composite_add_n_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t src; + uint32_t m, d; + + __m128i xmm_alpha; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && ((uintptr_t)dst & 15)) + { + m = (uint32_t) *mask++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + _mm_adds_epu16 ( + pix_multiply_1x128 ( + xmm_alpha, unpack_32_1x128 (m)), + unpack_32_1x128 (d))); + w--; + } + + while (w >= 16) + { + xmm_mask = load_128_unaligned ((__m128i*)mask); + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo); + xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + mask += 16; + dst += 16; + w -= 16; + } + + while (w) + { + m = (uint32_t) *mask++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + _mm_adds_epu16 ( + pix_multiply_1x128 ( + xmm_alpha, unpack_32_1x128 (m)), + unpack_32_1x128 (d))); + + w--; + } + } + +} + +static void +sse2_composite_add_n_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + int dst_stride; + int32_t w; + uint32_t src; + + __m128i xmm_src; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + src >>= 24; + + if (src == 0x00) + return; + + if (src == 0xff) + { + pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, + 8, dest_x, dest_y, width, height, 0xff); + + return; + } + + src = (src << 24) | (src << 16) | (src << 8) | src; + xmm_src = _mm_set_epi32 (src, src, src, src); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + w = width; + + while (w && ((uintptr_t)dst & 15)) + { + *dst = (uint8_t)_mm_cvtsi128_si32 ( + _mm_adds_epu8 ( + xmm_src, + _mm_cvtsi32_si128 (*dst))); + + w--; + dst++; + } + + while (w >= 16) + { + save_128_aligned ( + (__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst))); + + dst += 16; + w -= 16; + } + + while (w) + { + *dst = (uint8_t)_mm_cvtsi128_si32 ( + _mm_adds_epu8 ( + xmm_src, + _mm_cvtsi32_si128 (*dst))); + + w--; + dst++; + } + } + +} + +static void +sse2_composite_add_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint16_t t; + + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + src = src_line; + + dst_line += dst_stride; + src_line += src_stride; + w = width; + + /* Small head */ + while (w && (uintptr_t)dst & 3) + { + t = (*dst) + (*src++); + *dst++ = t | (0 - (t >> 8)); + w--; + } + + sse2_combine_add_u (imp, op, + (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2); + + /* Small tail */ + dst += w & 0xfffc; + src += w & 0xfffc; + + w &= 3; + + while (w) + { + t = (*dst) + (*src++); + *dst++ = t | (0 - (t >> 8)); + w--; + } + } + +} + +static void +sse2_composite_add_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + + sse2_combine_add_u (imp, op, dst, src, NULL, width); + } +} + +static void +sse2_composite_add_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst, src; + int dst_stride; + + __m128i xmm_src; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + if (src == 0) + return; + + if (src == ~0) + { + pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32, + dest_x, dest_y, width, height, ~0); + + return; + } + + xmm_src = _mm_set_epi32 (src, src, src, src); + while (height--) + { + int w = width; + uint32_t d; + + dst = dst_line; + dst_line += dst_stride; + + while (w && (uintptr_t)dst & 15) + { + d = *dst; + *dst++ = + _mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d))); + w--; + } + + while (w >= 4) + { + save_128_aligned + ((__m128i*)dst, + _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst))); + + dst += 4; + w -= 4; + } + + while (w--) + { + d = *dst; + *dst++ = + _mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src, + _mm_cvtsi32_si128 (d))); + } + } +} + +static void +sse2_composite_add_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t src; + + __m128i xmm_src; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + if (src == 0) + return; + xmm_src = expand_pixel_32_1x128 (src); + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && ((uintptr_t)dst & 15)) + { + uint8_t m = *mask++; + if (m) + { + *dst = pack_1x128_32 + (_mm_adds_epu16 + (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), + unpack_32_1x128 (*dst))); + } + dst++; + w--; + } + + while (w >= 4) + { + uint32_t m = *(uint32_t*)mask; + if (m) + { + __m128i xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + __m128i xmm_dst = load_128_aligned ((__m128i*)dst); + __m128i xmm_mask = + _mm_unpacklo_epi8 (unpack_32_1x128(m), + _mm_setzero_si128 ()); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_src, &xmm_src, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo); + xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + w -= 4; + dst += 4; + mask += 4; + } + + while (w) + { + uint8_t m = *mask++; + if (m) + { + *dst = pack_1x128_32 + (_mm_adds_epu16 + (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), + unpack_32_1x128 (*dst))); + } + dst++; + w--; + } + } +} + +static pixman_bool_t +sse2_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height) +{ + uint8_t * src_bytes; + uint8_t * dst_bytes; + int byte_width; + + if (src_bpp != dst_bpp) + return FALSE; + + if (src_bpp == 16) + { + src_stride = src_stride * (int) sizeof (uint32_t) / 2; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; + src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); + byte_width = 2 * width; + src_stride *= 2; + dst_stride *= 2; + } + else if (src_bpp == 32) + { + src_stride = src_stride * (int) sizeof (uint32_t) / 4; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; + src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); + byte_width = 4 * width; + src_stride *= 4; + dst_stride *= 4; + } + else + { + return FALSE; + } + + while (height--) + { + int w; + uint8_t *s = src_bytes; + uint8_t *d = dst_bytes; + src_bytes += src_stride; + dst_bytes += dst_stride; + w = byte_width; + + while (w >= 2 && ((uintptr_t)d & 3)) + { + *(uint16_t *)d = *(uint16_t *)s; + w -= 2; + s += 2; + d += 2; + } + + while (w >= 4 && ((uintptr_t)d & 15)) + { + *(uint32_t *)d = *(uint32_t *)s; + + w -= 4; + s += 4; + d += 4; + } + + while (w >= 64) + { + __m128i xmm0, xmm1, xmm2, xmm3; + + xmm0 = load_128_unaligned ((__m128i*)(s)); + xmm1 = load_128_unaligned ((__m128i*)(s + 16)); + xmm2 = load_128_unaligned ((__m128i*)(s + 32)); + xmm3 = load_128_unaligned ((__m128i*)(s + 48)); + + save_128_aligned ((__m128i*)(d), xmm0); + save_128_aligned ((__m128i*)(d + 16), xmm1); + save_128_aligned ((__m128i*)(d + 32), xmm2); + save_128_aligned ((__m128i*)(d + 48), xmm3); + + s += 64; + d += 64; + w -= 64; + } + + while (w >= 16) + { + save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) ); + + w -= 16; + d += 16; + s += 16; + } + + while (w >= 4) + { + *(uint32_t *)d = *(uint32_t *)s; + + w -= 4; + s += 4; + d += 4; + } + + if (w >= 2) + { + *(uint16_t *)d = *(uint16_t *)s; + w -= 2; + s += 2; + d += 2; + } + } + + return TRUE; +} + +static void +sse2_composite_copy_area (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + sse2_blt (imp, src_image->bits.bits, + dest_image->bits.bits, + src_image->bits.rowstride, + dest_image->bits.rowstride, + PIXMAN_FORMAT_BPP (src_image->bits.format), + PIXMAN_FORMAT_BPP (dest_image->bits.format), + src_x, src_y, dest_x, dest_y, width, height); +} + +static void +sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *src, *src_line, s; + uint32_t *dst, *dst_line, d; + uint8_t *mask, *mask_line; + uint32_t m; + int src_stride, mask_stride, dst_stride; + int32_t w; + __m128i ms; + + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + w = width; + + while (w && (uintptr_t)dst & 15) + { + s = 0xff000000 | *src++; + m = (uint32_t) *mask++; + d = *dst; + ms = unpack_32_1x128 (s); + + if (m != 0xff) + { + __m128i ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); + __m128i md = unpack_32_1x128 (d); + + ms = in_over_1x128 (&ms, &mask_00ff, &ma, &md); + } + + *dst++ = pack_1x128_32 (ms); + w--; + } + + while (w >= 4) + { + m = *(uint32_t*) mask; + xmm_src = _mm_or_si128 ( + load_128_unaligned ((__m128i*)src), mask_ff000000); + + if (m == 0xffffffff) + { + save_128_aligned ((__m128i*)dst, xmm_src); + } + else + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_rev_2x128 ( + xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + src += 4; + dst += 4; + mask += 4; + w -= 4; + } + + while (w) + { + m = (uint32_t) *mask++; + + if (m) + { + s = 0xff000000 | *src; + + if (m == 0xff) + { + *dst = s; + } + else + { + __m128i ma, md, ms; + + d = *dst; + + ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); + md = unpack_32_1x128 (d); + ms = unpack_32_1x128 (s); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &mask_00ff, &ma, &md)); + } + + } + + src++; + dst++; + w--; + } + } + +} + +static void +sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *src, *src_line, s; + uint32_t *dst, *dst_line, d; + uint8_t *mask, *mask_line; + uint32_t m; + int src_stride, mask_stride, dst_stride; + int32_t w; + + __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + w = width; + + while (w && (uintptr_t)dst & 15) + { + uint32_t sa; + + s = *src++; + m = (uint32_t) *mask++; + d = *dst; + + sa = s >> 24; + + if (m) + { + if (sa == 0xff && m == 0xff) + { + *dst = s; + } + else + { + __m128i ms, md, ma, msa; + + ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); + ms = unpack_32_1x128 (s); + md = unpack_32_1x128 (d); + + msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); + } + } + + dst++; + w--; + } + + while (w >= 4) + { + m = *(uint32_t *) mask; + + if (m) + { + xmm_src = load_128_unaligned ((__m128i*)src); + + if (m == 0xffffffff && is_opaque (xmm_src)) + { + save_128_aligned ((__m128i *)dst, xmm_src); + } + else + { + xmm_dst = load_128_aligned ((__m128i *)dst); + + xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, + &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + } + + src += 4; + dst += 4; + mask += 4; + w -= 4; + } + + while (w) + { + uint32_t sa; + + s = *src++; + m = (uint32_t) *mask++; + d = *dst; + + sa = s >> 24; + + if (m) + { + if (sa == 0xff && m == 0xff) + { + *dst = s; + } + else + { + __m128i ms, md, ma, msa; + + ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); + ms = unpack_32_1x128 (s); + md = unpack_32_1x128 (d); + + msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); + } + } + + dst++; + w--; + } + } + +} + +static void +sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint32_t *dst_line, *dst; + __m128i xmm_src; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_dsta_hi, xmm_dsta_lo; + int dst_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + xmm_src = expand_pixel_32_1x128 (src); + + while (height--) + { + dst = dst_line; + + dst_line += dst_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + __m128i vd; + + vd = unpack_32_1x128 (*dst); + + *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd), + xmm_src)); + w--; + dst++; + } + + while (w >= 4) + { + __m128i tmp_lo, tmp_hi; + + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dsta_lo, &xmm_dsta_hi); + + tmp_lo = xmm_src; + tmp_hi = xmm_src; + + over_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_dsta_lo, &xmm_dsta_hi, + &tmp_lo, &tmp_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (tmp_lo, tmp_hi)); + + w -= 4; + dst += 4; + } + + while (w) + { + __m128i vd; + + vd = unpack_32_1x128 (*dst); + + *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd), + xmm_src)); + w--; + dst++; + } + + } + +} + +static void +sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *src, *src_line, s; + uint32_t *dst, *dst_line, d; + uint32_t *mask, *mask_line; + uint32_t m; + int src_stride, mask_stride, dst_stride; + int32_t w; + + __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + w = width; + + while (w && (uintptr_t)dst & 15) + { + uint32_t sa; + + s = *src++; + m = (*mask++) >> 24; + d = *dst; + + sa = s >> 24; + + if (m) + { + if (sa == 0xff && m == 0xff) + { + *dst = s; + } + else + { + __m128i ms, md, ma, msa; + + ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); + ms = unpack_32_1x128 (s); + md = unpack_32_1x128 (d); + + msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); + } + } + + dst++; + w--; + } + + while (w >= 4) + { + xmm_mask = load_128_unaligned ((__m128i*)mask); + + if (!is_transparent (xmm_mask)) + { + xmm_src = load_128_unaligned ((__m128i*)src); + + if (is_opaque (xmm_mask) && is_opaque (xmm_src)) + { + save_128_aligned ((__m128i *)dst, xmm_src); + } + else + { + xmm_dst = load_128_aligned ((__m128i *)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); + expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, + &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + } + + src += 4; + dst += 4; + mask += 4; + w -= 4; + } + + while (w) + { + uint32_t sa; + + s = *src++; + m = (*mask++) >> 24; + d = *dst; + + sa = s >> 24; + + if (m) + { + if (sa == 0xff && m == 0xff) + { + *dst = s; + } + else + { + __m128i ms, md, ma, msa; + + ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); + ms = unpack_32_1x128 (s); + md = unpack_32_1x128 (d); + + msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); + } + } + + dst++; + w--; + } + } + +} + +/* A variant of 'sse2_combine_over_u' with minor tweaks */ +static force_inline void +scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd, + const uint32_t* ps, + int32_t w, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t src_width_fixed, + pixman_bool_t fully_transparent_src) +{ + uint32_t s, d; + const uint32_t* pm = NULL; + + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + + if (fully_transparent_src) + return; + + /* Align dst on a 16-byte boundary */ + while (w && ((uintptr_t)pd & 15)) + { + d = *pd; + s = combine1 (ps + pixman_fixed_to_int (vx), pm); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + + *pd++ = core_combine_over_u_pixel_sse2 (s, d); + if (pm) + pm++; + w--; + } + + while (w >= 4) + { + __m128i tmp; + uint32_t tmp1, tmp2, tmp3, tmp4; + + tmp1 = *(ps + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + tmp2 = *(ps + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + tmp3 = *(ps + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + tmp4 = *(ps + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + + tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1); + + xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm); + + if (is_opaque (xmm_src_hi)) + { + save_128_aligned ((__m128i*)pd, xmm_src_hi); + } + else if (!is_zero (xmm_src_hi)) + { + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 ( + xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi); + + over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst_lo, &xmm_dst_hi); + + /* rebuid the 4 pixel data and save*/ + save_128_aligned ((__m128i*)pd, + pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + w -= 4; + pd += 4; + if (pm) + pm += 4; + } + + while (w) + { + d = *pd; + s = combine1 (ps + pixman_fixed_to_int (vx), pm); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + + *pd++ = core_combine_over_u_pixel_sse2 (s, d); + if (pm) + pm++; + + w--; + } +} + +FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER, + scaled_nearest_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, COVER) +FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER, + scaled_nearest_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, NONE) +FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER, + scaled_nearest_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, PAD) +FAST_NEAREST_MAINLOOP (sse2_8888_8888_normal_OVER, + scaled_nearest_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, NORMAL) + +static force_inline void +scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask, + uint32_t * dst, + const uint32_t * src, + int32_t w, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t src_width_fixed, + pixman_bool_t zero_src) +{ + __m128i xmm_mask; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + + if (zero_src || (*mask >> 24) == 0) + return; + + xmm_mask = create_mask_16_128 (*mask >> 24); + + while (w && (uintptr_t)dst & 15) + { + uint32_t s = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + + if (s) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (s); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i dest = xmm_mask; + __m128i alpha_dst = unpack_32_1x128 (d); + + *dst = pack_1x128_32 ( + in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); + } + dst++; + w--; + } + + while (w >= 4) + { + uint32_t tmp1, tmp2, tmp3, tmp4; + + tmp1 = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + tmp2 = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + tmp3 = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + tmp4 = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + + xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1); + + if (!is_zero (xmm_src)) + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask, &xmm_mask, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + dst += 4; + w -= 4; + } + + while (w) + { + uint32_t s = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + + if (s) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (s); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i mask = xmm_mask; + __m128i dest = unpack_32_1x128 (d); + + *dst = pack_1x128_32 ( + in_over_1x128 (&ms, &alpha, &mask, &dest)); + } + + dst++; + w--; + } + +} + +FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE) +FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE) +FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE) +FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE) + +#if PSHUFD_IS_FAST + +/***********************************************************************************/ + +# define BILINEAR_DECLARE_VARIABLES \ + const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ + const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ + const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ + const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \ + unit_x, -unit_x, unit_x, -unit_x); \ + const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4); \ + const __m128i xmm_zero = _mm_setzero_si128 (); \ + __m128i xmm_x = _mm_set_epi16 (vx + unit_x * 3, -(vx + 1) - unit_x * 3, \ + vx + unit_x * 2, -(vx + 1) - unit_x * 2, \ + vx + unit_x * 1, -(vx + 1) - unit_x * 1, \ + vx + unit_x * 0, -(vx + 1) - unit_x * 0); \ + __m128i xmm_wh_state; + +#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase_) \ +do { \ + int phase = phase_; \ + __m128i xmm_wh, xmm_a, xmm_b; \ + /* fetch 2x2 pixel block into sse2 registers */ \ + __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \ + __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \ + vx += unit_x; \ + /* vertical interpolation */ \ + xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \ + xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \ + xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \ + /* calculate horizontal weights */ \ + if (phase <= 0) \ + { \ + xmm_wh_state = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS)); \ + xmm_x = _mm_add_epi16 (xmm_x, (phase < 0) ? xmm_ux1 : xmm_ux4); \ + phase = 0; \ + } \ + xmm_wh = _mm_shuffle_epi32 (xmm_wh_state, _MM_SHUFFLE (phase, phase, \ + phase, phase)); \ + /* horizontal interpolation */ \ + xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \ + xmm_a, _MM_SHUFFLE (1, 0, 3, 2)), xmm_a), xmm_wh); \ + /* shift the result */ \ + pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2); \ +} while (0) + +#else /************************************************************************/ + +# define BILINEAR_DECLARE_VARIABLES \ + const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ + const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ + const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ + const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \ + unit_x, -unit_x, unit_x, -unit_x); \ + const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4); \ + const __m128i xmm_zero = _mm_setzero_si128 (); \ + __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), \ + vx, -(vx + 1), vx, -(vx + 1)) + +#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase) \ +do { \ + __m128i xmm_wh, xmm_a, xmm_b; \ + /* fetch 2x2 pixel block into sse2 registers */ \ + __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \ + __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \ + (void)xmm_ux4; /* suppress warning: unused variable 'xmm_ux4' */ \ + vx += unit_x; \ + /* vertical interpolation */ \ + xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \ + xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \ + xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \ + /* calculate horizontal weights */ \ + xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS)); \ + xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \ + /* horizontal interpolation */ \ + xmm_b = _mm_unpacklo_epi64 (/* any value is fine here */ xmm_b, xmm_a); \ + xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); \ + /* shift the result */ \ + pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2); \ +} while (0) + +/***********************************************************************************/ + +#endif + +#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix); \ +do { \ + __m128i xmm_pix; \ + BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix, -1); \ + xmm_pix = _mm_packs_epi32 (xmm_pix, xmm_pix); \ + xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); \ + pix = _mm_cvtsi128_si32 (xmm_pix); \ +} while(0) + +#define BILINEAR_INTERPOLATE_FOUR_PIXELS(pix); \ +do { \ + __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; \ + BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix1, 0); \ + BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix2, 1); \ + BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix3, 2); \ + BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix4, 3); \ + xmm_pix1 = _mm_packs_epi32 (xmm_pix1, xmm_pix2); \ + xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4); \ + pix = _mm_packus_epi16 (xmm_pix1, xmm_pix3); \ +} while(0) + +#define BILINEAR_SKIP_ONE_PIXEL() \ +do { \ + vx += unit_x; \ + xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \ +} while(0) + +#define BILINEAR_SKIP_FOUR_PIXELS() \ +do { \ + vx += unit_x * 4; \ + xmm_x = _mm_add_epi16 (xmm_x, xmm_ux4); \ +} while(0) + +/***********************************************************************************/ + +static force_inline void +scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx_, + pixman_fixed_t unit_x_, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + intptr_t vx = vx_; + intptr_t unit_x = unit_x_; + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2; + + while (w && ((uintptr_t)dst & 15)) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + *dst++ = pix1; + w--; + } + + while ((w -= 4) >= 0) { + __m128i xmm_src; + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); + _mm_store_si128 ((__m128i *)dst, xmm_src); + dst += 4; + } + + if (w & 2) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + *dst++ = pix1; + *dst++ = pix2; + } + + if (w & 1) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + *dst = pix1; + } + +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC, + scaled_bilinear_scanline_sse2_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC, + scaled_bilinear_scanline_sse2_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC, + scaled_bilinear_scanline_sse2_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC, + scaled_bilinear_scanline_sse2_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_NONE) + +static force_inline void +scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx_, + pixman_fixed_t unit_x_, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + intptr_t vx = vx_; + intptr_t unit_x = unit_x_; + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2; + + while (w && ((uintptr_t)dst & 15)) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + *dst++ = pix1 | 0xFF000000; + w--; + } + + while ((w -= 4) >= 0) { + __m128i xmm_src; + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); + _mm_store_si128 ((__m128i *)dst, _mm_or_si128 (xmm_src, mask_ff000000)); + dst += 4; + } + + if (w & 2) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + *dst++ = pix1 | 0xFF000000; + *dst++ = pix2 | 0xFF000000; + } + + if (w & 1) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + *dst = pix1 | 0xFF000000; + } +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_cover_SRC, + scaled_bilinear_scanline_sse2_x888_8888_SRC, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_pad_SRC, + scaled_bilinear_scanline_sse2_x888_8888_SRC, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_normal_SRC, + scaled_bilinear_scanline_sse2_x888_8888_SRC, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_NONE) + +static force_inline void +scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx_, + pixman_fixed_t unit_x_, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + intptr_t vx = vx_; + intptr_t unit_x = unit_x_; + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2; + + while (w && ((uintptr_t)dst & 15)) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + + if (pix1) + { + pix2 = *dst; + *dst = core_combine_over_u_pixel_sse2 (pix1, pix2); + } + + w--; + dst++; + } + + while (w >= 4) + { + __m128i xmm_src; + __m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo; + __m128i xmm_alpha_hi, xmm_alpha_lo; + + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); + + if (!is_zero (xmm_src)) + { + if (is_opaque (xmm_src)) + { + save_128_aligned ((__m128i *)dst, xmm_src); + } + else + { + __m128i xmm_dst = load_128_aligned ((__m128i *)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi); + over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ((__m128i *)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + } + + w -= 4; + dst += 4; + } + + while (w) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + + if (pix1) + { + pix2 = *dst; + *dst = core_combine_over_u_pixel_sse2 (pix1, pix2); + } + + w--; + dst++; + } +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER, + scaled_bilinear_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER, + scaled_bilinear_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER, + scaled_bilinear_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER, + scaled_bilinear_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_NONE) + +static force_inline void +scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, + const uint8_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx_, + pixman_fixed_t unit_x_, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + intptr_t vx = vx_; + intptr_t unit_x = unit_x_; + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2; + uint32_t m; + + while (w && ((uintptr_t)dst & 15)) + { + uint32_t sa; + + m = (uint32_t) *mask++; + + if (m) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + sa = pix1 >> 24; + + if (sa == 0xff && m == 0xff) + { + *dst = pix1; + } + else + { + __m128i ms, md, ma, msa; + + pix2 = *dst; + ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); + ms = unpack_32_1x128 (pix1); + md = unpack_32_1x128 (pix2); + + msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); + } + } + else + { + BILINEAR_SKIP_ONE_PIXEL (); + } + + w--; + dst++; + } + + while (w >= 4) + { + __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + m = *(uint32_t*)mask; + + if (m) + { + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); + + if (m == 0xffffffff && is_opaque (xmm_src)) + { + save_128_aligned ((__m128i *)dst, xmm_src); + } + else + { + xmm_dst = load_128_aligned ((__m128i *)dst); + + xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, + &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + } + else + { + BILINEAR_SKIP_FOUR_PIXELS (); + } + + w -= 4; + dst += 4; + mask += 4; + } + + while (w) + { + uint32_t sa; + + m = (uint32_t) *mask++; + + if (m) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + sa = pix1 >> 24; + + if (sa == 0xff && m == 0xff) + { + *dst = pix1; + } + else + { + __m128i ms, md, ma, msa; + + pix2 = *dst; + ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); + ms = unpack_32_1x128 (pix1); + md = unpack_32_1x128 (pix2); + + msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); + } + } + else + { + BILINEAR_SKIP_ONE_PIXEL (); + } + + w--; + dst++; + } +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + COVER, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + PAD, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + NONE, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + NORMAL, FLAG_HAVE_NON_SOLID_MASK) + +static force_inline void +scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx_, + pixman_fixed_t unit_x_, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + intptr_t vx = vx_; + intptr_t unit_x = unit_x_; + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1; + __m128i xmm_mask; + + if (zero_src || (*mask >> 24) == 0) + return; + + xmm_mask = create_mask_16_128 (*mask >> 24); + + while (w && ((uintptr_t)dst & 15)) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + if (pix1) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (pix1); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i dest = xmm_mask; + __m128i alpha_dst = unpack_32_1x128 (d); + + *dst = pack_1x128_32 + (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); + } + + dst++; + w--; + } + + while (w >= 4) + { + __m128i xmm_src; + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); + + if (!is_zero (xmm_src)) + { + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask, &xmm_mask, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned + ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + dst += 4; + w -= 4; + } + + while (w) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + if (pix1) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (pix1); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i dest = xmm_mask; + __m128i alpha_dst = unpack_32_1x128 (d); + + *dst = pack_1x128_32 + (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); + } + + dst++; + w--; + } +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_HAVE_SOLID_MASK) + +static const pixman_fast_path_t sse2_fast_paths[] = +{ + /* PIXMAN_OP_OVER */ + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, sse2_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, sse2_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, sse2_composite_over_n_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, sse2_composite_over_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, sse2_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, sse2_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, sse2_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, sse2_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, sse2_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, sse2_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, sse2_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, sse2_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, sse2_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, sse2_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, sse2_composite_over_8888_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, sse2_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, sse2_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, sse2_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, sse2_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, sse2_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, sse2_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, sse2_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, sse2_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, sse2_composite_over_x888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, sse2_composite_over_x888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, sse2_composite_over_x888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, sse2_composite_over_x888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, sse2_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, sse2_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, sse2_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, sse2_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, sse2_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, sse2_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, sse2_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, sse2_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, sse2_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, sse2_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, sse2_composite_over_pixbuf_8888), + PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, sse2_composite_over_pixbuf_8888), + PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, sse2_composite_over_pixbuf_8888), + PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, sse2_composite_over_pixbuf_8888), + PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, sse2_composite_over_pixbuf_0565), + PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, sse2_composite_over_pixbuf_0565), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area), + + /* PIXMAN_OP_OVER_REVERSE */ + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, sse2_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, sse2_composite_over_reverse_n_8888), + + /* PIXMAN_OP_ADD */ + PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8), + PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8), + PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, sse2_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888), + + /* PIXMAN_OP_SRC */ + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, sse2_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, sse2_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, sse2_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, sse2_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, sse2_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, sse2_composite_copy_area), + + /* PIXMAN_OP_IN */ + PIXMAN_STD_FAST_PATH (IN, a8, null, a8, sse2_composite_in_8_8), + PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8), + PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8), + + SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), + + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, sse2_8888_8888), + + SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888), + + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888), + + { PIXMAN_OP_NONE }, +}; + +static uint32_t * +sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + __m128i ff000000 = mask_ff000000; + uint32_t *dst = iter->buffer; + uint32_t *src = (uint32_t *)iter->bits; + + iter->bits += iter->stride; + + while (w && ((uintptr_t)dst) & 0x0f) + { + *dst++ = (*src++) | 0xff000000; + w--; + } + + while (w >= 4) + { + save_128_aligned ( + (__m128i *)dst, _mm_or_si128 ( + load_128_unaligned ((__m128i *)src), ff000000)); + + dst += 4; + src += 4; + w -= 4; + } + + while (w) + { + *dst++ = (*src++) | 0xff000000; + w--; + } + + return iter->buffer; +} + +static uint32_t * +sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + uint32_t *dst = iter->buffer; + uint16_t *src = (uint16_t *)iter->bits; + __m128i ff000000 = mask_ff000000; + + iter->bits += iter->stride; + + while (w && ((uintptr_t)dst) & 0x0f) + { + uint16_t s = *src++; + + *dst++ = convert_0565_to_8888 (s); + w--; + } + + while (w >= 8) + { + __m128i lo, hi, s; + + s = _mm_loadu_si128 ((__m128i *)src); + + lo = unpack_565_to_8888 (_mm_unpacklo_epi16 (s, _mm_setzero_si128 ())); + hi = unpack_565_to_8888 (_mm_unpackhi_epi16 (s, _mm_setzero_si128 ())); + + save_128_aligned ((__m128i *)(dst + 0), _mm_or_si128 (lo, ff000000)); + save_128_aligned ((__m128i *)(dst + 4), _mm_or_si128 (hi, ff000000)); + + dst += 8; + src += 8; + w -= 8; + } + + while (w) + { + uint16_t s = *src++; + + *dst++ = convert_0565_to_8888 (s); + w--; + } + + return iter->buffer; +} + +static uint32_t * +sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + uint32_t *dst = iter->buffer; + uint8_t *src = iter->bits; + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6; + + iter->bits += iter->stride; + + while (w && (((uintptr_t)dst) & 15)) + { + *dst++ = *(src++) << 24; + w--; + } + + while (w >= 16) + { + xmm0 = _mm_loadu_si128((__m128i *)src); + + xmm1 = _mm_unpacklo_epi8 (_mm_setzero_si128(), xmm0); + xmm2 = _mm_unpackhi_epi8 (_mm_setzero_si128(), xmm0); + xmm3 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm1); + xmm4 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm1); + xmm5 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm2); + xmm6 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm2); + + _mm_store_si128(((__m128i *)(dst + 0)), xmm3); + _mm_store_si128(((__m128i *)(dst + 4)), xmm4); + _mm_store_si128(((__m128i *)(dst + 8)), xmm5); + _mm_store_si128(((__m128i *)(dst + 12)), xmm6); + + dst += 16; + src += 16; + w -= 16; + } + + while (w) + { + *dst++ = *(src++) << 24; + w--; + } + + return iter->buffer; +} + +#define IMAGE_FLAGS \ + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + +static const pixman_iter_info_t sse2_iters[] = +{ + { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, sse2_fetch_x8r8g8b8, NULL + }, + { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, sse2_fetch_r5g6b5, NULL + }, + { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, sse2_fetch_a8, NULL + }, + { PIXMAN_null }, +}; + +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) +__attribute__((__force_align_arg_pointer__)) +#endif +pixman_implementation_t * +_pixman_implementation_create_sse2 (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = _pixman_implementation_create (fallback, sse2_fast_paths); + + /* SSE2 constants */ + mask_565_r = create_mask_2x32_128 (0x00f80000, 0x00f80000); + mask_565_g1 = create_mask_2x32_128 (0x00070000, 0x00070000); + mask_565_g2 = create_mask_2x32_128 (0x000000e0, 0x000000e0); + mask_565_b = create_mask_2x32_128 (0x0000001f, 0x0000001f); + mask_red = create_mask_2x32_128 (0x00f80000, 0x00f80000); + mask_green = create_mask_2x32_128 (0x0000fc00, 0x0000fc00); + mask_blue = create_mask_2x32_128 (0x000000f8, 0x000000f8); + mask_565_fix_rb = create_mask_2x32_128 (0x00e000e0, 0x00e000e0); + mask_565_fix_g = create_mask_2x32_128 (0x0000c000, 0x0000c000); + mask_0080 = create_mask_16_128 (0x0080); + mask_00ff = create_mask_16_128 (0x00ff); + mask_0101 = create_mask_16_128 (0x0101); + mask_ffff = create_mask_16_128 (0xffff); + mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000); + mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000); + mask_565_rb = create_mask_2x32_128 (0x00f800f8, 0x00f800f8); + mask_565_pack_multiplier = create_mask_2x32_128 (0x20000004, 0x20000004); + + /* Set up function pointers */ + imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u; + imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u; + imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u; + imp->combine_32[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_u; + imp->combine_32[PIXMAN_OP_OUT] = sse2_combine_out_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_u; + imp->combine_32[PIXMAN_OP_ATOP] = sse2_combine_atop_u; + imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_u; + imp->combine_32[PIXMAN_OP_XOR] = sse2_combine_xor_u; + imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u; + + imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u; + + imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca; + imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca; + imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_IN] = sse2_combine_in_ca; + imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_OUT] = sse2_combine_out_ca; + imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP] = sse2_combine_atop_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca; + imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca; + + imp->blt = sse2_blt; + imp->fill = sse2_fill; + + imp->iter_info = sse2_iters; + + return imp; +} diff --git a/src/pixman/pixman/pixman-ssse3.c b/src/pixman/pixman/pixman-ssse3.c new file mode 100644 index 0000000..680d6b9 --- /dev/null +++ b/src/pixman/pixman/pixman-ssse3.c @@ -0,0 +1,351 @@ +/* + * Copyright © 2013 Soren Sandmann Pedersen + * Copyright © 2013 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Soren Sandmann (soren.sandmann@gmail.com) + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdlib.h> +#include <mmintrin.h> +#include <xmmintrin.h> +#include <emmintrin.h> +#include <tmmintrin.h> +#include "pixman-private.h" +#include "pixman-inlines.h" + +typedef struct +{ + int y; + uint64_t * buffer; +} line_t; + +typedef struct +{ + line_t lines[2]; + pixman_fixed_t y; + pixman_fixed_t x; + uint64_t data[1]; +} bilinear_info_t; + +static void +ssse3_fetch_horizontal (bits_image_t *image, line_t *line, + int y, pixman_fixed_t x, pixman_fixed_t ux, int n) +{ + uint32_t *bits = image->bits + y * image->rowstride; + __m128i vx = _mm_set_epi16 ( + - (x + 1), x, - (x + 1), x, + - (x + ux + 1), x + ux, - (x + ux + 1), x + ux); + __m128i vux = _mm_set_epi16 ( + - 2 * ux, 2 * ux, - 2 * ux, 2 * ux, + - 2 * ux, 2 * ux, - 2 * ux, 2 * ux); + __m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0); + __m128i *b = (__m128i *)line->buffer; + __m128i vrl0, vrl1; + + while ((n -= 2) >= 0) + { + __m128i vw, vr, s; + + vrl1 = _mm_loadl_epi64 ( + (__m128i *)(bits + pixman_fixed_to_int (x + ux))); + /* vrl1: R1, L1 */ + + final_pixel: + vrl0 = _mm_loadl_epi64 ( + (__m128i *)(bits + pixman_fixed_to_int (x))); + /* vrl0: R0, L0 */ + + /* The weights are based on vx which is a vector of + * + * - (x + 1), x, - (x + 1), x, + * - (x + ux + 1), x + ux, - (x + ux + 1), x + ux + * + * so the 16 bit weights end up like this: + * + * iw0, w0, iw0, w0, iw1, w1, iw1, w1 + * + * and after shifting and packing, we get these bytes: + * + * iw0, w0, iw0, w0, iw1, w1, iw1, w1, + * iw0, w0, iw0, w0, iw1, w1, iw1, w1, + * + * which means the first and the second input pixel + * have to be interleaved like this: + * + * la0, ra0, lr0, rr0, la1, ra1, lr1, rr1, + * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1 + * + * before maddubsw can be used. + */ + + vw = _mm_add_epi16 ( + vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS)); + /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1 + */ + + vw = _mm_packus_epi16 (vw, vw); + /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1, + * iw0, w0, iw0, w0, iw1, w1, iw1, w1 + */ + vx = _mm_add_epi16 (vx, vux); + + x += 2 * ux; + + vr = _mm_unpacklo_epi16 (vrl1, vrl0); + /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */ + + s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2)); + /* s: lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */ + + vr = _mm_unpackhi_epi8 (vr, s); + /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1, + * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1 + */ + + vr = _mm_maddubs_epi16 (vr, vw); + + /* When the weight is 0, the inverse weight is + * 128 which can't be represented in a signed byte. + * As a result maddubsw computes the following: + * + * r = l * -128 + r * 0 + * + * rather than the desired + * + * r = l * 128 + r * 0 + * + * We fix this by taking the absolute value of the + * result. + */ + vr = _mm_abs_epi16 (vr); + + /* vr: A0, R0, A1, R1, G0, B0, G1, B1 */ + _mm_store_si128 (b++, vr); + } + + if (n == -1) + { + vrl1 = _mm_setzero_si128(); + goto final_pixel; + } + + line->y = y; +} + +static uint32_t * +ssse3_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask) +{ + pixman_fixed_t fx, ux; + bilinear_info_t *info = iter->data; + line_t *line0, *line1; + int y0, y1; + int32_t dist_y; + __m128i vw; + int i; + + fx = info->x; + ux = iter->image->common.transform->matrix[0][0]; + + y0 = pixman_fixed_to_int (info->y); + y1 = y0 + 1; + + line0 = &info->lines[y0 & 0x01]; + line1 = &info->lines[y1 & 0x01]; + + if (line0->y != y0) + { + ssse3_fetch_horizontal ( + &iter->image->bits, line0, y0, fx, ux, iter->width); + } + + if (line1->y != y1) + { + ssse3_fetch_horizontal ( + &iter->image->bits, line1, y1, fx, ux, iter->width); + } + + dist_y = pixman_fixed_to_bilinear_weight (info->y); + dist_y <<= (16 - BILINEAR_INTERPOLATION_BITS); + + vw = _mm_set_epi16 ( + dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y); + + for (i = 0; i + 3 < iter->width; i += 4) + { + __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i)); + __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i)); + __m128i top1 = _mm_load_si128 ((__m128i *)(line0->buffer + i + 2)); + __m128i bot1 = _mm_load_si128 ((__m128i *)(line1->buffer + i + 2)); + __m128i r0, r1, tmp, p; + + r0 = _mm_mulhi_epu16 ( + _mm_sub_epi16 (bot0, top0), vw); + tmp = _mm_cmplt_epi16 (bot0, top0); + tmp = _mm_and_si128 (tmp, vw); + r0 = _mm_sub_epi16 (r0, tmp); + r0 = _mm_add_epi16 (r0, top0); + r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS); + /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */ + r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1)); + /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */ + + r1 = _mm_mulhi_epu16 ( + _mm_sub_epi16 (bot1, top1), vw); + tmp = _mm_cmplt_epi16 (bot1, top1); + tmp = _mm_and_si128 (tmp, vw); + r1 = _mm_sub_epi16 (r1, tmp); + r1 = _mm_add_epi16 (r1, top1); + r1 = _mm_srli_epi16 (r1, BILINEAR_INTERPOLATION_BITS); + r1 = _mm_shuffle_epi32 (r1, _MM_SHUFFLE (2, 0, 3, 1)); + /* r1: A3 R3 G3 B3 A2 R2 G2 B2 */ + + p = _mm_packus_epi16 (r0, r1); + + _mm_storeu_si128 ((__m128i *)(iter->buffer + i), p); + } + + while (i < iter->width) + { + __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i)); + __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i)); + __m128i r0, tmp, p; + + r0 = _mm_mulhi_epu16 ( + _mm_sub_epi16 (bot0, top0), vw); + tmp = _mm_cmplt_epi16 (bot0, top0); + tmp = _mm_and_si128 (tmp, vw); + r0 = _mm_sub_epi16 (r0, tmp); + r0 = _mm_add_epi16 (r0, top0); + r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS); + /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */ + r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1)); + /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */ + + p = _mm_packus_epi16 (r0, r0); + + if (iter->width - i == 1) + { + *(uint32_t *)(iter->buffer + i) = _mm_cvtsi128_si32 (p); + i++; + } + else + { + _mm_storel_epi64 ((__m128i *)(iter->buffer + i), p); + i += 2; + } + } + + info->y += iter->image->common.transform->matrix[1][1]; + + return iter->buffer; +} + +static void +ssse3_bilinear_cover_iter_fini (pixman_iter_t *iter) +{ + free (iter->data); +} + +static void +ssse3_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info) +{ + int width = iter->width; + bilinear_info_t *info; + pixman_vector_t v; + + /* Reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (iter->image->common.transform, &v)) + goto fail; + + info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t) + 64); + if (!info) + goto fail; + + info->x = v.vector[0] - pixman_fixed_1 / 2; + info->y = v.vector[1] - pixman_fixed_1 / 2; + +#define ALIGN(addr) \ + ((void *)((((uintptr_t)(addr)) + 15) & (~15))) + + /* It is safe to set the y coordinates to -1 initially + * because COVER_CLIP_BILINEAR ensures that we will only + * be asked to fetch lines in the [0, height) interval + */ + info->lines[0].y = -1; + info->lines[0].buffer = ALIGN (&(info->data[0])); + info->lines[1].y = -1; + info->lines[1].buffer = ALIGN (info->lines[0].buffer + width); + + iter->get_scanline = ssse3_fetch_bilinear_cover; + iter->fini = ssse3_bilinear_cover_iter_fini; + + iter->data = info; + return; + +fail: + /* Something went wrong, either a bad matrix or OOM; in such cases, + * we don't guarantee any particular rendering. + */ + _pixman_log_error ( + FUNC, "Allocation failure or bad matrix, skipping rendering\n"); + + iter->get_scanline = _pixman_iter_get_scanline_noop; + iter->fini = NULL; +} + +static const pixman_iter_info_t ssse3_iters[] = +{ + { PIXMAN_a8r8g8b8, + (FAST_PATH_STANDARD_FLAGS | + FAST_PATH_SCALE_TRANSFORM | + FAST_PATH_BILINEAR_FILTER | + FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR), + ITER_NARROW | ITER_SRC, + ssse3_bilinear_cover_iter_init, + NULL, NULL + }, + + { PIXMAN_null }, +}; + +static const pixman_fast_path_t ssse3_fast_paths[] = +{ + { PIXMAN_OP_NONE }, +}; + +pixman_implementation_t * +_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = + _pixman_implementation_create (fallback, ssse3_fast_paths); + + imp->iter_info = ssse3_iters; + + return imp; +} diff --git a/src/pixman/pixman/pixman-timer.c b/src/pixman/pixman/pixman-timer.c new file mode 100644 index 0000000..f5ae18e --- /dev/null +++ b/src/pixman/pixman/pixman-timer.c @@ -0,0 +1,66 @@ +/* + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Red Hat not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. Red Hat makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * RED HAT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL RED HAT + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdlib.h> +#include <stdio.h> +#include "pixman-private.h" + +#ifdef PIXMAN_TIMERS + +static pixman_timer_t *timers; + +static void +dump_timers (void) +{ + pixman_timer_t *timer; + + for (timer = timers; timer != NULL; timer = timer->next) + { + printf ("%s: total: %llu n: %llu avg: %f\n", + timer->name, + timer->total, + timer->n_times, + timer->total / (double)timer->n_times); + } +} + +void +pixman_timer_register (pixman_timer_t *timer) +{ + static int initialized; + + int atexit (void (*function)(void)); + + if (!initialized) + { + atexit (dump_timers); + initialized = 1; + } + + timer->next = timers; + timers = timer; +} + +#endif diff --git a/src/pixman/pixman/pixman-trap.c b/src/pixman/pixman/pixman-trap.c new file mode 100644 index 0000000..91766fd --- /dev/null +++ b/src/pixman/pixman/pixman-trap.c @@ -0,0 +1,711 @@ +/* + * Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc. + * Copyright © 2004 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include "pixman-private.h" + +/* + * Compute the smallest value greater than or equal to y which is on a + * grid row. + */ + +PIXMAN_EXPORT pixman_fixed_t +pixman_sample_ceil_y (pixman_fixed_t y, int n) +{ + pixman_fixed_t f = pixman_fixed_frac (y); + pixman_fixed_t i = pixman_fixed_floor (y); + + f = DIV (f - Y_FRAC_FIRST (n) + (STEP_Y_SMALL (n) - pixman_fixed_e), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) + + Y_FRAC_FIRST (n); + + if (f > Y_FRAC_LAST (n)) + { + if (pixman_fixed_to_int (i) == 0x7fff) + { + f = 0xffff; /* saturate */ + } + else + { + f = Y_FRAC_FIRST (n); + i += pixman_fixed_1; + } + } + return (i | f); +} + +/* + * Compute the largest value strictly less than y which is on a + * grid row. + */ +PIXMAN_EXPORT pixman_fixed_t +pixman_sample_floor_y (pixman_fixed_t y, + int n) +{ + pixman_fixed_t f = pixman_fixed_frac (y); + pixman_fixed_t i = pixman_fixed_floor (y); + + f = DIV (f - pixman_fixed_e - Y_FRAC_FIRST (n), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) + + Y_FRAC_FIRST (n); + + if (f < Y_FRAC_FIRST (n)) + { + if (pixman_fixed_to_int (i) == 0x8000) + { + f = 0; /* saturate */ + } + else + { + f = Y_FRAC_LAST (n); + i -= pixman_fixed_1; + } + } + return (i | f); +} + +/* + * Step an edge by any amount (including negative values) + */ +PIXMAN_EXPORT void +pixman_edge_step (pixman_edge_t *e, + int n) +{ + pixman_fixed_48_16_t ne; + + e->x += n * e->stepx; + + ne = e->e + n * (pixman_fixed_48_16_t) e->dx; + + if (n >= 0) + { + if (ne > 0) + { + int nx = (ne + e->dy - 1) / e->dy; + e->e = ne - nx * (pixman_fixed_48_16_t) e->dy; + e->x += nx * e->signdx; + } + } + else + { + if (ne <= -e->dy) + { + int nx = (-ne) / e->dy; + e->e = ne + nx * (pixman_fixed_48_16_t) e->dy; + e->x -= nx * e->signdx; + } + } +} + +/* + * A private routine to initialize the multi-step + * elements of an edge structure + */ +static void +_pixman_edge_multi_init (pixman_edge_t * e, + int n, + pixman_fixed_t *stepx_p, + pixman_fixed_t *dx_p) +{ + pixman_fixed_t stepx; + pixman_fixed_48_16_t ne; + + ne = n * (pixman_fixed_48_16_t) e->dx; + stepx = n * e->stepx; + + if (ne > 0) + { + int nx = ne / e->dy; + ne -= nx * (pixman_fixed_48_16_t)e->dy; + stepx += nx * e->signdx; + } + + *dx_p = ne; + *stepx_p = stepx; +} + +/* + * Initialize one edge structure given the line endpoints and a + * starting y value + */ +PIXMAN_EXPORT void +pixman_edge_init (pixman_edge_t *e, + int n, + pixman_fixed_t y_start, + pixman_fixed_t x_top, + pixman_fixed_t y_top, + pixman_fixed_t x_bot, + pixman_fixed_t y_bot) +{ + pixman_fixed_t dx, dy; + + e->x = x_top; + e->e = 0; + dx = x_bot - x_top; + dy = y_bot - y_top; + e->dy = dy; + e->dx = 0; + + if (dy) + { + if (dx >= 0) + { + e->signdx = 1; + e->stepx = dx / dy; + e->dx = dx % dy; + e->e = -dy; + } + else + { + e->signdx = -1; + e->stepx = -(-dx / dy); + e->dx = -dx % dy; + e->e = 0; + } + + _pixman_edge_multi_init (e, STEP_Y_SMALL (n), + &e->stepx_small, &e->dx_small); + + _pixman_edge_multi_init (e, STEP_Y_BIG (n), + &e->stepx_big, &e->dx_big); + } + pixman_edge_step (e, y_start - y_top); +} + +/* + * Initialize one edge structure given a line, starting y value + * and a pixel offset for the line + */ +PIXMAN_EXPORT void +pixman_line_fixed_edge_init (pixman_edge_t * e, + int n, + pixman_fixed_t y, + const pixman_line_fixed_t *line, + int x_off, + int y_off) +{ + pixman_fixed_t x_off_fixed = pixman_int_to_fixed (x_off); + pixman_fixed_t y_off_fixed = pixman_int_to_fixed (y_off); + const pixman_point_fixed_t *top, *bot; + + if (line->p1.y <= line->p2.y) + { + top = &line->p1; + bot = &line->p2; + } + else + { + top = &line->p2; + bot = &line->p1; + } + + pixman_edge_init (e, n, y, + top->x + x_off_fixed, + top->y + y_off_fixed, + bot->x + x_off_fixed, + bot->y + y_off_fixed); +} + +PIXMAN_EXPORT void +pixman_add_traps (pixman_image_t * image, + int16_t x_off, + int16_t y_off, + int ntrap, + const pixman_trap_t *traps) +{ + int bpp; + int height; + + pixman_fixed_t x_off_fixed; + pixman_fixed_t y_off_fixed; + pixman_edge_t l, r; + pixman_fixed_t t, b; + + _pixman_image_validate (image); + + height = image->bits.height; + bpp = PIXMAN_FORMAT_BPP (image->bits.format); + + x_off_fixed = pixman_int_to_fixed (x_off); + y_off_fixed = pixman_int_to_fixed (y_off); + + while (ntrap--) + { + t = traps->top.y + y_off_fixed; + if (t < 0) + t = 0; + t = pixman_sample_ceil_y (t, bpp); + + b = traps->bot.y + y_off_fixed; + if (pixman_fixed_to_int (b) >= height) + b = pixman_int_to_fixed (height) - 1; + b = pixman_sample_floor_y (b, bpp); + + if (b >= t) + { + /* initialize edge walkers */ + pixman_edge_init (&l, bpp, t, + traps->top.l + x_off_fixed, + traps->top.y + y_off_fixed, + traps->bot.l + x_off_fixed, + traps->bot.y + y_off_fixed); + + pixman_edge_init (&r, bpp, t, + traps->top.r + x_off_fixed, + traps->top.y + y_off_fixed, + traps->bot.r + x_off_fixed, + traps->bot.y + y_off_fixed); + + pixman_rasterize_edges (image, &l, &r, t, b); + } + + traps++; + } +} + +#if 0 +static void +dump_image (pixman_image_t *image, + const char * title) +{ + int i, j; + + if (!image->type == BITS) + printf ("%s is not a regular image\n", title); + + if (!image->bits.format == PIXMAN_a8) + printf ("%s is not an alpha mask\n", title); + + printf ("\n\n\n%s: \n", title); + + for (i = 0; i < image->bits.height; ++i) + { + uint8_t *line = + (uint8_t *)&(image->bits.bits[i * image->bits.rowstride]); + + for (j = 0; j < image->bits.width; ++j) + printf ("%c", line[j] ? '#' : ' '); + + printf ("\n"); + } +} +#endif + +PIXMAN_EXPORT void +pixman_add_trapezoids (pixman_image_t * image, + int16_t x_off, + int y_off, + int ntraps, + const pixman_trapezoid_t *traps) +{ + int i; + +#if 0 + dump_image (image, "before"); +#endif + + for (i = 0; i < ntraps; ++i) + { + const pixman_trapezoid_t *trap = &(traps[i]); + + if (!pixman_trapezoid_valid (trap)) + continue; + + pixman_rasterize_trapezoid (image, trap, x_off, y_off); + } + +#if 0 + dump_image (image, "after"); +#endif +} + +PIXMAN_EXPORT void +pixman_rasterize_trapezoid (pixman_image_t * image, + const pixman_trapezoid_t *trap, + int x_off, + int y_off) +{ + int bpp; + int height; + + pixman_fixed_t y_off_fixed; + pixman_edge_t l, r; + pixman_fixed_t t, b; + + return_if_fail (image->type == BITS); + + _pixman_image_validate (image); + + if (!pixman_trapezoid_valid (trap)) + return; + + height = image->bits.height; + bpp = PIXMAN_FORMAT_BPP (image->bits.format); + + y_off_fixed = pixman_int_to_fixed (y_off); + + t = trap->top + y_off_fixed; + if (t < 0) + t = 0; + t = pixman_sample_ceil_y (t, bpp); + + b = trap->bottom + y_off_fixed; + if (pixman_fixed_to_int (b) >= height) + b = pixman_int_to_fixed (height) - 1; + b = pixman_sample_floor_y (b, bpp); + + if (b >= t) + { + /* initialize edge walkers */ + pixman_line_fixed_edge_init (&l, bpp, t, &trap->left, x_off, y_off); + pixman_line_fixed_edge_init (&r, bpp, t, &trap->right, x_off, y_off); + + pixman_rasterize_edges (image, &l, &r, t, b); + } +} + +static const pixman_bool_t zero_src_has_no_effect[PIXMAN_N_OPERATORS] = +{ + FALSE, /* Clear 0 0 */ + FALSE, /* Src 1 0 */ + TRUE, /* Dst 0 1 */ + TRUE, /* Over 1 1-Aa */ + TRUE, /* OverReverse 1-Ab 1 */ + FALSE, /* In Ab 0 */ + FALSE, /* InReverse 0 Aa */ + FALSE, /* Out 1-Ab 0 */ + TRUE, /* OutReverse 0 1-Aa */ + TRUE, /* Atop Ab 1-Aa */ + FALSE, /* AtopReverse 1-Ab Aa */ + TRUE, /* Xor 1-Ab 1-Aa */ + TRUE, /* Add 1 1 */ +}; + +static pixman_bool_t +get_trap_extents (pixman_op_t op, pixman_image_t *dest, + const pixman_trapezoid_t *traps, int n_traps, + pixman_box32_t *box) +{ + int i; + + /* When the operator is such that a zero source has an + * effect on the underlying image, we have to + * composite across the entire destination + */ + if (!zero_src_has_no_effect [op]) + { + box->x1 = 0; + box->y1 = 0; + box->x2 = dest->bits.width; + box->y2 = dest->bits.height; + return TRUE; + } + + box->x1 = INT32_MAX; + box->y1 = INT32_MAX; + box->x2 = INT32_MIN; + box->y2 = INT32_MIN; + + for (i = 0; i < n_traps; ++i) + { + const pixman_trapezoid_t *trap = &(traps[i]); + int y1, y2; + + if (!pixman_trapezoid_valid (trap)) + continue; + + y1 = pixman_fixed_to_int (trap->top); + if (y1 < box->y1) + box->y1 = y1; + + y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap->bottom)); + if (y2 > box->y2) + box->y2 = y2; + +#define EXTEND_MIN(x) \ + if (pixman_fixed_to_int ((x)) < box->x1) \ + box->x1 = pixman_fixed_to_int ((x)); +#define EXTEND_MAX(x) \ + if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) > box->x2) \ + box->x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x))); + +#define EXTEND(x) \ + EXTEND_MIN(x); \ + EXTEND_MAX(x); + + EXTEND(trap->left.p1.x); + EXTEND(trap->left.p2.x); + EXTEND(trap->right.p1.x); + EXTEND(trap->right.p2.x); + } + + if (box->x1 >= box->x2 || box->y1 >= box->y2) + return FALSE; + + return TRUE; +} + +/* + * pixman_composite_trapezoids() + * + * All the trapezoids are conceptually rendered to an infinitely big image. + * The (0, 0) coordinates of this image are then aligned with the (x, y) + * coordinates of the source image, and then both images are aligned with + * the (x, y) coordinates of the destination. Then these three images are + * composited across the entire destination. + */ +PIXMAN_EXPORT void +pixman_composite_trapezoids (pixman_op_t op, + pixman_image_t * src, + pixman_image_t * dst, + pixman_format_code_t mask_format, + int x_src, + int y_src, + int x_dst, + int y_dst, + int n_traps, + const pixman_trapezoid_t * traps) +{ + int i; + + return_if_fail (PIXMAN_FORMAT_TYPE (mask_format) == PIXMAN_TYPE_A); + + if (n_traps <= 0) + return; + + _pixman_image_validate (src); + _pixman_image_validate (dst); + + if (op == PIXMAN_OP_ADD && + (src->common.flags & FAST_PATH_IS_OPAQUE) && + (mask_format == dst->common.extended_format_code) && + !(dst->common.have_clip_region)) + { + for (i = 0; i < n_traps; ++i) + { + const pixman_trapezoid_t *trap = &(traps[i]); + + if (!pixman_trapezoid_valid (trap)) + continue; + + pixman_rasterize_trapezoid (dst, trap, x_dst, y_dst); + } + } + else + { + pixman_image_t *tmp; + pixman_box32_t box; + int i; + + if (!get_trap_extents (op, dst, traps, n_traps, &box)) + return; + + if (!(tmp = pixman_image_create_bits ( + mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1))) + return; + + for (i = 0; i < n_traps; ++i) + { + const pixman_trapezoid_t *trap = &(traps[i]); + + if (!pixman_trapezoid_valid (trap)) + continue; + + pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1); + } + + pixman_image_composite (op, src, tmp, dst, + x_src + box.x1, y_src + box.y1, + 0, 0, + x_dst + box.x1, y_dst + box.y1, + box.x2 - box.x1, box.y2 - box.y1); + + pixman_image_unref (tmp); + } +} + +static int +greater_y (const pixman_point_fixed_t *a, const pixman_point_fixed_t *b) +{ + if (a->y == b->y) + return a->x > b->x; + return a->y > b->y; +} + +/* + * Note that the definition of this function is a bit odd because + * of the X coordinate space (y increasing downwards). + */ +static int +clockwise (const pixman_point_fixed_t *ref, + const pixman_point_fixed_t *a, + const pixman_point_fixed_t *b) +{ + pixman_point_fixed_t ad, bd; + + ad.x = a->x - ref->x; + ad.y = a->y - ref->y; + bd.x = b->x - ref->x; + bd.y = b->y - ref->y; + + return ((pixman_fixed_32_32_t) bd.y * ad.x - + (pixman_fixed_32_32_t) ad.y * bd.x) < 0; +} + +static void +triangle_to_trapezoids (const pixman_triangle_t *tri, pixman_trapezoid_t *traps) +{ + const pixman_point_fixed_t *top, *left, *right, *tmp; + + top = &tri->p1; + left = &tri->p2; + right = &tri->p3; + + if (greater_y (top, left)) + { + tmp = left; + left = top; + top = tmp; + } + + if (greater_y (top, right)) + { + tmp = right; + right = top; + top = tmp; + } + + if (clockwise (top, right, left)) + { + tmp = right; + right = left; + left = tmp; + } + + /* + * Two cases: + * + * + + + * / \ / \ + * / \ / \ + * / + + \ + * / -- -- \ + * / -- -- \ + * / --- --- \ + * +-- --+ + */ + + traps->top = top->y; + traps->left.p1 = *top; + traps->left.p2 = *left; + traps->right.p1 = *top; + traps->right.p2 = *right; + + if (right->y < left->y) + traps->bottom = right->y; + else + traps->bottom = left->y; + + traps++; + + *traps = *(traps - 1); + + if (right->y < left->y) + { + traps->top = right->y; + traps->bottom = left->y; + traps->right.p1 = *right; + traps->right.p2 = *left; + } + else + { + traps->top = left->y; + traps->bottom = right->y; + traps->left.p1 = *left; + traps->left.p2 = *right; + } +} + +static pixman_trapezoid_t * +convert_triangles (int n_tris, const pixman_triangle_t *tris) +{ + pixman_trapezoid_t *traps; + int i; + + if (n_tris <= 0) + return NULL; + + traps = pixman_malloc_ab (n_tris, 2 * sizeof (pixman_trapezoid_t)); + if (!traps) + return NULL; + + for (i = 0; i < n_tris; ++i) + triangle_to_trapezoids (&(tris[i]), traps + 2 * i); + + return traps; +} + +PIXMAN_EXPORT void +pixman_composite_triangles (pixman_op_t op, + pixman_image_t * src, + pixman_image_t * dst, + pixman_format_code_t mask_format, + int x_src, + int y_src, + int x_dst, + int y_dst, + int n_tris, + const pixman_triangle_t * tris) +{ + pixman_trapezoid_t *traps; + + if ((traps = convert_triangles (n_tris, tris))) + { + pixman_composite_trapezoids (op, src, dst, mask_format, + x_src, y_src, x_dst, y_dst, + n_tris * 2, traps); + + free (traps); + } +} + +PIXMAN_EXPORT void +pixman_add_triangles (pixman_image_t *image, + int32_t x_off, + int32_t y_off, + int n_tris, + const pixman_triangle_t *tris) +{ + pixman_trapezoid_t *traps; + + if ((traps = convert_triangles (n_tris, tris))) + { + pixman_add_trapezoids (image, x_off, y_off, + n_tris * 2, traps); + + free (traps); + } +} diff --git a/src/pixman/pixman/pixman-utils.c b/src/pixman/pixman/pixman-utils.c new file mode 100644 index 0000000..4a3a835 --- /dev/null +++ b/src/pixman/pixman/pixman-utils.c @@ -0,0 +1,330 @@ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 1999 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Keith Packard, SuSE, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include <stdio.h> +#include <stdlib.h> + +#include "pixman-private.h" + +pixman_bool_t +_pixman_multiply_overflows_size (size_t a, size_t b) +{ + return a >= SIZE_MAX / b; +} + +pixman_bool_t +_pixman_multiply_overflows_int (unsigned int a, unsigned int b) +{ + return a >= INT32_MAX / b; +} + +pixman_bool_t +_pixman_addition_overflows_int (unsigned int a, unsigned int b) +{ + return a > INT32_MAX - b; +} + +void * +pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c) +{ + if (!b || a >= INT32_MAX / b || (a * b) > INT32_MAX - c) + return NULL; + + return malloc (a * b + c); +} + +void * +pixman_malloc_ab (unsigned int a, + unsigned int b) +{ + if (a >= INT32_MAX / b) + return NULL; + + return malloc (a * b); +} + +void * +pixman_malloc_abc (unsigned int a, + unsigned int b, + unsigned int c) +{ + if (a >= INT32_MAX / b) + return NULL; + else if (a * b >= INT32_MAX / c) + return NULL; + else + return malloc (a * b * c); +} + +static force_inline uint16_t +float_to_unorm (float f, int n_bits) +{ + uint32_t u; + + if (f > 1.0) + f = 1.0; + if (f < 0.0) + f = 0.0; + + u = f * (1 << n_bits); + u -= (u >> n_bits); + + return u; +} + +static force_inline float +unorm_to_float (uint16_t u, int n_bits) +{ + uint32_t m = ((1 << n_bits) - 1); + + return (u & m) * (1.f / (float)m); +} + +/* + * This function expands images from a8r8g8b8 to argb_t. To preserve + * precision, it needs to know from which source format the a8r8g8b8 pixels + * originally came. + * + * For example, if the source was PIXMAN_x1r5g5b5 and the red component + * contained bits 12345, then the 8-bit value is 12345123. To correctly + * expand this to floating point, it should be 12345 / 31.0 and not + * 12345123 / 255.0. + */ +void +pixman_expand_to_float (argb_t *dst, + const uint32_t *src, + pixman_format_code_t format, + int width) +{ + static const float multipliers[16] = { + 0.0f, + 1.0f / ((1 << 1) - 1), + 1.0f / ((1 << 2) - 1), + 1.0f / ((1 << 3) - 1), + 1.0f / ((1 << 4) - 1), + 1.0f / ((1 << 5) - 1), + 1.0f / ((1 << 6) - 1), + 1.0f / ((1 << 7) - 1), + 1.0f / ((1 << 8) - 1), + 1.0f / ((1 << 9) - 1), + 1.0f / ((1 << 10) - 1), + 1.0f / ((1 << 11) - 1), + 1.0f / ((1 << 12) - 1), + 1.0f / ((1 << 13) - 1), + 1.0f / ((1 << 14) - 1), + 1.0f / ((1 << 15) - 1), + }; + int a_size, r_size, g_size, b_size; + int a_shift, r_shift, g_shift, b_shift; + float a_mul, r_mul, g_mul, b_mul; + uint32_t a_mask, r_mask, g_mask, b_mask; + int i; + + if (!PIXMAN_FORMAT_VIS (format)) + format = PIXMAN_a8r8g8b8; + + /* + * Determine the sizes of each component and the masks and shifts + * required to extract them from the source pixel. + */ + a_size = PIXMAN_FORMAT_A (format); + r_size = PIXMAN_FORMAT_R (format); + g_size = PIXMAN_FORMAT_G (format); + b_size = PIXMAN_FORMAT_B (format); + + a_shift = 32 - a_size; + r_shift = 24 - r_size; + g_shift = 16 - g_size; + b_shift = 8 - b_size; + + a_mask = ((1 << a_size) - 1); + r_mask = ((1 << r_size) - 1); + g_mask = ((1 << g_size) - 1); + b_mask = ((1 << b_size) - 1); + + a_mul = multipliers[a_size]; + r_mul = multipliers[r_size]; + g_mul = multipliers[g_size]; + b_mul = multipliers[b_size]; + + /* Start at the end so that we can do the expansion in place + * when src == dst + */ + for (i = width - 1; i >= 0; i--) + { + const uint32_t pixel = src[i]; + + dst[i].a = a_mask? ((pixel >> a_shift) & a_mask) * a_mul : 1.0f; + dst[i].r = ((pixel >> r_shift) & r_mask) * r_mul; + dst[i].g = ((pixel >> g_shift) & g_mask) * g_mul; + dst[i].b = ((pixel >> b_shift) & b_mask) * b_mul; + } +} + +uint16_t +pixman_float_to_unorm (float f, int n_bits) +{ + return float_to_unorm (f, n_bits); +} + +float +pixman_unorm_to_float (uint16_t u, int n_bits) +{ + return unorm_to_float (u, n_bits); +} + +void +pixman_contract_from_float (uint32_t *dst, + const argb_t *src, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint8_t a, r, g, b; + + a = float_to_unorm (src[i].a, 8); + r = float_to_unorm (src[i].r, 8); + g = float_to_unorm (src[i].g, 8); + b = float_to_unorm (src[i].b, 8); + + dst[i] = (a << 24) | (r << 16) | (g << 8) | (b << 0); + } +} + +uint32_t * +_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask) +{ + return iter->buffer; +} + +void +_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info) +{ + pixman_image_t *image = iter->image; + uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (info->format) / 8; + iter->stride = s; +} + +#define N_TMP_BOXES (16) + +pixman_bool_t +pixman_region16_copy_from_region32 (pixman_region16_t *dst, + pixman_region32_t *src) +{ + int n_boxes, i; + pixman_box32_t *boxes32; + pixman_box16_t *boxes16; + pixman_bool_t retval; + + boxes32 = pixman_region32_rectangles (src, &n_boxes); + + boxes16 = pixman_malloc_ab (n_boxes, sizeof (pixman_box16_t)); + + if (!boxes16) + return FALSE; + + for (i = 0; i < n_boxes; ++i) + { + boxes16[i].x1 = boxes32[i].x1; + boxes16[i].y1 = boxes32[i].y1; + boxes16[i].x2 = boxes32[i].x2; + boxes16[i].y2 = boxes32[i].y2; + } + + pixman_region_fini (dst); + retval = pixman_region_init_rects (dst, boxes16, n_boxes); + free (boxes16); + return retval; +} + +pixman_bool_t +pixman_region32_copy_from_region16 (pixman_region32_t *dst, + pixman_region16_t *src) +{ + int n_boxes, i; + pixman_box16_t *boxes16; + pixman_box32_t *boxes32; + pixman_box32_t tmp_boxes[N_TMP_BOXES]; + pixman_bool_t retval; + + boxes16 = pixman_region_rectangles (src, &n_boxes); + + if (n_boxes > N_TMP_BOXES) + boxes32 = pixman_malloc_ab (n_boxes, sizeof (pixman_box32_t)); + else + boxes32 = tmp_boxes; + + if (!boxes32) + return FALSE; + + for (i = 0; i < n_boxes; ++i) + { + boxes32[i].x1 = boxes16[i].x1; + boxes32[i].y1 = boxes16[i].y1; + boxes32[i].x2 = boxes16[i].x2; + boxes32[i].y2 = boxes16[i].y2; + } + + pixman_region32_fini (dst); + retval = pixman_region32_init_rects (dst, boxes32, n_boxes); + + if (boxes32 != tmp_boxes) + free (boxes32); + + return retval; +} + +/* This function is exported for the sake of the test suite and not part + * of the ABI. + */ +PIXMAN_EXPORT pixman_implementation_t * +_pixman_internal_only_get_implementation (void) +{ + return get_implementation (); +} + +void +_pixman_log_error (const char *function, const char *message) +{ + static int n_messages = 0; + + if (n_messages < 10) + { + fprintf (stderr, + "*** BUG ***\n" + "In %s: %s\n" + "Set a breakpoint on '_pixman_log_error' to debug\n\n", + function, message); + + n_messages++; + } +} diff --git a/src/pixman/pixman/pixman-version.h.in b/src/pixman/pixman/pixman-version.h.in new file mode 100644 index 0000000..256b2e6 --- /dev/null +++ b/src/pixman/pixman/pixman-version.h.in @@ -0,0 +1,50 @@ +/* + * Copyright © 2008 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Author: Carl D. Worth <cworth@cworth.org> + */ + +#ifndef PIXMAN_VERSION_H__ +#define PIXMAN_VERSION_H__ + +#ifndef PIXMAN_H__ +# error pixman-version.h should only be included by pixman.h +#endif + +#define PIXMAN_VERSION_MAJOR @PIXMAN_VERSION_MAJOR@ +#define PIXMAN_VERSION_MINOR @PIXMAN_VERSION_MINOR@ +#define PIXMAN_VERSION_MICRO @PIXMAN_VERSION_MICRO@ + +#define PIXMAN_VERSION_STRING "@PIXMAN_VERSION_MAJOR@.@PIXMAN_VERSION_MINOR@.@PIXMAN_VERSION_MICRO@" + +#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \ + ((major) * 10000) \ + + ((minor) * 100) \ + + ((micro) * 1)) + +#define PIXMAN_VERSION PIXMAN_VERSION_ENCODE( \ + PIXMAN_VERSION_MAJOR, \ + PIXMAN_VERSION_MINOR, \ + PIXMAN_VERSION_MICRO) + +#endif /* PIXMAN_VERSION_H__ */ diff --git a/src/pixman/pixman/pixman-vmx.c b/src/pixman/pixman/pixman-vmx.c new file mode 100644 index 0000000..c33631c --- /dev/null +++ b/src/pixman/pixman/pixman-vmx.c @@ -0,0 +1,2026 @@ +/* + * Copyright © 2007 Luca Barbato + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Luca Barbato not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. Luca Barbato makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Luca Barbato (lu_zero@gentoo.org) + * + * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include "pixman-private.h" +#include "pixman-combine32.h" +#include <altivec.h> + +#define AVV(x...) {x} + +static force_inline vector unsigned int +splat_alpha (vector unsigned int pix) +{ + return vec_perm (pix, pix, + (vector unsigned char)AVV ( + 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04, + 0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C)); +} + +static force_inline vector unsigned int +pix_multiply (vector unsigned int p, vector unsigned int a) +{ + vector unsigned short hi, lo, mod; + + /* unpack to short */ + hi = (vector unsigned short) + vec_mergeh ((vector unsigned char)AVV (0), + (vector unsigned char)p); + + mod = (vector unsigned short) + vec_mergeh ((vector unsigned char)AVV (0), + (vector unsigned char)a); + + hi = vec_mladd (hi, mod, (vector unsigned short) + AVV (0x0080, 0x0080, 0x0080, 0x0080, + 0x0080, 0x0080, 0x0080, 0x0080)); + + hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8))); + + hi = vec_sr (hi, vec_splat_u16 (8)); + + /* unpack to short */ + lo = (vector unsigned short) + vec_mergel ((vector unsigned char)AVV (0), + (vector unsigned char)p); + mod = (vector unsigned short) + vec_mergel ((vector unsigned char)AVV (0), + (vector unsigned char)a); + + lo = vec_mladd (lo, mod, (vector unsigned short) + AVV (0x0080, 0x0080, 0x0080, 0x0080, + 0x0080, 0x0080, 0x0080, 0x0080)); + + lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8))); + + lo = vec_sr (lo, vec_splat_u16 (8)); + + return (vector unsigned int)vec_packsu (hi, lo); +} + +static force_inline vector unsigned int +pix_add (vector unsigned int a, vector unsigned int b) +{ + return (vector unsigned int)vec_adds ((vector unsigned char)a, + (vector unsigned char)b); +} + +static force_inline vector unsigned int +pix_add_mul (vector unsigned int x, + vector unsigned int a, + vector unsigned int y, + vector unsigned int b) +{ + vector unsigned int t1, t2; + + t1 = pix_multiply (x, a); + t2 = pix_multiply (y, b); + + return pix_add (t1, t2); +} + +static force_inline vector unsigned int +negate (vector unsigned int src) +{ + return vec_nor (src, src); +} + +/* dest*~srca + src */ +static force_inline vector unsigned int +over (vector unsigned int src, + vector unsigned int srca, + vector unsigned int dest) +{ + vector unsigned char tmp = (vector unsigned char) + pix_multiply (dest, negate (srca)); + + tmp = vec_adds ((vector unsigned char)src, tmp); + return (vector unsigned int)tmp; +} + +/* in == pix_multiply */ +#define in_over(src, srca, mask, dest) \ + over (pix_multiply (src, mask), \ + pix_multiply (srca, mask), dest) + + +#define COMPUTE_SHIFT_MASK(source) \ + source ## _mask = vec_lvsl (0, source); + +#define COMPUTE_SHIFT_MASKS(dest, source) \ + source ## _mask = vec_lvsl (0, source); + +#define COMPUTE_SHIFT_MASKC(dest, source, mask) \ + mask ## _mask = vec_lvsl (0, mask); \ + source ## _mask = vec_lvsl (0, source); + +/* notice you have to declare temp vars... + * Note: tmp3 and tmp4 must remain untouched! + */ + +#define LOAD_VECTORS(dest, source) \ + tmp1 = (typeof(tmp1))vec_ld (0, source); \ + tmp2 = (typeof(tmp2))vec_ld (15, source); \ + v ## source = (typeof(v ## source)) \ + vec_perm (tmp1, tmp2, source ## _mask); \ + v ## dest = (typeof(v ## dest))vec_ld (0, dest); + +#define LOAD_VECTORSC(dest, source, mask) \ + tmp1 = (typeof(tmp1))vec_ld (0, source); \ + tmp2 = (typeof(tmp2))vec_ld (15, source); \ + v ## source = (typeof(v ## source)) \ + vec_perm (tmp1, tmp2, source ## _mask); \ + tmp1 = (typeof(tmp1))vec_ld (0, mask); \ + v ## dest = (typeof(v ## dest))vec_ld (0, dest); \ + tmp2 = (typeof(tmp2))vec_ld (15, mask); \ + v ## mask = (typeof(v ## mask)) \ + vec_perm (tmp1, tmp2, mask ## _mask); + +#define LOAD_VECTORSM(dest, source, mask) \ + LOAD_VECTORSC (dest, source, mask) \ + v ## source = pix_multiply (v ## source, \ + splat_alpha (v ## mask)); + +#define STORE_VECTOR(dest) \ + vec_st ((vector unsigned int) v ## dest, 0, dest); + +static void +vmx_combine_over_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + + *dest++ = d; + width--; + } + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORS (dest, src); + + vdest = over (vsrc, splat_alpha (vsrc), vdest); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + + dest[i] = d; + } +} + +static void +vmx_combine_over_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t ia; + + UN8x4_MUL_UN8 (s, m); + + ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + *dest++ = d; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); + + vdest = over (vsrc, splat_alpha (vsrc), vdest); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t ia; + + UN8x4_MUL_UN8 (s, m); + + ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + dest[i] = d; + } +} + +static void +vmx_combine_over_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_over_u_mask (dest, src, mask, width); + else + vmx_combine_over_u_no_mask (dest, src, width); +} + +static void +vmx_combine_over_reverse_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORS (dest, src); + + vdest = over (vdest, splat_alpha (vdest), vsrc); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t ia = ALPHA_8 (~dest[i]); + + UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); + dest[i] = s; + } +} + +static void +vmx_combine_over_reverse_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8 (s, m); + + UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSM (dest, src, mask); + + vdest = over (vdest, splat_alpha (vdest), vsrc); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t ia = ALPHA_8 (~dest[i]); + + UN8x4_MUL_UN8 (s, m); + + UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); + dest[i] = s; + } +} + +static void +vmx_combine_over_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_over_reverse_u_mask (dest, src, mask, width); + else + vmx_combine_over_reverse_u_no_mask (dest, src, width); +} + +static void +vmx_combine_in_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t a = ALPHA_8 (*dest); + + UN8x4_MUL_UN8 (s, a); + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORS (dest, src); + + vdest = pix_multiply (vsrc, splat_alpha (vdest)); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t a = ALPHA_8 (dest[i]); + + UN8x4_MUL_UN8 (s, a); + dest[i] = s; + } +} + +static void +vmx_combine_in_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t a = ALPHA_8 (*dest); + + UN8x4_MUL_UN8 (s, m); + UN8x4_MUL_UN8 (s, a); + + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_multiply (vsrc, splat_alpha (vdest)); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t a = ALPHA_8 (dest[i]); + + UN8x4_MUL_UN8 (s, m); + UN8x4_MUL_UN8 (s, a); + + dest[i] = s; + } +} + +static void +vmx_combine_in_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_in_u_mask (dest, src, mask, width); + else + vmx_combine_in_u_no_mask (dest, src, width); +} + +static void +vmx_combine_in_reverse_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t d = *dest; + uint32_t a = ALPHA_8 (*src++); + + UN8x4_MUL_UN8 (d, a); + + *dest++ = d; + width--; + } + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORS (dest, src); + + vdest = pix_multiply (vdest, splat_alpha (vsrc)); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t d = dest[i]; + uint32_t a = ALPHA_8 (src[i]); + + UN8x4_MUL_UN8 (d, a); + + dest[i] = d; + } +} + +static void +vmx_combine_in_reverse_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t d = *dest; + uint32_t a = *src++; + + UN8x4_MUL_UN8 (a, m); + a = ALPHA_8 (a); + UN8x4_MUL_UN8 (d, a); + + *dest++ = d; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_multiply (vdest, splat_alpha (vsrc)); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t d = dest[i]; + uint32_t a = src[i]; + + UN8x4_MUL_UN8 (a, m); + a = ALPHA_8 (a); + UN8x4_MUL_UN8 (d, a); + + dest[i] = d; + } +} + +static void +vmx_combine_in_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_in_reverse_u_mask (dest, src, mask, width); + else + vmx_combine_in_reverse_u_no_mask (dest, src, width); +} + +static void +vmx_combine_out_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t a = ALPHA_8 (~(*dest)); + + UN8x4_MUL_UN8 (s, a); + + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORS (dest, src); + + vdest = pix_multiply (vsrc, splat_alpha (negate (vdest))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t a = ALPHA_8 (~dest[i]); + + UN8x4_MUL_UN8 (s, a); + + dest[i] = s; + } +} + +static void +vmx_combine_out_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t a = ALPHA_8 (~(*dest)); + + UN8x4_MUL_UN8 (s, m); + UN8x4_MUL_UN8 (s, a); + + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_multiply (vsrc, splat_alpha (negate (vdest))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t a = ALPHA_8 (~dest[i]); + + UN8x4_MUL_UN8 (s, m); + UN8x4_MUL_UN8 (s, a); + + dest[i] = s; + } +} + +static void +vmx_combine_out_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_out_u_mask (dest, src, mask, width); + else + vmx_combine_out_u_no_mask (dest, src, width); +} + +static void +vmx_combine_out_reverse_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t d = *dest; + uint32_t a = ALPHA_8 (~(*src++)); + + UN8x4_MUL_UN8 (d, a); + + *dest++ = d; + width--; + } + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORS (dest, src); + + vdest = pix_multiply (vdest, splat_alpha (negate (vsrc))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t d = dest[i]; + uint32_t a = ALPHA_8 (~src[i]); + + UN8x4_MUL_UN8 (d, a); + + dest[i] = d; + } +} + +static void +vmx_combine_out_reverse_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t d = *dest; + uint32_t a = *src++; + + UN8x4_MUL_UN8 (a, m); + a = ALPHA_8 (~a); + UN8x4_MUL_UN8 (d, a); + + *dest++ = d; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_multiply (vdest, splat_alpha (negate (vsrc))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t d = dest[i]; + uint32_t a = src[i]; + + UN8x4_MUL_UN8 (a, m); + a = ALPHA_8 (~a); + UN8x4_MUL_UN8 (d, a); + + dest[i] = d; + } +} + +static void +vmx_combine_out_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_out_reverse_u_mask (dest, src, mask, width); + else + vmx_combine_out_reverse_u_no_mask (dest, src, width); +} + +static void +vmx_combine_atop_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t dest_a = ALPHA_8 (d); + uint32_t src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); + + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORS (dest, src); + + vdest = pix_add_mul (vsrc, splat_alpha (vdest), + vdest, splat_alpha (negate (vsrc))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t dest_a = ALPHA_8 (d); + uint32_t src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); + + dest[i] = s; + } +} + +static void +vmx_combine_atop_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t dest_a = ALPHA_8 (d); + uint32_t src_ia; + + UN8x4_MUL_UN8 (s, m); + + src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); + + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_add_mul (vsrc, splat_alpha (vdest), + vdest, splat_alpha (negate (vsrc))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t dest_a = ALPHA_8 (d); + uint32_t src_ia; + + UN8x4_MUL_UN8 (s, m); + + src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); + + dest[i] = s; + } +} + +static void +vmx_combine_atop_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_atop_u_mask (dest, src, mask, width); + else + vmx_combine_atop_u_no_mask (dest, src, width); +} + +static void +vmx_combine_atop_reverse_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t src_a = ALPHA_8 (s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); + + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORS (dest, src); + + vdest = pix_add_mul (vdest, splat_alpha (vsrc), + vsrc, splat_alpha (negate (vdest))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t src_a = ALPHA_8 (s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); + + dest[i] = s; + } +} + +static void +vmx_combine_atop_reverse_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t src_a; + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8 (s, m); + + src_a = ALPHA_8 (s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); + + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_add_mul (vdest, splat_alpha (vsrc), + vsrc, splat_alpha (negate (vdest))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t src_a; + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8 (s, m); + + src_a = ALPHA_8 (s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); + + dest[i] = s; + } +} + +static void +vmx_combine_atop_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_atop_reverse_u_mask (dest, src, mask, width); + else + vmx_combine_atop_reverse_u_no_mask (dest, src, width); +} + +static void +vmx_combine_xor_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t src_ia = ALPHA_8 (~s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); + + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORS (dest, src); + + vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), + vdest, splat_alpha (negate (vsrc))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t src_ia = ALPHA_8 (~s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); + + dest[i] = s; + } +} + +static void +vmx_combine_xor_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t src_ia; + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8 (s, m); + + src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); + + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), + vdest, splat_alpha (negate (vsrc))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t src_ia; + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8 (s, m); + + src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); + + dest[i] = s; + } +} + +static void +vmx_combine_xor_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_xor_u_mask (dest, src, mask, width); + else + vmx_combine_xor_u_no_mask (dest, src, width); +} + +static void +vmx_combine_add_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + + UN8x4_ADD_UN8x4 (d, s); + + *dest++ = d; + width--; + } + + COMPUTE_SHIFT_MASKS (dest, src); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORS (dest, src); + + vdest = pix_add (vsrc, vdest); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + + UN8x4_ADD_UN8x4 (d, s); + + dest[i] = d; + } +} + +static void +vmx_combine_add_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + + UN8x4_MUL_UN8 (s, m); + UN8x4_ADD_UN8x4 (d, s); + + *dest++ = d; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_add (vsrc, vdest); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + + UN8x4_MUL_UN8 (s, m); + UN8x4_ADD_UN8x4 (d, s); + + dest[i] = d; + } +} + +static void +vmx_combine_add_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_add_u_mask (dest, src, mask, width); + else + vmx_combine_add_u_no_mask (dest, src, width); +} + +static void +vmx_combine_src_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + + UN8x4_MUL_UN8x4 (s, a); + + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_multiply (vsrc, vmask); + + STORE_VECTOR (dest); + + mask += 4; + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + + UN8x4_MUL_UN8x4 (s, a); + + dest[i] = s; + } +} + +static void +vmx_combine_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (s); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s); + + *dest++ = d; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); + + vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest); + + STORE_VECTOR (dest); + + mask += 4; + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (s); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s); + + dest[i] = d; + } +} + +static void +vmx_combine_over_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t ida = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d); + + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); + + vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask)); + + STORE_VECTOR (dest); + + mask += 4; + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t ida = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d); + + dest[i] = s; + } +} + +static void +vmx_combine_in_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t da = ALPHA_8 (*dest); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (s, da); + + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest)); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t da = ALPHA_8 (dest[i]); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (s, da); + + dest[i] = s; + } +} + +static void +vmx_combine_in_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (*src++); + + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4 (d, a); + + *dest++ = d; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (src[i]); + + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4 (d, a); + + dest[i] = d; + } +} + +static void +vmx_combine_out_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t da = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (s, da); + + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_multiply ( + pix_multiply (vsrc, vmask), splat_alpha (negate (vdest))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t da = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (s, da); + + dest[i] = s; + } +} + +static void +vmx_combine_out_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (s); + + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4 (d, ~a); + + *dest++ = d; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_multiply ( + vdest, negate (pix_multiply (vmask, splat_alpha (vsrc)))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (s); + + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4 (d, ~a); + + dest[i] = d; + } +} + +static void +vmx_combine_atop_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask, vsrca; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); + + *dest++ = d; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); + + vsrca = splat_alpha (vsrc); + + vsrc = pix_multiply (vsrc, vmask); + vmask = pix_multiply (vmask, vsrca); + + vdest = pix_add_mul (vsrc, splat_alpha (vdest), + negate (vmask), vdest); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); + + dest[i] = d; + } +} + +static void +vmx_combine_atop_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da); + + *dest++ = d; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_add_mul (vdest, + pix_multiply (vmask, splat_alpha (vsrc)), + pix_multiply (vsrc, vmask), + negate (splat_alpha (vdest))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da); + + dest[i] = d; + } +} + +static void +vmx_combine_xor_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); + + *dest++ = d; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_add_mul (vdest, + negate (pix_multiply (vmask, splat_alpha (vsrc))), + pix_multiply (vsrc, vmask), + negate (splat_alpha (vdest))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); + + dest[i] = d; + } +} + +static void +vmx_combine_add_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_ADD_UN8x4 (s, d); + + *dest++ = s; + width--; + } + + COMPUTE_SHIFT_MASKC (dest, src, mask); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_add (pix_multiply (vsrc, vmask), vdest); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_ADD_UN8x4 (s, d); + + dest[i] = s; + } +} + +static const pixman_fast_path_t vmx_fast_paths[] = +{ + { PIXMAN_OP_NONE }, +}; + +pixman_implementation_t * +_pixman_implementation_create_vmx (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths); + + /* Set up function pointers */ + + imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u; + imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u; + imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u; + imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_u; + imp->combine_32[PIXMAN_OP_OUT] = vmx_combine_out_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_u; + imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u; + imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u; + imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u; + + imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u; + + imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca; + imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca; + imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_IN] = vmx_combine_in_ca; + imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_OUT] = vmx_combine_out_ca; + imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx_combine_atop_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca; + imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca; + + return imp; +} diff --git a/src/pixman/pixman/pixman-x86.c b/src/pixman/pixman/pixman-x86.c new file mode 100644 index 0000000..05297c4 --- /dev/null +++ b/src/pixman/pixman/pixman-x86.c @@ -0,0 +1,248 @@ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "pixman-private.h" + +#if defined(USE_X86_MMX) || defined (USE_SSE2) || defined (USE_SSSE3) + +/* The CPU detection code needs to be in a file not compiled with + * "-mmmx -msse", as gcc would generate CMOV instructions otherwise + * that would lead to SIGILL instructions on old CPUs that don't have + * it. + */ + +typedef enum +{ + X86_MMX = (1 << 0), + X86_MMX_EXTENSIONS = (1 << 1), + X86_SSE = (1 << 2) | X86_MMX_EXTENSIONS, + X86_SSE2 = (1 << 3), + X86_CMOV = (1 << 4), + X86_SSSE3 = (1 << 5) +} cpu_features_t; + +#ifdef HAVE_GETISAX + +#include <sys/auxv.h> + +static cpu_features_t +detect_cpu_features (void) +{ + cpu_features_t features = 0; + unsigned int result = 0; + + if (getisax (&result, 1)) + { + if (result & AV_386_CMOV) + features |= X86_CMOV; + if (result & AV_386_MMX) + features |= X86_MMX; + if (result & AV_386_AMD_MMX) + features |= X86_MMX_EXTENSIONS; + if (result & AV_386_SSE) + features |= X86_SSE; + if (result & AV_386_SSE2) + features |= X86_SSE2; + if (result & AV_386_SSSE3) + features |= X86_SSSE3; + } + + return features; +} + +#else + +#define _PIXMAN_X86_64 \ + (defined(__amd64__) || defined(__x86_64__) || defined(_M_AMD64)) + +static pixman_bool_t +have_cpuid (void) +{ +#if _PIXMAN_X86_64 || defined (_MSC_VER) + + return TRUE; + +#elif defined (__GNUC__) + uint32_t result; + + __asm__ volatile ( + "pushf" "\n\t" + "pop %%eax" "\n\t" + "mov %%eax, %%ecx" "\n\t" + "xor $0x00200000, %%eax" "\n\t" + "push %%eax" "\n\t" + "popf" "\n\t" + "pushf" "\n\t" + "pop %%eax" "\n\t" + "xor %%ecx, %%eax" "\n\t" + "mov %%eax, %0" "\n\t" + : "=r" (result) + : + : "%eax", "%ecx"); + + return !!result; + +#else +#error "Unknown compiler" +#endif +} + +static void +pixman_cpuid (uint32_t feature, + uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) +{ +#if defined (__GNUC__) + +#if _PIXMAN_X86_64 + __asm__ volatile ( + "cpuid" "\n\t" + : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) + : "a" (feature)); +#else + /* On x86-32 we need to be careful about the handling of %ebx + * and %esp. We can't declare either one as clobbered + * since they are special registers (%ebx is the "PIC + * register" holding an offset to global data, %esp the + * stack pointer), so we need to make sure that %ebx is + * preserved, and that %esp has its original value when + * accessing the output operands. + */ + __asm__ volatile ( + "xchg %%ebx, %1" "\n\t" + "cpuid" "\n\t" + "xchg %%ebx, %1" "\n\t" + : "=a" (*a), "=r" (*b), "=c" (*c), "=d" (*d) + : "a" (feature)); +#endif + +#elif defined (_MSC_VER) + int info[4]; + + __cpuid (info, feature); + + *a = info[0]; + *b = info[1]; + *c = info[2]; + *d = info[3]; +#else +#error Unknown compiler +#endif +} + +static cpu_features_t +detect_cpu_features (void) +{ + uint32_t a, b, c, d; + cpu_features_t features = 0; + + if (!have_cpuid()) + return features; + + /* Get feature bits */ + pixman_cpuid (0x01, &a, &b, &c, &d); + if (d & (1 << 15)) + features |= X86_CMOV; + if (d & (1 << 23)) + features |= X86_MMX; + if (d & (1 << 25)) + features |= X86_SSE; + if (d & (1 << 26)) + features |= X86_SSE2; + if (c & (1 << 9)) + features |= X86_SSSE3; + + /* Check for AMD specific features */ + if ((features & X86_MMX) && !(features & X86_SSE)) + { + char vendor[13]; + + /* Get vendor string */ + memset (vendor, 0, sizeof vendor); + + pixman_cpuid (0x00, &a, &b, &c, &d); + memcpy (vendor + 0, &b, 4); + memcpy (vendor + 4, &d, 4); + memcpy (vendor + 8, &c, 4); + + if (strcmp (vendor, "AuthenticAMD") == 0 || + strcmp (vendor, "Geode by NSC") == 0) + { + pixman_cpuid (0x80000000, &a, &b, &c, &d); + if (a >= 0x80000001) + { + pixman_cpuid (0x80000001, &a, &b, &c, &d); + + if (d & (1 << 22)) + features |= X86_MMX_EXTENSIONS; + } + } + } + + return features; +} + +#endif + +static pixman_bool_t +have_feature (cpu_features_t feature) +{ + static pixman_bool_t initialized; + static cpu_features_t features; + + if (!initialized) + { + features = detect_cpu_features(); + initialized = TRUE; + } + + return (features & feature) == feature; +} + +#endif + +pixman_implementation_t * +_pixman_x86_get_implementations (pixman_implementation_t *imp) +{ +#define MMX_BITS (X86_MMX | X86_MMX_EXTENSIONS) +#define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2) +#define SSSE3_BITS (X86_SSE | X86_SSE2 | X86_SSSE3) + +#ifdef USE_X86_MMX + if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS)) + imp = _pixman_implementation_create_mmx (imp); +#endif + +#ifdef USE_SSE2 + if (!_pixman_disabled ("sse2") && have_feature (SSE2_BITS)) + imp = _pixman_implementation_create_sse2 (imp); +#endif + +#ifdef USE_SSSE3 + if (!_pixman_disabled ("ssse3") && have_feature (SSSE3_BITS)) + imp = _pixman_implementation_create_ssse3 (imp); +#endif + + return imp; +} diff --git a/src/pixman/pixman/pixman.c b/src/pixman/pixman/pixman.c new file mode 100644 index 0000000..9555cea --- /dev/null +++ b/src/pixman/pixman/pixman.c @@ -0,0 +1,1135 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Keith Packard, SuSE, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include "pixman-private.h" + +#include <stdlib.h> + +pixman_implementation_t *global_implementation; + +#ifdef TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR +static void __attribute__((constructor)) +pixman_constructor (void) +{ + global_implementation = _pixman_choose_implementation (); +} +#endif + +typedef struct operator_info_t operator_info_t; + +struct operator_info_t +{ + uint8_t opaque_info[4]; +}; + +#define PACK(neither, src, dest, both) \ + {{ (uint8_t)PIXMAN_OP_ ## neither, \ + (uint8_t)PIXMAN_OP_ ## src, \ + (uint8_t)PIXMAN_OP_ ## dest, \ + (uint8_t)PIXMAN_OP_ ## both }} + +static const operator_info_t operator_table[] = +{ + /* Neither Opaque Src Opaque Dst Opaque Both Opaque */ + PACK (CLEAR, CLEAR, CLEAR, CLEAR), + PACK (SRC, SRC, SRC, SRC), + PACK (DST, DST, DST, DST), + PACK (OVER, SRC, OVER, SRC), + PACK (OVER_REVERSE, OVER_REVERSE, DST, DST), + PACK (IN, IN, SRC, SRC), + PACK (IN_REVERSE, DST, IN_REVERSE, DST), + PACK (OUT, OUT, CLEAR, CLEAR), + PACK (OUT_REVERSE, CLEAR, OUT_REVERSE, CLEAR), + PACK (ATOP, IN, OVER, SRC), + PACK (ATOP_REVERSE, OVER_REVERSE, IN_REVERSE, DST), + PACK (XOR, OUT, OUT_REVERSE, CLEAR), + PACK (ADD, ADD, ADD, ADD), + PACK (SATURATE, OVER_REVERSE, DST, DST), + + {{ 0 /* 0x0e */ }}, + {{ 0 /* 0x0f */ }}, + + PACK (CLEAR, CLEAR, CLEAR, CLEAR), + PACK (SRC, SRC, SRC, SRC), + PACK (DST, DST, DST, DST), + PACK (DISJOINT_OVER, DISJOINT_OVER, DISJOINT_OVER, DISJOINT_OVER), + PACK (DISJOINT_OVER_REVERSE, DISJOINT_OVER_REVERSE, DISJOINT_OVER_REVERSE, DISJOINT_OVER_REVERSE), + PACK (DISJOINT_IN, DISJOINT_IN, DISJOINT_IN, DISJOINT_IN), + PACK (DISJOINT_IN_REVERSE, DISJOINT_IN_REVERSE, DISJOINT_IN_REVERSE, DISJOINT_IN_REVERSE), + PACK (DISJOINT_OUT, DISJOINT_OUT, DISJOINT_OUT, DISJOINT_OUT), + PACK (DISJOINT_OUT_REVERSE, DISJOINT_OUT_REVERSE, DISJOINT_OUT_REVERSE, DISJOINT_OUT_REVERSE), + PACK (DISJOINT_ATOP, DISJOINT_ATOP, DISJOINT_ATOP, DISJOINT_ATOP), + PACK (DISJOINT_ATOP_REVERSE, DISJOINT_ATOP_REVERSE, DISJOINT_ATOP_REVERSE, DISJOINT_ATOP_REVERSE), + PACK (DISJOINT_XOR, DISJOINT_XOR, DISJOINT_XOR, DISJOINT_XOR), + + {{ 0 /* 0x1c */ }}, + {{ 0 /* 0x1d */ }}, + {{ 0 /* 0x1e */ }}, + {{ 0 /* 0x1f */ }}, + + PACK (CLEAR, CLEAR, CLEAR, CLEAR), + PACK (SRC, SRC, SRC, SRC), + PACK (DST, DST, DST, DST), + PACK (CONJOINT_OVER, CONJOINT_OVER, CONJOINT_OVER, CONJOINT_OVER), + PACK (CONJOINT_OVER_REVERSE, CONJOINT_OVER_REVERSE, CONJOINT_OVER_REVERSE, CONJOINT_OVER_REVERSE), + PACK (CONJOINT_IN, CONJOINT_IN, CONJOINT_IN, CONJOINT_IN), + PACK (CONJOINT_IN_REVERSE, CONJOINT_IN_REVERSE, CONJOINT_IN_REVERSE, CONJOINT_IN_REVERSE), + PACK (CONJOINT_OUT, CONJOINT_OUT, CONJOINT_OUT, CONJOINT_OUT), + PACK (CONJOINT_OUT_REVERSE, CONJOINT_OUT_REVERSE, CONJOINT_OUT_REVERSE, CONJOINT_OUT_REVERSE), + PACK (CONJOINT_ATOP, CONJOINT_ATOP, CONJOINT_ATOP, CONJOINT_ATOP), + PACK (CONJOINT_ATOP_REVERSE, CONJOINT_ATOP_REVERSE, CONJOINT_ATOP_REVERSE, CONJOINT_ATOP_REVERSE), + PACK (CONJOINT_XOR, CONJOINT_XOR, CONJOINT_XOR, CONJOINT_XOR), + + {{ 0 /* 0x2c */ }}, + {{ 0 /* 0x2d */ }}, + {{ 0 /* 0x2e */ }}, + {{ 0 /* 0x2f */ }}, + + PACK (MULTIPLY, MULTIPLY, MULTIPLY, MULTIPLY), + PACK (SCREEN, SCREEN, SCREEN, SCREEN), + PACK (OVERLAY, OVERLAY, OVERLAY, OVERLAY), + PACK (DARKEN, DARKEN, DARKEN, DARKEN), + PACK (LIGHTEN, LIGHTEN, LIGHTEN, LIGHTEN), + PACK (COLOR_DODGE, COLOR_DODGE, COLOR_DODGE, COLOR_DODGE), + PACK (COLOR_BURN, COLOR_BURN, COLOR_BURN, COLOR_BURN), + PACK (HARD_LIGHT, HARD_LIGHT, HARD_LIGHT, HARD_LIGHT), + PACK (SOFT_LIGHT, SOFT_LIGHT, SOFT_LIGHT, SOFT_LIGHT), + PACK (DIFFERENCE, DIFFERENCE, DIFFERENCE, DIFFERENCE), + PACK (EXCLUSION, EXCLUSION, EXCLUSION, EXCLUSION), + PACK (HSL_HUE, HSL_HUE, HSL_HUE, HSL_HUE), + PACK (HSL_SATURATION, HSL_SATURATION, HSL_SATURATION, HSL_SATURATION), + PACK (HSL_COLOR, HSL_COLOR, HSL_COLOR, HSL_COLOR), + PACK (HSL_LUMINOSITY, HSL_LUMINOSITY, HSL_LUMINOSITY, HSL_LUMINOSITY), +}; + +/* + * Optimize the current operator based on opacity of source or destination + * The output operator should be mathematically equivalent to the source. + */ +static pixman_op_t +optimize_operator (pixman_op_t op, + uint32_t src_flags, + uint32_t mask_flags, + uint32_t dst_flags) +{ + pixman_bool_t is_source_opaque, is_dest_opaque; + +#define OPAQUE_SHIFT 13 + + COMPILE_TIME_ASSERT (FAST_PATH_IS_OPAQUE == (1 << OPAQUE_SHIFT)); + + is_dest_opaque = (dst_flags & FAST_PATH_IS_OPAQUE); + is_source_opaque = ((src_flags & mask_flags) & FAST_PATH_IS_OPAQUE); + + is_dest_opaque >>= OPAQUE_SHIFT - 1; + is_source_opaque >>= OPAQUE_SHIFT; + + return operator_table[op].opaque_info[is_dest_opaque | is_source_opaque]; +} + +/* + * Computing composite region + */ +static inline pixman_bool_t +clip_general_image (pixman_region32_t * region, + pixman_region32_t * clip, + int dx, + int dy) +{ + if (pixman_region32_n_rects (region) == 1 && + pixman_region32_n_rects (clip) == 1) + { + pixman_box32_t * rbox = pixman_region32_rectangles (region, NULL); + pixman_box32_t * cbox = pixman_region32_rectangles (clip, NULL); + int v; + + if (rbox->x1 < (v = cbox->x1 + dx)) + rbox->x1 = v; + if (rbox->x2 > (v = cbox->x2 + dx)) + rbox->x2 = v; + if (rbox->y1 < (v = cbox->y1 + dy)) + rbox->y1 = v; + if (rbox->y2 > (v = cbox->y2 + dy)) + rbox->y2 = v; + if (rbox->x1 >= rbox->x2 || rbox->y1 >= rbox->y2) + { + pixman_region32_init (region); + return FALSE; + } + } + else if (!pixman_region32_not_empty (clip)) + { + return FALSE; + } + else + { + if (dx || dy) + pixman_region32_translate (region, -dx, -dy); + + if (!pixman_region32_intersect (region, region, clip)) + return FALSE; + + if (dx || dy) + pixman_region32_translate (region, dx, dy); + } + + return pixman_region32_not_empty (region); +} + +static inline pixman_bool_t +clip_source_image (pixman_region32_t * region, + pixman_image_t * image, + int dx, + int dy) +{ + /* Source clips are ignored, unless they are explicitly turned on + * and the clip in question was set by an X client. (Because if + * the clip was not set by a client, then it is a hierarchy + * clip and those should always be ignored for sources). + */ + if (!image->common.clip_sources || !image->common.client_clip) + return TRUE; + + return clip_general_image (region, + &image->common.clip_region, + dx, dy); +} + +/* + * returns FALSE if the final region is empty. Indistinguishable from + * an allocation failure, but rendering ignores those anyways. + */ +pixman_bool_t +_pixman_compute_composite_region32 (pixman_region32_t * region, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dest_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + region->extents.x1 = dest_x; + region->extents.x2 = dest_x + width; + region->extents.y1 = dest_y; + region->extents.y2 = dest_y + height; + + region->extents.x1 = MAX (region->extents.x1, 0); + region->extents.y1 = MAX (region->extents.y1, 0); + region->extents.x2 = MIN (region->extents.x2, dest_image->bits.width); + region->extents.y2 = MIN (region->extents.y2, dest_image->bits.height); + + region->data = 0; + + /* Check for empty operation */ + if (region->extents.x1 >= region->extents.x2 || + region->extents.y1 >= region->extents.y2) + { + region->extents.x1 = 0; + region->extents.x2 = 0; + region->extents.y1 = 0; + region->extents.y2 = 0; + return FALSE; + } + + if (dest_image->common.have_clip_region) + { + if (!clip_general_image (region, &dest_image->common.clip_region, 0, 0)) + return FALSE; + } + + if (dest_image->common.alpha_map) + { + if (!pixman_region32_intersect_rect (region, region, + dest_image->common.alpha_origin_x, + dest_image->common.alpha_origin_y, + dest_image->common.alpha_map->width, + dest_image->common.alpha_map->height)) + { + return FALSE; + } + if (!pixman_region32_not_empty (region)) + return FALSE; + if (dest_image->common.alpha_map->common.have_clip_region) + { + if (!clip_general_image (region, &dest_image->common.alpha_map->common.clip_region, + -dest_image->common.alpha_origin_x, + -dest_image->common.alpha_origin_y)) + { + return FALSE; + } + } + } + + /* clip against src */ + if (src_image->common.have_clip_region) + { + if (!clip_source_image (region, src_image, dest_x - src_x, dest_y - src_y)) + return FALSE; + } + if (src_image->common.alpha_map && src_image->common.alpha_map->common.have_clip_region) + { + if (!clip_source_image (region, (pixman_image_t *)src_image->common.alpha_map, + dest_x - (src_x - src_image->common.alpha_origin_x), + dest_y - (src_y - src_image->common.alpha_origin_y))) + { + return FALSE; + } + } + /* clip against mask */ + if (mask_image && mask_image->common.have_clip_region) + { + if (!clip_source_image (region, mask_image, dest_x - mask_x, dest_y - mask_y)) + return FALSE; + + if (mask_image->common.alpha_map && mask_image->common.alpha_map->common.have_clip_region) + { + if (!clip_source_image (region, (pixman_image_t *)mask_image->common.alpha_map, + dest_x - (mask_x - mask_image->common.alpha_origin_x), + dest_y - (mask_y - mask_image->common.alpha_origin_y))) + { + return FALSE; + } + } + } + + return TRUE; +} + +typedef struct +{ + pixman_fixed_48_16_t x1; + pixman_fixed_48_16_t y1; + pixman_fixed_48_16_t x2; + pixman_fixed_48_16_t y2; +} box_48_16_t; + +static pixman_bool_t +compute_transformed_extents (pixman_transform_t *transform, + const pixman_box32_t *extents, + box_48_16_t *transformed) +{ + pixman_fixed_48_16_t tx1, ty1, tx2, ty2; + pixman_fixed_t x1, y1, x2, y2; + int i; + + x1 = pixman_int_to_fixed (extents->x1) + pixman_fixed_1 / 2; + y1 = pixman_int_to_fixed (extents->y1) + pixman_fixed_1 / 2; + x2 = pixman_int_to_fixed (extents->x2) - pixman_fixed_1 / 2; + y2 = pixman_int_to_fixed (extents->y2) - pixman_fixed_1 / 2; + + if (!transform) + { + transformed->x1 = x1; + transformed->y1 = y1; + transformed->x2 = x2; + transformed->y2 = y2; + + return TRUE; + } + + tx1 = ty1 = INT64_MAX; + tx2 = ty2 = INT64_MIN; + + for (i = 0; i < 4; ++i) + { + pixman_fixed_48_16_t tx, ty; + pixman_vector_t v; + + v.vector[0] = (i & 0x01)? x1 : x2; + v.vector[1] = (i & 0x02)? y1 : y2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point (transform, &v)) + return FALSE; + + tx = (pixman_fixed_48_16_t)v.vector[0]; + ty = (pixman_fixed_48_16_t)v.vector[1]; + + if (tx < tx1) + tx1 = tx; + if (ty < ty1) + ty1 = ty; + if (tx > tx2) + tx2 = tx; + if (ty > ty2) + ty2 = ty; + } + + transformed->x1 = tx1; + transformed->y1 = ty1; + transformed->x2 = tx2; + transformed->y2 = ty2; + + return TRUE; +} + +#define IS_16BIT(x) (((x) >= INT16_MIN) && ((x) <= INT16_MAX)) +#define ABS(f) (((f) < 0)? (-(f)) : (f)) +#define IS_16_16(f) (((f) >= pixman_min_fixed_48_16 && ((f) <= pixman_max_fixed_48_16))) + +static pixman_bool_t +analyze_extent (pixman_image_t *image, + const pixman_box32_t *extents, + uint32_t *flags) +{ + pixman_transform_t *transform; + pixman_fixed_t x_off, y_off; + pixman_fixed_t width, height; + pixman_fixed_t *params; + box_48_16_t transformed; + pixman_box32_t exp_extents; + + if (!image) + return TRUE; + + /* Some compositing functions walk one step + * outside the destination rectangle, so we + * check here that the expanded-by-one source + * extents in destination space fits in 16 bits + */ + if (!IS_16BIT (extents->x1 - 1) || + !IS_16BIT (extents->y1 - 1) || + !IS_16BIT (extents->x2 + 1) || + !IS_16BIT (extents->y2 + 1)) + { + return FALSE; + } + + transform = image->common.transform; + if (image->common.type == BITS) + { + /* During repeat mode calculations we might convert the + * width/height of an image to fixed 16.16, so we need + * them to be smaller than 16 bits. + */ + if (image->bits.width >= 0x7fff || image->bits.height >= 0x7fff) + return FALSE; + + if ((image->common.flags & FAST_PATH_ID_TRANSFORM) == FAST_PATH_ID_TRANSFORM && + extents->x1 >= 0 && + extents->y1 >= 0 && + extents->x2 <= image->bits.width && + extents->y2 <= image->bits.height) + { + *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; + return TRUE; + } + + switch (image->common.filter) + { + case PIXMAN_FILTER_CONVOLUTION: + params = image->common.filter_params; + x_off = - pixman_fixed_e - ((params[0] - pixman_fixed_1) >> 1); + y_off = - pixman_fixed_e - ((params[1] - pixman_fixed_1) >> 1); + width = params[0]; + height = params[1]; + break; + + case PIXMAN_FILTER_SEPARABLE_CONVOLUTION: + params = image->common.filter_params; + x_off = - pixman_fixed_e - ((params[0] - pixman_fixed_1) >> 1); + y_off = - pixman_fixed_e - ((params[1] - pixman_fixed_1) >> 1); + width = params[0]; + height = params[1]; + break; + + case PIXMAN_FILTER_GOOD: + case PIXMAN_FILTER_BEST: + case PIXMAN_FILTER_BILINEAR: + x_off = - pixman_fixed_1 / 2; + y_off = - pixman_fixed_1 / 2; + width = pixman_fixed_1; + height = pixman_fixed_1; + break; + + case PIXMAN_FILTER_FAST: + case PIXMAN_FILTER_NEAREST: + x_off = - pixman_fixed_e; + y_off = - pixman_fixed_e; + width = 0; + height = 0; + break; + + default: + return FALSE; + } + } + else + { + x_off = 0; + y_off = 0; + width = 0; + height = 0; + } + + if (!compute_transformed_extents (transform, extents, &transformed)) + return FALSE; + + /* Expand the source area by a tiny bit so account of different rounding that + * may happen during sampling. Note that (8 * pixman_fixed_e) is very far from + * 0.5 so this won't cause the area computed to be overly pessimistic. + */ + transformed.x1 -= 8 * pixman_fixed_e; + transformed.y1 -= 8 * pixman_fixed_e; + transformed.x2 += 8 * pixman_fixed_e; + transformed.y2 += 8 * pixman_fixed_e; + + if (image->common.type == BITS) + { + if (pixman_fixed_to_int (transformed.x1) >= 0 && + pixman_fixed_to_int (transformed.y1) >= 0 && + pixman_fixed_to_int (transformed.x2) < image->bits.width && + pixman_fixed_to_int (transformed.y2) < image->bits.height) + { + *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; + } + + if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_1 / 2) >= 0 && + pixman_fixed_to_int (transformed.y1 - pixman_fixed_1 / 2) >= 0 && + pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2) < image->bits.width && + pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2) < image->bits.height) + { + *flags |= FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR; + } + } + + /* Check we don't overflow when the destination extents are expanded by one. + * This ensures that compositing functions can simply walk the source space + * using 16.16 variables without worrying about overflow. + */ + exp_extents = *extents; + exp_extents.x1 -= 1; + exp_extents.y1 -= 1; + exp_extents.x2 += 1; + exp_extents.y2 += 1; + + if (!compute_transformed_extents (transform, &exp_extents, &transformed)) + return FALSE; + + if (!IS_16_16 (transformed.x1 + x_off - 8 * pixman_fixed_e) || + !IS_16_16 (transformed.y1 + y_off - 8 * pixman_fixed_e) || + !IS_16_16 (transformed.x2 + x_off + 8 * pixman_fixed_e + width) || + !IS_16_16 (transformed.y2 + y_off + 8 * pixman_fixed_e + height)) + { + return FALSE; + } + + return TRUE; +} + +/* + * Work around GCC bug causing crashes in Mozilla with SSE2 + * + * When using -msse, gcc generates movdqa instructions assuming that + * the stack is 16 byte aligned. Unfortunately some applications, such + * as Mozilla and Mono, end up aligning the stack to 4 bytes, which + * causes the movdqa instructions to fail. + * + * The __force_align_arg_pointer__ makes gcc generate a prologue that + * realigns the stack pointer to 16 bytes. + * + * On x86-64 this is not necessary because the standard ABI already + * calls for a 16 byte aligned stack. + * + * See https://bugs.freedesktop.org/show_bug.cgi?id=15693 + */ +#if defined (USE_SSE2) && defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) +__attribute__((__force_align_arg_pointer__)) +#endif +PIXMAN_EXPORT void +pixman_image_composite32 (pixman_op_t op, + pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + pixman_format_code_t src_format, mask_format, dest_format; + pixman_region32_t region; + pixman_box32_t extents; + pixman_implementation_t *imp; + pixman_composite_func_t func; + pixman_composite_info_t info; + const pixman_box32_t *pbox; + int n; + + _pixman_image_validate (src); + if (mask) + _pixman_image_validate (mask); + _pixman_image_validate (dest); + + src_format = src->common.extended_format_code; + info.src_flags = src->common.flags; + + if (mask && !(mask->common.flags & FAST_PATH_IS_OPAQUE)) + { + mask_format = mask->common.extended_format_code; + info.mask_flags = mask->common.flags; + } + else + { + mask_format = PIXMAN_null; + info.mask_flags = FAST_PATH_IS_OPAQUE | FAST_PATH_NO_ALPHA_MAP; + } + + dest_format = dest->common.extended_format_code; + info.dest_flags = dest->common.flags; + + /* Check for pixbufs */ + if ((mask_format == PIXMAN_a8r8g8b8 || mask_format == PIXMAN_a8b8g8r8) && + (src->type == BITS && src->bits.bits == mask->bits.bits) && + (src->common.repeat == mask->common.repeat) && + (info.src_flags & info.mask_flags & FAST_PATH_ID_TRANSFORM) && + (src_x == mask_x && src_y == mask_y)) + { + if (src_format == PIXMAN_x8b8g8r8) + src_format = mask_format = PIXMAN_pixbuf; + else if (src_format == PIXMAN_x8r8g8b8) + src_format = mask_format = PIXMAN_rpixbuf; + } + + pixman_region32_init (®ion); + + if (!_pixman_compute_composite_region32 ( + ®ion, src, mask, dest, + src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height)) + { + goto out; + } + + extents = *pixman_region32_extents (®ion); + + extents.x1 -= dest_x - src_x; + extents.y1 -= dest_y - src_y; + extents.x2 -= dest_x - src_x; + extents.y2 -= dest_y - src_y; + + if (!analyze_extent (src, &extents, &info.src_flags)) + goto out; + + extents.x1 -= src_x - mask_x; + extents.y1 -= src_y - mask_y; + extents.x2 -= src_x - mask_x; + extents.y2 -= src_y - mask_y; + + if (!analyze_extent (mask, &extents, &info.mask_flags)) + goto out; + + /* If the clip is within the source samples, and the samples are + * opaque, then the source is effectively opaque. + */ +#define NEAREST_OPAQUE (FAST_PATH_SAMPLES_OPAQUE | \ + FAST_PATH_NEAREST_FILTER | \ + FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) +#define BILINEAR_OPAQUE (FAST_PATH_SAMPLES_OPAQUE | \ + FAST_PATH_BILINEAR_FILTER | \ + FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR) + + if ((info.src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || + (info.src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) + { + info.src_flags |= FAST_PATH_IS_OPAQUE; + } + + if ((info.mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || + (info.mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) + { + info.mask_flags |= FAST_PATH_IS_OPAQUE; + } + + /* + * Check if we can replace our operator by a simpler one + * if the src or dest are opaque. The output operator should be + * mathematically equivalent to the source. + */ + info.op = optimize_operator (op, info.src_flags, info.mask_flags, info.dest_flags); + + _pixman_implementation_lookup_composite ( + get_implementation (), info.op, + src_format, info.src_flags, + mask_format, info.mask_flags, + dest_format, info.dest_flags, + &imp, &func); + + info.src_image = src; + info.mask_image = mask; + info.dest_image = dest; + + pbox = pixman_region32_rectangles (®ion, &n); + + while (n--) + { + info.src_x = pbox->x1 + src_x - dest_x; + info.src_y = pbox->y1 + src_y - dest_y; + info.mask_x = pbox->x1 + mask_x - dest_x; + info.mask_y = pbox->y1 + mask_y - dest_y; + info.dest_x = pbox->x1; + info.dest_y = pbox->y1; + info.width = pbox->x2 - pbox->x1; + info.height = pbox->y2 - pbox->y1; + + func (imp, &info); + + pbox++; + } + +out: + pixman_region32_fini (®ion); +} + +PIXMAN_EXPORT void +pixman_image_composite (pixman_op_t op, + pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int16_t src_x, + int16_t src_y, + int16_t mask_x, + int16_t mask_y, + int16_t dest_x, + int16_t dest_y, + uint16_t width, + uint16_t height) +{ + pixman_image_composite32 (op, src, mask, dest, src_x, src_y, + mask_x, mask_y, dest_x, dest_y, width, height); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_blt (uint32_t *src_bits, + uint32_t *dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height) +{ + return _pixman_implementation_blt (get_implementation(), + src_bits, dst_bits, src_stride, dst_stride, + src_bpp, dst_bpp, + src_x, src_y, + dest_x, dest_y, + width, height); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_fill (uint32_t *bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t filler) +{ + return _pixman_implementation_fill ( + get_implementation(), bits, stride, bpp, x, y, width, height, filler); +} + +static uint32_t +color_to_uint32 (const pixman_color_t *color) +{ + return + (color->alpha >> 8 << 24) | + (color->red >> 8 << 16) | + (color->green & 0xff00) | + (color->blue >> 8); +} + +static pixman_bool_t +color_to_pixel (const pixman_color_t *color, + uint32_t * pixel, + pixman_format_code_t format) +{ + uint32_t c = color_to_uint32 (color); + + if (!(format == PIXMAN_a8r8g8b8 || + format == PIXMAN_x8r8g8b8 || + format == PIXMAN_a8b8g8r8 || + format == PIXMAN_x8b8g8r8 || + format == PIXMAN_b8g8r8a8 || + format == PIXMAN_b8g8r8x8 || + format == PIXMAN_r8g8b8a8 || + format == PIXMAN_r8g8b8x8 || + format == PIXMAN_r5g6b5 || + format == PIXMAN_b5g6r5 || + format == PIXMAN_a8 || + format == PIXMAN_a1)) + { + return FALSE; + } + + if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_ABGR) + { + c = ((c & 0xff000000) >> 0) | + ((c & 0x00ff0000) >> 16) | + ((c & 0x0000ff00) >> 0) | + ((c & 0x000000ff) << 16); + } + if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_BGRA) + { + c = ((c & 0xff000000) >> 24) | + ((c & 0x00ff0000) >> 8) | + ((c & 0x0000ff00) << 8) | + ((c & 0x000000ff) << 24); + } + if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_RGBA) + c = ((c & 0xff000000) >> 24) | (c << 8); + + if (format == PIXMAN_a1) + c = c >> 31; + else if (format == PIXMAN_a8) + c = c >> 24; + else if (format == PIXMAN_r5g6b5 || + format == PIXMAN_b5g6r5) + c = convert_8888_to_0565 (c); + +#if 0 + printf ("color: %x %x %x %x\n", color->alpha, color->red, color->green, color->blue); + printf ("pixel: %x\n", c); +#endif + + *pixel = c; + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_image_fill_rectangles (pixman_op_t op, + pixman_image_t * dest, + const pixman_color_t * color, + int n_rects, + const pixman_rectangle16_t *rects) +{ + pixman_box32_t stack_boxes[6]; + pixman_box32_t *boxes; + pixman_bool_t result; + int i; + + if (n_rects > 6) + { + boxes = pixman_malloc_ab (sizeof (pixman_box32_t), n_rects); + if (boxes == NULL) + return FALSE; + } + else + { + boxes = stack_boxes; + } + + for (i = 0; i < n_rects; ++i) + { + boxes[i].x1 = rects[i].x; + boxes[i].y1 = rects[i].y; + boxes[i].x2 = boxes[i].x1 + rects[i].width; + boxes[i].y2 = boxes[i].y1 + rects[i].height; + } + + result = pixman_image_fill_boxes (op, dest, color, n_rects, boxes); + + if (boxes != stack_boxes) + free (boxes); + + return result; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_image_fill_boxes (pixman_op_t op, + pixman_image_t * dest, + const pixman_color_t *color, + int n_boxes, + const pixman_box32_t *boxes) +{ + pixman_image_t *solid; + pixman_color_t c; + int i; + + _pixman_image_validate (dest); + + if (color->alpha == 0xffff) + { + if (op == PIXMAN_OP_OVER) + op = PIXMAN_OP_SRC; + } + + if (op == PIXMAN_OP_CLEAR) + { + c.red = 0; + c.green = 0; + c.blue = 0; + c.alpha = 0; + + color = &c; + + op = PIXMAN_OP_SRC; + } + + if (op == PIXMAN_OP_SRC) + { + uint32_t pixel; + + if (color_to_pixel (color, &pixel, dest->bits.format)) + { + pixman_region32_t fill_region; + int n_rects, j; + pixman_box32_t *rects; + + if (!pixman_region32_init_rects (&fill_region, boxes, n_boxes)) + return FALSE; + + if (dest->common.have_clip_region) + { + if (!pixman_region32_intersect (&fill_region, + &fill_region, + &dest->common.clip_region)) + return FALSE; + } + + rects = pixman_region32_rectangles (&fill_region, &n_rects); + for (j = 0; j < n_rects; ++j) + { + const pixman_box32_t *rect = &(rects[j]); + pixman_fill (dest->bits.bits, dest->bits.rowstride, PIXMAN_FORMAT_BPP (dest->bits.format), + rect->x1, rect->y1, rect->x2 - rect->x1, rect->y2 - rect->y1, + pixel); + } + + pixman_region32_fini (&fill_region); + return TRUE; + } + } + + solid = pixman_image_create_solid_fill (color); + if (!solid) + return FALSE; + + for (i = 0; i < n_boxes; ++i) + { + const pixman_box32_t *box = &(boxes[i]); + + pixman_image_composite32 (op, solid, NULL, dest, + 0, 0, 0, 0, + box->x1, box->y1, + box->x2 - box->x1, box->y2 - box->y1); + } + + pixman_image_unref (solid); + + return TRUE; +} + +/** + * pixman_version: + * + * Returns the version of the pixman library encoded in a single + * integer as per %PIXMAN_VERSION_ENCODE. The encoding ensures that + * later versions compare greater than earlier versions. + * + * A run-time comparison to check that pixman's version is greater than + * or equal to version X.Y.Z could be performed as follows: + * + * <informalexample><programlisting> + * if (pixman_version() >= PIXMAN_VERSION_ENCODE(X,Y,Z)) {...} + * </programlisting></informalexample> + * + * See also pixman_version_string() as well as the compile-time + * equivalents %PIXMAN_VERSION and %PIXMAN_VERSION_STRING. + * + * Return value: the encoded version. + **/ +PIXMAN_EXPORT int +pixman_version (void) +{ + return PIXMAN_VERSION; +} + +/** + * pixman_version_string: + * + * Returns the version of the pixman library as a human-readable string + * of the form "X.Y.Z". + * + * See also pixman_version() as well as the compile-time equivalents + * %PIXMAN_VERSION_STRING and %PIXMAN_VERSION. + * + * Return value: a string containing the version. + **/ +PIXMAN_EXPORT const char* +pixman_version_string (void) +{ + return PIXMAN_VERSION_STRING; +} + +/** + * pixman_format_supported_source: + * @format: A pixman_format_code_t format + * + * Return value: whether the provided format code is a supported + * format for a pixman surface used as a source in + * rendering. + * + * Currently, all pixman_format_code_t values are supported. + **/ +PIXMAN_EXPORT pixman_bool_t +pixman_format_supported_source (pixman_format_code_t format) +{ + switch (format) + { + /* 32 bpp formats */ + case PIXMAN_a2b10g10r10: + case PIXMAN_x2b10g10r10: + case PIXMAN_a2r10g10b10: + case PIXMAN_x2r10g10b10: + case PIXMAN_a8r8g8b8: + case PIXMAN_a8r8g8b8_sRGB: + case PIXMAN_x8r8g8b8: + case PIXMAN_a8b8g8r8: + case PIXMAN_x8b8g8r8: + case PIXMAN_b8g8r8a8: + case PIXMAN_b8g8r8x8: + case PIXMAN_r8g8b8a8: + case PIXMAN_r8g8b8x8: + case PIXMAN_r8g8b8: + case PIXMAN_b8g8r8: + case PIXMAN_r5g6b5: + case PIXMAN_b5g6r5: + case PIXMAN_x14r6g6b6: + /* 16 bpp formats */ + case PIXMAN_a1r5g5b5: + case PIXMAN_x1r5g5b5: + case PIXMAN_a1b5g5r5: + case PIXMAN_x1b5g5r5: + case PIXMAN_a4r4g4b4: + case PIXMAN_x4r4g4b4: + case PIXMAN_a4b4g4r4: + case PIXMAN_x4b4g4r4: + /* 8bpp formats */ + case PIXMAN_a8: + case PIXMAN_r3g3b2: + case PIXMAN_b2g3r3: + case PIXMAN_a2r2g2b2: + case PIXMAN_a2b2g2r2: + case PIXMAN_c8: + case PIXMAN_g8: + case PIXMAN_x4a4: + /* Collides with PIXMAN_c8 + case PIXMAN_x4c4: + */ + /* Collides with PIXMAN_g8 + case PIXMAN_x4g4: + */ + /* 4bpp formats */ + case PIXMAN_a4: + case PIXMAN_r1g2b1: + case PIXMAN_b1g2r1: + case PIXMAN_a1r1g1b1: + case PIXMAN_a1b1g1r1: + case PIXMAN_c4: + case PIXMAN_g4: + /* 1bpp formats */ + case PIXMAN_a1: + case PIXMAN_g1: + /* YUV formats */ + case PIXMAN_yuy2: + case PIXMAN_yv12: + return TRUE; + + default: + return FALSE; + } +} + +/** + * pixman_format_supported_destination: + * @format: A pixman_format_code_t format + * + * Return value: whether the provided format code is a supported + * format for a pixman surface used as a destination in + * rendering. + * + * Currently, all pixman_format_code_t values are supported + * except for the YUV formats. + **/ +PIXMAN_EXPORT pixman_bool_t +pixman_format_supported_destination (pixman_format_code_t format) +{ + /* YUV formats cannot be written to at the moment */ + if (format == PIXMAN_yuy2 || format == PIXMAN_yv12) + return FALSE; + + return pixman_format_supported_source (format); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_compute_composite_region (pixman_region16_t * region, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dest_image, + int16_t src_x, + int16_t src_y, + int16_t mask_x, + int16_t mask_y, + int16_t dest_x, + int16_t dest_y, + uint16_t width, + uint16_t height) +{ + pixman_region32_t r32; + pixman_bool_t retval; + + pixman_region32_init (&r32); + + retval = _pixman_compute_composite_region32 ( + &r32, src_image, mask_image, dest_image, + src_x, src_y, mask_x, mask_y, dest_x, dest_y, + width, height); + + if (retval) + { + if (!pixman_region16_copy_from_region32 (region, &r32)) + retval = FALSE; + } + + pixman_region32_fini (&r32); + return retval; +} diff --git a/src/pixman/pixman/pixman.h b/src/pixman/pixman/pixman.h new file mode 100644 index 0000000..509ba5e --- /dev/null +++ b/src/pixman/pixman/pixman.h @@ -0,0 +1,1111 @@ +/*********************************************************** + +Copyright 1987, 1998 The Open Group + +Permission to use, copy, modify, distribute, and sell this software and its +documentation for any purpose is hereby granted without fee, provided that +the above copyright notice appear in all copies and that both that +copyright notice and this permission notice appear in supporting +documentation. + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +Except as contained in this notice, the name of The Open Group shall not be +used in advertising or otherwise to promote the sale, use or other dealings +in this Software without prior written authorization from The Open Group. + +Copyright 1987 by Digital Equipment Corporation, Maynard, Massachusetts. + + All Rights Reserved + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the name of Digital not be +used in advertising or publicity pertaining to distribution of the +software without specific, written prior permission. + +DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING +ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL +DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR +ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS +SOFTWARE. + +******************************************************************/ +/* + * Copyright © 1998, 2004 Keith Packard + * Copyright 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef PIXMAN_H__ +#define PIXMAN_H__ + +#include <pixman-version.h> + +#ifdef __cplusplus +#define PIXMAN_BEGIN_DECLS extern "C" { +#define PIXMAN_END_DECLS } +#else +#define PIXMAN_BEGIN_DECLS +#define PIXMAN_END_DECLS +#endif + +PIXMAN_BEGIN_DECLS + +/* + * Standard integers + */ + +#if !defined (PIXMAN_DONT_DEFINE_STDINT) + +#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun) || defined (__digital__) || defined (__HP_cc) +# include <inttypes.h> +/* VS 2010 (_MSC_VER 1600) has stdint.h */ +#elif defined (_MSC_VER) && _MSC_VER < 1600 +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#elif defined (_AIX) +# include <sys/inttypes.h> +#else +# include <stdint.h> +#endif + +#endif + +/* + * Boolean + */ +typedef int pixman_bool_t; + +/* + * Fixpoint numbers + */ +typedef int64_t pixman_fixed_32_32_t; +typedef pixman_fixed_32_32_t pixman_fixed_48_16_t; +typedef uint32_t pixman_fixed_1_31_t; +typedef uint32_t pixman_fixed_1_16_t; +typedef int32_t pixman_fixed_16_16_t; +typedef pixman_fixed_16_16_t pixman_fixed_t; + +#define pixman_fixed_e ((pixman_fixed_t) 1) +#define pixman_fixed_1 (pixman_int_to_fixed(1)) +#define pixman_fixed_1_minus_e (pixman_fixed_1 - pixman_fixed_e) +#define pixman_fixed_minus_1 (pixman_int_to_fixed(-1)) +#define pixman_fixed_to_int(f) ((int) ((f) >> 16)) +#define pixman_int_to_fixed(i) ((pixman_fixed_t) ((i) << 16)) +#define pixman_fixed_to_double(f) (double) ((f) / (double) pixman_fixed_1) +#define pixman_double_to_fixed(d) ((pixman_fixed_t) ((d) * 65536.0)) +#define pixman_fixed_frac(f) ((f) & pixman_fixed_1_minus_e) +#define pixman_fixed_floor(f) ((f) & ~pixman_fixed_1_minus_e) +#define pixman_fixed_ceil(f) pixman_fixed_floor ((f) + pixman_fixed_1_minus_e) +#define pixman_fixed_fraction(f) ((f) & pixman_fixed_1_minus_e) +#define pixman_fixed_mod_2(f) ((f) & (pixman_fixed1 | pixman_fixed_1_minus_e)) +#define pixman_max_fixed_48_16 ((pixman_fixed_48_16_t) 0x7fffffff) +#define pixman_min_fixed_48_16 (-((pixman_fixed_48_16_t) 1 << 31)) + +/* + * Misc structs + */ +typedef struct pixman_color pixman_color_t; +typedef struct pixman_point_fixed pixman_point_fixed_t; +typedef struct pixman_line_fixed pixman_line_fixed_t; +typedef struct pixman_vector pixman_vector_t; +typedef struct pixman_transform pixman_transform_t; + +struct pixman_color +{ + uint16_t red; + uint16_t green; + uint16_t blue; + uint16_t alpha; +}; + +struct pixman_point_fixed +{ + pixman_fixed_t x; + pixman_fixed_t y; +}; + +struct pixman_line_fixed +{ + pixman_point_fixed_t p1, p2; +}; + +/* + * Fixed point matrices + */ + +struct pixman_vector +{ + pixman_fixed_t vector[3]; +}; + +struct pixman_transform +{ + pixman_fixed_t matrix[3][3]; +}; + +/* forward declaration (sorry) */ +struct pixman_box16; +typedef union pixman_image pixman_image_t; + +void pixman_transform_init_identity (struct pixman_transform *matrix); +pixman_bool_t pixman_transform_point_3d (const struct pixman_transform *transform, + struct pixman_vector *vector); +pixman_bool_t pixman_transform_point (const struct pixman_transform *transform, + struct pixman_vector *vector); +pixman_bool_t pixman_transform_multiply (struct pixman_transform *dst, + const struct pixman_transform *l, + const struct pixman_transform *r); +void pixman_transform_init_scale (struct pixman_transform *t, + pixman_fixed_t sx, + pixman_fixed_t sy); +pixman_bool_t pixman_transform_scale (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t sx, + pixman_fixed_t sy); +void pixman_transform_init_rotate (struct pixman_transform *t, + pixman_fixed_t cos, + pixman_fixed_t sin); +pixman_bool_t pixman_transform_rotate (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t c, + pixman_fixed_t s); +void pixman_transform_init_translate (struct pixman_transform *t, + pixman_fixed_t tx, + pixman_fixed_t ty); +pixman_bool_t pixman_transform_translate (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t tx, + pixman_fixed_t ty); +pixman_bool_t pixman_transform_bounds (const struct pixman_transform *matrix, + struct pixman_box16 *b); +pixman_bool_t pixman_transform_invert (struct pixman_transform *dst, + const struct pixman_transform *src); +pixman_bool_t pixman_transform_is_identity (const struct pixman_transform *t); +pixman_bool_t pixman_transform_is_scale (const struct pixman_transform *t); +pixman_bool_t pixman_transform_is_int_translate (const struct pixman_transform *t); +pixman_bool_t pixman_transform_is_inverse (const struct pixman_transform *a, + const struct pixman_transform *b); + +/* + * Floating point matrices + */ +typedef struct pixman_f_transform pixman_f_transform_t; +typedef struct pixman_f_vector pixman_f_vector_t; + +struct pixman_f_vector +{ + double v[3]; +}; + +struct pixman_f_transform +{ + double m[3][3]; +}; + +pixman_bool_t pixman_transform_from_pixman_f_transform (struct pixman_transform *t, + const struct pixman_f_transform *ft); +void pixman_f_transform_from_pixman_transform (struct pixman_f_transform *ft, + const struct pixman_transform *t); +pixman_bool_t pixman_f_transform_invert (struct pixman_f_transform *dst, + const struct pixman_f_transform *src); +pixman_bool_t pixman_f_transform_point (const struct pixman_f_transform *t, + struct pixman_f_vector *v); +void pixman_f_transform_point_3d (const struct pixman_f_transform *t, + struct pixman_f_vector *v); +void pixman_f_transform_multiply (struct pixman_f_transform *dst, + const struct pixman_f_transform *l, + const struct pixman_f_transform *r); +void pixman_f_transform_init_scale (struct pixman_f_transform *t, + double sx, + double sy); +pixman_bool_t pixman_f_transform_scale (struct pixman_f_transform *forward, + struct pixman_f_transform *reverse, + double sx, + double sy); +void pixman_f_transform_init_rotate (struct pixman_f_transform *t, + double cos, + double sin); +pixman_bool_t pixman_f_transform_rotate (struct pixman_f_transform *forward, + struct pixman_f_transform *reverse, + double c, + double s); +void pixman_f_transform_init_translate (struct pixman_f_transform *t, + double tx, + double ty); +pixman_bool_t pixman_f_transform_translate (struct pixman_f_transform *forward, + struct pixman_f_transform *reverse, + double tx, + double ty); +pixman_bool_t pixman_f_transform_bounds (const struct pixman_f_transform *t, + struct pixman_box16 *b); +void pixman_f_transform_init_identity (struct pixman_f_transform *t); + +typedef enum +{ + PIXMAN_REPEAT_NONE, + PIXMAN_REPEAT_NORMAL, + PIXMAN_REPEAT_PAD, + PIXMAN_REPEAT_REFLECT +} pixman_repeat_t; + +typedef enum +{ + PIXMAN_FILTER_FAST, + PIXMAN_FILTER_GOOD, + PIXMAN_FILTER_BEST, + PIXMAN_FILTER_NEAREST, + PIXMAN_FILTER_BILINEAR, + PIXMAN_FILTER_CONVOLUTION, + + /* The SEPARABLE_CONVOLUTION filter takes the following parameters: + * + * width: integer given as 16.16 fixpoint number + * height: integer given as 16.16 fixpoint number + * x_phase_bits: integer given as 16.16 fixpoint + * y_phase_bits: integer given as 16.16 fixpoint + * xtables: (1 << x_phase_bits) tables of size width + * ytables: (1 << y_phase_bits) tables of size height + * + * When sampling at (x, y), the location is first rounded to one of + * n_x_phases * n_y_phases subpixel positions. These subpixel positions + * determine an xtable and a ytable to use. + * + * Conceptually a width x height matrix is then formed in which each entry + * is the product of the corresponding entries in the x and y tables. + * This matrix is then aligned with the image pixels such that its center + * is as close as possible to the subpixel location chosen earlier. Then + * the image is convolved with the matrix and the resulting pixel returned. + */ + PIXMAN_FILTER_SEPARABLE_CONVOLUTION +} pixman_filter_t; + +typedef enum +{ + PIXMAN_OP_CLEAR = 0x00, + PIXMAN_OP_SRC = 0x01, + PIXMAN_OP_DST = 0x02, + PIXMAN_OP_OVER = 0x03, + PIXMAN_OP_OVER_REVERSE = 0x04, + PIXMAN_OP_IN = 0x05, + PIXMAN_OP_IN_REVERSE = 0x06, + PIXMAN_OP_OUT = 0x07, + PIXMAN_OP_OUT_REVERSE = 0x08, + PIXMAN_OP_ATOP = 0x09, + PIXMAN_OP_ATOP_REVERSE = 0x0a, + PIXMAN_OP_XOR = 0x0b, + PIXMAN_OP_ADD = 0x0c, + PIXMAN_OP_SATURATE = 0x0d, + + PIXMAN_OP_DISJOINT_CLEAR = 0x10, + PIXMAN_OP_DISJOINT_SRC = 0x11, + PIXMAN_OP_DISJOINT_DST = 0x12, + PIXMAN_OP_DISJOINT_OVER = 0x13, + PIXMAN_OP_DISJOINT_OVER_REVERSE = 0x14, + PIXMAN_OP_DISJOINT_IN = 0x15, + PIXMAN_OP_DISJOINT_IN_REVERSE = 0x16, + PIXMAN_OP_DISJOINT_OUT = 0x17, + PIXMAN_OP_DISJOINT_OUT_REVERSE = 0x18, + PIXMAN_OP_DISJOINT_ATOP = 0x19, + PIXMAN_OP_DISJOINT_ATOP_REVERSE = 0x1a, + PIXMAN_OP_DISJOINT_XOR = 0x1b, + + PIXMAN_OP_CONJOINT_CLEAR = 0x20, + PIXMAN_OP_CONJOINT_SRC = 0x21, + PIXMAN_OP_CONJOINT_DST = 0x22, + PIXMAN_OP_CONJOINT_OVER = 0x23, + PIXMAN_OP_CONJOINT_OVER_REVERSE = 0x24, + PIXMAN_OP_CONJOINT_IN = 0x25, + PIXMAN_OP_CONJOINT_IN_REVERSE = 0x26, + PIXMAN_OP_CONJOINT_OUT = 0x27, + PIXMAN_OP_CONJOINT_OUT_REVERSE = 0x28, + PIXMAN_OP_CONJOINT_ATOP = 0x29, + PIXMAN_OP_CONJOINT_ATOP_REVERSE = 0x2a, + PIXMAN_OP_CONJOINT_XOR = 0x2b, + + PIXMAN_OP_MULTIPLY = 0x30, + PIXMAN_OP_SCREEN = 0x31, + PIXMAN_OP_OVERLAY = 0x32, + PIXMAN_OP_DARKEN = 0x33, + PIXMAN_OP_LIGHTEN = 0x34, + PIXMAN_OP_COLOR_DODGE = 0x35, + PIXMAN_OP_COLOR_BURN = 0x36, + PIXMAN_OP_HARD_LIGHT = 0x37, + PIXMAN_OP_SOFT_LIGHT = 0x38, + PIXMAN_OP_DIFFERENCE = 0x39, + PIXMAN_OP_EXCLUSION = 0x3a, + PIXMAN_OP_HSL_HUE = 0x3b, + PIXMAN_OP_HSL_SATURATION = 0x3c, + PIXMAN_OP_HSL_COLOR = 0x3d, + PIXMAN_OP_HSL_LUMINOSITY = 0x3e + +#ifdef PIXMAN_USE_INTERNAL_API + , + PIXMAN_N_OPERATORS, + PIXMAN_OP_NONE = PIXMAN_N_OPERATORS +#endif +} pixman_op_t; + +/* + * Regions + */ +typedef struct pixman_region16_data pixman_region16_data_t; +typedef struct pixman_box16 pixman_box16_t; +typedef struct pixman_rectangle16 pixman_rectangle16_t; +typedef struct pixman_region16 pixman_region16_t; + +struct pixman_region16_data { + long size; + long numRects; +/* pixman_box16_t rects[size]; in memory but not explicitly declared */ +}; + +struct pixman_rectangle16 +{ + int16_t x, y; + uint16_t width, height; +}; + +struct pixman_box16 +{ + int16_t x1, y1, x2, y2; +}; + +struct pixman_region16 +{ + pixman_box16_t extents; + pixman_region16_data_t *data; +}; + +typedef enum +{ + PIXMAN_REGION_OUT, + PIXMAN_REGION_IN, + PIXMAN_REGION_PART +} pixman_region_overlap_t; + +/* This function exists only to make it possible to preserve + * the X ABI - it should go away at first opportunity. + */ +void pixman_region_set_static_pointers (pixman_box16_t *empty_box, + pixman_region16_data_t *empty_data, + pixman_region16_data_t *broken_data); + +/* creation/destruction */ +void pixman_region_init (pixman_region16_t *region); +void pixman_region_init_rect (pixman_region16_t *region, + int x, + int y, + unsigned int width, + unsigned int height); +pixman_bool_t pixman_region_init_rects (pixman_region16_t *region, + const pixman_box16_t *boxes, + int count); +void pixman_region_init_with_extents (pixman_region16_t *region, + pixman_box16_t *extents); +void pixman_region_init_from_image (pixman_region16_t *region, + pixman_image_t *image); +void pixman_region_fini (pixman_region16_t *region); + + +/* manipulation */ +void pixman_region_translate (pixman_region16_t *region, + int x, + int y); +pixman_bool_t pixman_region_copy (pixman_region16_t *dest, + pixman_region16_t *source); +pixman_bool_t pixman_region_intersect (pixman_region16_t *new_reg, + pixman_region16_t *reg1, + pixman_region16_t *reg2); +pixman_bool_t pixman_region_union (pixman_region16_t *new_reg, + pixman_region16_t *reg1, + pixman_region16_t *reg2); +pixman_bool_t pixman_region_union_rect (pixman_region16_t *dest, + pixman_region16_t *source, + int x, + int y, + unsigned int width, + unsigned int height); +pixman_bool_t pixman_region_intersect_rect (pixman_region16_t *dest, + pixman_region16_t *source, + int x, + int y, + unsigned int width, + unsigned int height); +pixman_bool_t pixman_region_subtract (pixman_region16_t *reg_d, + pixman_region16_t *reg_m, + pixman_region16_t *reg_s); +pixman_bool_t pixman_region_inverse (pixman_region16_t *new_reg, + pixman_region16_t *reg1, + pixman_box16_t *inv_rect); +pixman_bool_t pixman_region_contains_point (pixman_region16_t *region, + int x, + int y, + pixman_box16_t *box); +pixman_region_overlap_t pixman_region_contains_rectangle (pixman_region16_t *region, + pixman_box16_t *prect); +pixman_bool_t pixman_region_not_empty (pixman_region16_t *region); +pixman_box16_t * pixman_region_extents (pixman_region16_t *region); +int pixman_region_n_rects (pixman_region16_t *region); +pixman_box16_t * pixman_region_rectangles (pixman_region16_t *region, + int *n_rects); +pixman_bool_t pixman_region_equal (pixman_region16_t *region1, + pixman_region16_t *region2); +pixman_bool_t pixman_region_selfcheck (pixman_region16_t *region); +void pixman_region_reset (pixman_region16_t *region, + pixman_box16_t *box); +void pixman_region_clear (pixman_region16_t *region); +/* + * 32 bit regions + */ +typedef struct pixman_region32_data pixman_region32_data_t; +typedef struct pixman_box32 pixman_box32_t; +typedef struct pixman_rectangle32 pixman_rectangle32_t; +typedef struct pixman_region32 pixman_region32_t; + +struct pixman_region32_data { + long size; + long numRects; +/* pixman_box32_t rects[size]; in memory but not explicitly declared */ +}; + +struct pixman_rectangle32 +{ + int32_t x, y; + uint32_t width, height; +}; + +struct pixman_box32 +{ + int32_t x1, y1, x2, y2; +}; + +struct pixman_region32 +{ + pixman_box32_t extents; + pixman_region32_data_t *data; +}; + +/* creation/destruction */ +void pixman_region32_init (pixman_region32_t *region); +void pixman_region32_init_rect (pixman_region32_t *region, + int x, + int y, + unsigned int width, + unsigned int height); +pixman_bool_t pixman_region32_init_rects (pixman_region32_t *region, + const pixman_box32_t *boxes, + int count); +void pixman_region32_init_with_extents (pixman_region32_t *region, + pixman_box32_t *extents); +void pixman_region32_init_from_image (pixman_region32_t *region, + pixman_image_t *image); +void pixman_region32_fini (pixman_region32_t *region); + + +/* manipulation */ +void pixman_region32_translate (pixman_region32_t *region, + int x, + int y); +pixman_bool_t pixman_region32_copy (pixman_region32_t *dest, + pixman_region32_t *source); +pixman_bool_t pixman_region32_intersect (pixman_region32_t *new_reg, + pixman_region32_t *reg1, + pixman_region32_t *reg2); +pixman_bool_t pixman_region32_union (pixman_region32_t *new_reg, + pixman_region32_t *reg1, + pixman_region32_t *reg2); +pixman_bool_t pixman_region32_intersect_rect (pixman_region32_t *dest, + pixman_region32_t *source, + int x, + int y, + unsigned int width, + unsigned int height); +pixman_bool_t pixman_region32_union_rect (pixman_region32_t *dest, + pixman_region32_t *source, + int x, + int y, + unsigned int width, + unsigned int height); +pixman_bool_t pixman_region32_subtract (pixman_region32_t *reg_d, + pixman_region32_t *reg_m, + pixman_region32_t *reg_s); +pixman_bool_t pixman_region32_inverse (pixman_region32_t *new_reg, + pixman_region32_t *reg1, + pixman_box32_t *inv_rect); +pixman_bool_t pixman_region32_contains_point (pixman_region32_t *region, + int x, + int y, + pixman_box32_t *box); +pixman_region_overlap_t pixman_region32_contains_rectangle (pixman_region32_t *region, + pixman_box32_t *prect); +pixman_bool_t pixman_region32_not_empty (pixman_region32_t *region); +pixman_box32_t * pixman_region32_extents (pixman_region32_t *region); +int pixman_region32_n_rects (pixman_region32_t *region); +pixman_box32_t * pixman_region32_rectangles (pixman_region32_t *region, + int *n_rects); +pixman_bool_t pixman_region32_equal (pixman_region32_t *region1, + pixman_region32_t *region2); +pixman_bool_t pixman_region32_selfcheck (pixman_region32_t *region); +void pixman_region32_reset (pixman_region32_t *region, + pixman_box32_t *box); +void pixman_region32_clear (pixman_region32_t *region); + + +/* Copy / Fill / Misc */ +pixman_bool_t pixman_blt (uint32_t *src_bits, + uint32_t *dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height); +pixman_bool_t pixman_fill (uint32_t *bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t _xor); + +int pixman_version (void); +const char* pixman_version_string (void); + +/* + * Images + */ +typedef struct pixman_indexed pixman_indexed_t; +typedef struct pixman_gradient_stop pixman_gradient_stop_t; + +typedef uint32_t (* pixman_read_memory_func_t) (const void *src, int size); +typedef void (* pixman_write_memory_func_t) (void *dst, uint32_t value, int size); + +typedef void (* pixman_image_destroy_func_t) (pixman_image_t *image, void *data); + +struct pixman_gradient_stop { + pixman_fixed_t x; + pixman_color_t color; +}; + +#define PIXMAN_MAX_INDEXED 256 /* XXX depth must be <= 8 */ + +#if PIXMAN_MAX_INDEXED <= 256 +typedef uint8_t pixman_index_type; +#endif + +struct pixman_indexed +{ + pixman_bool_t color; + uint32_t rgba[PIXMAN_MAX_INDEXED]; + pixman_index_type ent[32768]; +}; + +/* + * While the protocol is generous in format support, the + * sample implementation allows only packed RGB and GBR + * representations for data to simplify software rendering, + */ +#define PIXMAN_FORMAT(bpp,type,a,r,g,b) (((bpp) << 24) | \ + ((type) << 16) | \ + ((a) << 12) | \ + ((r) << 8) | \ + ((g) << 4) | \ + ((b))) + +#define PIXMAN_FORMAT_BPP(f) (((f) >> 24) ) +#define PIXMAN_FORMAT_TYPE(f) (((f) >> 16) & 0xff) +#define PIXMAN_FORMAT_A(f) (((f) >> 12) & 0x0f) +#define PIXMAN_FORMAT_R(f) (((f) >> 8) & 0x0f) +#define PIXMAN_FORMAT_G(f) (((f) >> 4) & 0x0f) +#define PIXMAN_FORMAT_B(f) (((f) ) & 0x0f) +#define PIXMAN_FORMAT_RGB(f) (((f) ) & 0xfff) +#define PIXMAN_FORMAT_VIS(f) (((f) ) & 0xffff) +#define PIXMAN_FORMAT_DEPTH(f) (PIXMAN_FORMAT_A(f) + \ + PIXMAN_FORMAT_R(f) + \ + PIXMAN_FORMAT_G(f) + \ + PIXMAN_FORMAT_B(f)) + +#define PIXMAN_TYPE_OTHER 0 +#define PIXMAN_TYPE_A 1 +#define PIXMAN_TYPE_ARGB 2 +#define PIXMAN_TYPE_ABGR 3 +#define PIXMAN_TYPE_COLOR 4 +#define PIXMAN_TYPE_GRAY 5 +#define PIXMAN_TYPE_YUY2 6 +#define PIXMAN_TYPE_YV12 7 +#define PIXMAN_TYPE_BGRA 8 +#define PIXMAN_TYPE_RGBA 9 +#define PIXMAN_TYPE_ARGB_SRGB 10 + +#define PIXMAN_FORMAT_COLOR(f) \ + (PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ARGB || \ + PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ABGR || \ + PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_BGRA || \ + PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA) + +/* 32bpp formats */ +typedef enum { + PIXMAN_a8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,8,8,8,8), + PIXMAN_x8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,8,8,8), + PIXMAN_a8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,8,8,8,8), + PIXMAN_x8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,8,8,8), + PIXMAN_b8g8r8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,8,8,8,8), + PIXMAN_b8g8r8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,0,8,8,8), + PIXMAN_r8g8b8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,8,8,8,8), + PIXMAN_r8g8b8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,0,8,8,8), + PIXMAN_x14r6g6b6 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,6,6,6), + PIXMAN_x2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,10,10,10), + PIXMAN_a2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,2,10,10,10), + PIXMAN_x2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10), + PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10), + +/* sRGB formats */ + PIXMAN_a8r8g8b8_sRGB = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB_SRGB,8,8,8,8), + +/* 24bpp formats */ + PIXMAN_r8g8b8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8), + PIXMAN_b8g8r8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8), + +/* 16bpp formats */ + PIXMAN_r5g6b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,6,5), + PIXMAN_b5g6r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,6,5), + + PIXMAN_a1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,1,5,5,5), + PIXMAN_x1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,5,5), + PIXMAN_a1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,1,5,5,5), + PIXMAN_x1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,5,5), + PIXMAN_a4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,4,4,4,4), + PIXMAN_x4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,4,4,4), + PIXMAN_a4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,4,4,4,4), + PIXMAN_x4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,4,4,4), + +/* 8bpp formats */ + PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0), + PIXMAN_r3g3b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,0,3,3,2), + PIXMAN_b2g3r3 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,0,3,3,2), + PIXMAN_a2r2g2b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,2,2,2,2), + PIXMAN_a2b2g2r2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,2,2,2,2), + + PIXMAN_c8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0), + PIXMAN_g8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0), + + PIXMAN_x4a4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,4,0,0,0), + + PIXMAN_x4c4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0), + PIXMAN_x4g4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0), + +/* 4bpp formats */ + PIXMAN_a4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_A,4,0,0,0), + PIXMAN_r1g2b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,0,1,2,1), + PIXMAN_b1g2r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,0,1,2,1), + PIXMAN_a1r1g1b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,1,1,1,1), + PIXMAN_a1b1g1r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,1,1,1,1), + + PIXMAN_c4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_COLOR,0,0,0,0), + PIXMAN_g4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_GRAY,0,0,0,0), + +/* 1bpp formats */ + PIXMAN_a1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0), + + PIXMAN_g1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0), + +/* YUV formats */ + PIXMAN_yuy2 = PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0), + PIXMAN_yv12 = PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0) +} pixman_format_code_t; + +/* Querying supported format values. */ +pixman_bool_t pixman_format_supported_destination (pixman_format_code_t format); +pixman_bool_t pixman_format_supported_source (pixman_format_code_t format); + +/* Constructors */ +pixman_image_t *pixman_image_create_solid_fill (const pixman_color_t *color); +pixman_image_t *pixman_image_create_linear_gradient (const pixman_point_fixed_t *p1, + const pixman_point_fixed_t *p2, + const pixman_gradient_stop_t *stops, + int n_stops); +pixman_image_t *pixman_image_create_radial_gradient (const pixman_point_fixed_t *inner, + const pixman_point_fixed_t *outer, + pixman_fixed_t inner_radius, + pixman_fixed_t outer_radius, + const pixman_gradient_stop_t *stops, + int n_stops); +pixman_image_t *pixman_image_create_conical_gradient (const pixman_point_fixed_t *center, + pixman_fixed_t angle, + const pixman_gradient_stop_t *stops, + int n_stops); +pixman_image_t *pixman_image_create_bits (pixman_format_code_t format, + int width, + int height, + uint32_t *bits, + int rowstride_bytes); +pixman_image_t *pixman_image_create_bits_no_clear (pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride_bytes); + +/* Destructor */ +pixman_image_t *pixman_image_ref (pixman_image_t *image); +pixman_bool_t pixman_image_unref (pixman_image_t *image); + +void pixman_image_set_destroy_function (pixman_image_t *image, + pixman_image_destroy_func_t function, + void *data); +void * pixman_image_get_destroy_data (pixman_image_t *image); + +/* Set properties */ +pixman_bool_t pixman_image_set_clip_region (pixman_image_t *image, + pixman_region16_t *region); +pixman_bool_t pixman_image_set_clip_region32 (pixman_image_t *image, + pixman_region32_t *region); +void pixman_image_set_has_client_clip (pixman_image_t *image, + pixman_bool_t clien_clip); +pixman_bool_t pixman_image_set_transform (pixman_image_t *image, + const pixman_transform_t *transform); +void pixman_image_set_repeat (pixman_image_t *image, + pixman_repeat_t repeat); +pixman_bool_t pixman_image_set_filter (pixman_image_t *image, + pixman_filter_t filter, + const pixman_fixed_t *filter_params, + int n_filter_params); +void pixman_image_set_source_clipping (pixman_image_t *image, + pixman_bool_t source_clipping); +void pixman_image_set_alpha_map (pixman_image_t *image, + pixman_image_t *alpha_map, + int16_t x, + int16_t y); +void pixman_image_set_component_alpha (pixman_image_t *image, + pixman_bool_t component_alpha); +pixman_bool_t pixman_image_get_component_alpha (pixman_image_t *image); +void pixman_image_set_accessors (pixman_image_t *image, + pixman_read_memory_func_t read_func, + pixman_write_memory_func_t write_func); +void pixman_image_set_indexed (pixman_image_t *image, + const pixman_indexed_t *indexed); +uint32_t *pixman_image_get_data (pixman_image_t *image); +int pixman_image_get_width (pixman_image_t *image); +int pixman_image_get_height (pixman_image_t *image); +int pixman_image_get_stride (pixman_image_t *image); /* in bytes */ +int pixman_image_get_depth (pixman_image_t *image); +pixman_format_code_t pixman_image_get_format (pixman_image_t *image); + +typedef enum +{ + PIXMAN_KERNEL_IMPULSE, + PIXMAN_KERNEL_BOX, + PIXMAN_KERNEL_LINEAR, + PIXMAN_KERNEL_CUBIC, + PIXMAN_KERNEL_GAUSSIAN, + PIXMAN_KERNEL_LANCZOS2, + PIXMAN_KERNEL_LANCZOS3, + PIXMAN_KERNEL_LANCZOS3_STRETCHED /* Jim Blinn's 'nice' filter */ +} pixman_kernel_t; + +/* Create the parameter list for a SEPARABLE_CONVOLUTION filter + * with the given kernels and scale parameters. + */ +pixman_fixed_t * +pixman_filter_create_separable_convolution (int *n_values, + pixman_fixed_t scale_x, + pixman_fixed_t scale_y, + pixman_kernel_t reconstruct_x, + pixman_kernel_t reconstruct_y, + pixman_kernel_t sample_x, + pixman_kernel_t sample_y, + int subsample_bits_x, + int subsample_bits_y); + +pixman_bool_t pixman_image_fill_rectangles (pixman_op_t op, + pixman_image_t *image, + const pixman_color_t *color, + int n_rects, + const pixman_rectangle16_t *rects); +pixman_bool_t pixman_image_fill_boxes (pixman_op_t op, + pixman_image_t *dest, + const pixman_color_t *color, + int n_boxes, + const pixman_box32_t *boxes); + +/* Composite */ +pixman_bool_t pixman_compute_composite_region (pixman_region16_t *region, + pixman_image_t *src_image, + pixman_image_t *mask_image, + pixman_image_t *dest_image, + int16_t src_x, + int16_t src_y, + int16_t mask_x, + int16_t mask_y, + int16_t dest_x, + int16_t dest_y, + uint16_t width, + uint16_t height); +void pixman_image_composite (pixman_op_t op, + pixman_image_t *src, + pixman_image_t *mask, + pixman_image_t *dest, + int16_t src_x, + int16_t src_y, + int16_t mask_x, + int16_t mask_y, + int16_t dest_x, + int16_t dest_y, + uint16_t width, + uint16_t height); +void pixman_image_composite32 (pixman_op_t op, + pixman_image_t *src, + pixman_image_t *mask, + pixman_image_t *dest, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height); + +/* Executive Summary: This function is a no-op that only exists + * for historical reasons. + * + * There used to be a bug in the X server where it would rely on + * out-of-bounds accesses when it was asked to composite with a + * window as the source. It would create a pixman image pointing + * to some bogus position in memory, but then set a clip region + * to the position where the actual bits were. + * + * Due to a bug in old versions of pixman, where it would not clip + * against the image bounds when a clip region was set, this would + * actually work. So when the pixman bug was fixed, a workaround was + * added to allow certain out-of-bound accesses. This function disabled + * those workarounds. + * + * Since 0.21.2, pixman doesn't do these workarounds anymore, so now this + * function is a no-op. + */ +void pixman_disable_out_of_bounds_workaround (void); + +/* + * Glyphs + */ +typedef struct pixman_glyph_cache_t pixman_glyph_cache_t; +typedef struct +{ + int x, y; + const void *glyph; +} pixman_glyph_t; + +pixman_glyph_cache_t *pixman_glyph_cache_create (void); +void pixman_glyph_cache_destroy (pixman_glyph_cache_t *cache); +void pixman_glyph_cache_freeze (pixman_glyph_cache_t *cache); +void pixman_glyph_cache_thaw (pixman_glyph_cache_t *cache); +const void * pixman_glyph_cache_lookup (pixman_glyph_cache_t *cache, + void *font_key, + void *glyph_key); +const void * pixman_glyph_cache_insert (pixman_glyph_cache_t *cache, + void *font_key, + void *glyph_key, + int origin_x, + int origin_y, + pixman_image_t *glyph_image); +void pixman_glyph_cache_remove (pixman_glyph_cache_t *cache, + void *font_key, + void *glyph_key); +void pixman_glyph_get_extents (pixman_glyph_cache_t *cache, + int n_glyphs, + pixman_glyph_t *glyphs, + pixman_box32_t *extents); +pixman_format_code_t pixman_glyph_get_mask_format (pixman_glyph_cache_t *cache, + int n_glyphs, + const pixman_glyph_t *glyphs); +void pixman_composite_glyphs (pixman_op_t op, + pixman_image_t *src, + pixman_image_t *dest, + pixman_format_code_t mask_format, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height, + pixman_glyph_cache_t *cache, + int n_glyphs, + const pixman_glyph_t *glyphs); +void pixman_composite_glyphs_no_mask (pixman_op_t op, + pixman_image_t *src, + pixman_image_t *dest, + int32_t src_x, + int32_t src_y, + int32_t dest_x, + int32_t dest_y, + pixman_glyph_cache_t *cache, + int n_glyphs, + const pixman_glyph_t *glyphs); + +/* + * Trapezoids + */ +typedef struct pixman_edge pixman_edge_t; +typedef struct pixman_trapezoid pixman_trapezoid_t; +typedef struct pixman_trap pixman_trap_t; +typedef struct pixman_span_fix pixman_span_fix_t; +typedef struct pixman_triangle pixman_triangle_t; + +/* + * An edge structure. This represents a single polygon edge + * and can be quickly stepped across small or large gaps in the + * sample grid + */ +struct pixman_edge +{ + pixman_fixed_t x; + pixman_fixed_t e; + pixman_fixed_t stepx; + pixman_fixed_t signdx; + pixman_fixed_t dy; + pixman_fixed_t dx; + + pixman_fixed_t stepx_small; + pixman_fixed_t stepx_big; + pixman_fixed_t dx_small; + pixman_fixed_t dx_big; +}; + +struct pixman_trapezoid +{ + pixman_fixed_t top, bottom; + pixman_line_fixed_t left, right; +}; + +struct pixman_triangle +{ + pixman_point_fixed_t p1, p2, p3; +}; + +/* whether 't' is a well defined not obviously empty trapezoid */ +#define pixman_trapezoid_valid(t) \ + ((t)->left.p1.y != (t)->left.p2.y && \ + (t)->right.p1.y != (t)->right.p2.y && \ + ((t)->bottom > (t)->top)) + +struct pixman_span_fix +{ + pixman_fixed_t l, r, y; +}; + +struct pixman_trap +{ + pixman_span_fix_t top, bot; +}; + +pixman_fixed_t pixman_sample_ceil_y (pixman_fixed_t y, + int bpp); +pixman_fixed_t pixman_sample_floor_y (pixman_fixed_t y, + int bpp); +void pixman_edge_step (pixman_edge_t *e, + int n); +void pixman_edge_init (pixman_edge_t *e, + int bpp, + pixman_fixed_t y_start, + pixman_fixed_t x_top, + pixman_fixed_t y_top, + pixman_fixed_t x_bot, + pixman_fixed_t y_bot); +void pixman_line_fixed_edge_init (pixman_edge_t *e, + int bpp, + pixman_fixed_t y, + const pixman_line_fixed_t *line, + int x_off, + int y_off); +void pixman_rasterize_edges (pixman_image_t *image, + pixman_edge_t *l, + pixman_edge_t *r, + pixman_fixed_t t, + pixman_fixed_t b); +void pixman_add_traps (pixman_image_t *image, + int16_t x_off, + int16_t y_off, + int ntrap, + const pixman_trap_t *traps); +void pixman_add_trapezoids (pixman_image_t *image, + int16_t x_off, + int y_off, + int ntraps, + const pixman_trapezoid_t *traps); +void pixman_rasterize_trapezoid (pixman_image_t *image, + const pixman_trapezoid_t *trap, + int x_off, + int y_off); +void pixman_composite_trapezoids (pixman_op_t op, + pixman_image_t * src, + pixman_image_t * dst, + pixman_format_code_t mask_format, + int x_src, + int y_src, + int x_dst, + int y_dst, + int n_traps, + const pixman_trapezoid_t * traps); +void pixman_composite_triangles (pixman_op_t op, + pixman_image_t * src, + pixman_image_t * dst, + pixman_format_code_t mask_format, + int x_src, + int y_src, + int x_dst, + int y_dst, + int n_tris, + const pixman_triangle_t * tris); +void pixman_add_triangles (pixman_image_t *image, + int32_t x_off, + int32_t y_off, + int n_tris, + const pixman_triangle_t *tris); + +PIXMAN_END_DECLS + +#endif /* PIXMAN_H__ */ diff --git a/src/pixman/pixman/rounding.txt b/src/pixman/pixman/rounding.txt new file mode 100644 index 0000000..b52b084 --- /dev/null +++ b/src/pixman/pixman/rounding.txt @@ -0,0 +1,167 @@ +*** General notes about rounding + +Suppose a function is sampled at positions [k + o] where k is an +integer and o is a fractional offset 0 <= o < 1. + +To round a value to the nearest sample, breaking ties by rounding up, +we can do this: + + round(x) = floor(x - o + 0.5) + o + +That is, first subtract o to let us pretend that the samples are at +integer coordinates, then add 0.5 and floor to round to nearest +integer, then add the offset back in. + +To break ties by rounding down: + + round(x) = ceil(x - o - 0.5) + o + +or if we have an epsilon value: + + round(x) = floor(x - o + 0.5 - e) + o + +To always round *up* to the next sample: + + round_up(x) = ceil(x - o) + o + +To always round *down* to the previous sample: + + round_down(x) = floor(x - o) + o + +If a set of samples is stored in an array, you get from the sample +position to an index by subtracting the position of the first sample +in the array: + + index(s) = s - first_sample + + +*** Application to pixman + +In pixman, images are sampled with o = 0.5, that is, pixels are +located midways between integers. We usually break ties by rounding +down (i.e., "round towards north-west"). + + +-- NEAREST filtering: + +The NEAREST filter simply picks the closest pixel to the given +position: + + round(x) = floor(x - 0.5 + 0.5 - e) + 0.5 = floor (x - e) + 0.5 + +The first sample of a pixman image has position 0.5, so to find the +index in the pixel array, we have to subtract 0.5: + + floor (x - e) + 0.5 - 0.5 = floor (x - e). + +Therefore a 16.16 fixed-point image location is turned into a pixel +value with NEAREST filtering by doing this: + + pixels[((y - e) >> 16) * stride + ((x - e) >> 16)] + +where stride is the number of pixels allocated per scanline and e = +0x0001. + + +-- CONVOLUTION filtering: + +A convolution matrix is considered a sampling of a function f at +values surrounding 0. For example, this convolution matrix: + + [a, b, c, d] + +is interpreted as the values of a function f: + + a = f(-1.5) + b = f(-0.5) + c = f(0.5) + d = f(1.5) + +The sample offset in this case is o = 0.5 and the first sample has +position s0 = -1.5. If the matrix is: + + [a, b, c, d, e] + +the sample offset is o = 0 and the first sample has position s0 = +-2.0. In general we have + + s0 = (- width / 2.0 + 0.5). + +and + + o = frac (s0) + +To evaluate f at a position between the samples, we round to the +closest sample, and then we subtract the position of the first sample +to get the index in the matrix: + + f(t) = matrix[floor(t - o + 0.5) + o - s0] + +Note that in this case we break ties by rounding up. + +If we write s0 = m + o, where m is an integer, this is equivalent to + + f(t) = matrix[floor(t - o + 0.5) + o - (m + o)] + = matrix[floor(t - o + 0.5 - m) + o - o] + = matrix[floor(t - s0 + 0.5)] + +The convolution filter in pixman positions f such that 0 aligns with +the given position x. For a given pixel x0 in the image, the closest +sample of f is then computed by taking (x - x0) and rounding that to +the closest index: + + i = floor ((x0 - x) - s0 + 0.5) + +To perform the convolution, we have to find the first pixel x0 whose +corresponding sample has index 0. We can write x0 = k + 0.5, where k +is an integer: + + 0 = floor(k + 0.5 - x - s0 + 0.5) + + = k + floor(1 - x - s0) + + = k - ceil(x + s0 - 1) + + = k - floor(x + s0 - e) + + = k - floor(x - (width - 1) / 2.0 - e) + +And so the final formula for the index k of x0 in the image is: + + k = floor(x - (width - 1) / 2.0 - e) + +Computing the result is then simply a matter of convolving all the +pixels starting at k with all the samples in the matrix. + + +--- SEPARABLE_CONVOLUTION + +For this filter, x is first rounded to one of n regularly spaced +subpixel positions. This subpixel position determines which of n +convolution matrices is being used. + +Then, as in a regular convolution filter, the first pixel to be used +is determined: + + k = floor (x - (width - 1) / 2.0 - e) + +and then the image pixels starting there are convolved with the chosen +matrix. If we write x = xi + frac, where xi is an integer, we get + + k = xi + floor (frac - (width - 1) / 2.0 - e) + +so the location of k relative to x is given by: + + (k + 0.5 - x) = xi + floor (frac - (width - 1) / 2.0 - e) + 0.5 - x + + = floor (frac - (width - 1) / 2.0 - e) + 0.5 - frac + +which means the contents of the matrix corresponding to (frac) should +contain width samplings of the function, with the first sample at: + + floor (frac - (width - 1) / 2.0 - e) + 0.5 - frac + +This filter is called separable because each of the k x k convolution +matrices is specified with two k-wide vectors, one for each dimension, +where each entry in the matrix is computed as the product of the +corresponding entries in the vectors. diff --git a/src/pixman/pixman/solaris-hwcap.mapfile b/src/pixman/pixman/solaris-hwcap.mapfile new file mode 100644 index 0000000..87efce1 --- /dev/null +++ b/src/pixman/pixman/solaris-hwcap.mapfile @@ -0,0 +1,30 @@ +############################################################################### +# +# Copyright 2009, Oracle and/or its affiliates. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +# +############################################################################### +# +# Override the linker's detection of CMOV/MMX/SSE instructions so this +# library isn't flagged as only usable on CPU's with those ISA's, since it +# checks at runtime for availability before calling them + +hwcap_1 = V0x0 FPU OVERRIDE; diff --git a/src/pixman/test/Makefile.am b/src/pixman/test/Makefile.am new file mode 100644 index 0000000..88dc36d --- /dev/null +++ b/src/pixman/test/Makefile.am @@ -0,0 +1,13 @@ +include $(top_srcdir)/test/Makefile.sources + +AM_CFLAGS = $(OPENMP_CFLAGS) $(PTHREAD_CFLAGS) +AM_LDFLAGS = $(OPENMP_CFLAGS) $(TESTPROGS_EXTRA_LDFLAGS) $(PTHREAD_LDFLAGS) +LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm $(PNG_LIBS) $(PTHREAD_LIBS) +AM_CPPFLAGS = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(PNG_CFLAGS) + +libutils_la_SOURCES = $(libutils_sources) $(libutils_headers) + +noinst_LTLIBRARIES = libutils.la +noinst_PROGRAMS = $(TESTPROGRAMS) $(OTHERPROGRAMS) + +TESTS = $(TESTPROGRAMS) diff --git a/src/pixman/test/Makefile.sources b/src/pixman/test/Makefile.sources new file mode 100644 index 0000000..2ae5d9f --- /dev/null +++ b/src/pixman/test/Makefile.sources @@ -0,0 +1,49 @@ +# Tests (sorted by expected completion time) +TESTPROGRAMS = \ + prng-test \ + a1-trap-test \ + pdf-op-test \ + region-test \ + region-translate-test \ + combiner-test \ + pixel-test \ + fetch-test \ + rotate-test \ + oob-test \ + infinite-loop \ + trap-crasher \ + alpha-loop \ + thread-test \ + scaling-crash-test \ + scaling-helpers-test \ + gradient-crash-test \ + region-contains-test \ + alphamap \ + matrix-test \ + stress-test \ + composite-traps-test \ + blitters-test \ + glyph-test \ + scaling-test \ + affine-test \ + composite \ + $(NULL) + +# Other programs +OTHERPROGRAMS = \ + lowlevel-blt-bench \ + radial-perf-test \ + check-formats \ + scaling-bench \ + $(NULL) + +# Utility functions +libutils_sources = \ + utils.c \ + utils-prng.c \ + $(NULL) + +libutils_headers = \ + utils.h \ + utils-prng.h \ + $(NULL) diff --git a/src/pixman/test/Makefile.win32 b/src/pixman/test/Makefile.win32 new file mode 100644 index 0000000..6cfb4a7 --- /dev/null +++ b/src/pixman/test/Makefile.win32 @@ -0,0 +1,54 @@ +default: all + +top_srcdir = .. +include $(top_srcdir)/test/Makefile.sources +include $(top_srcdir)/Makefile.win32.common + +TEST_LDADD = \ + $(top_builddir)/pixman/$(CFG_VAR)/$(LIBRARY).lib \ + $(CFG_VAR)/libutils.lib \ + $(NULL) + +libutils_OBJECTS = $(patsubst %.c, $(CFG_VAR)/%.obj, $(libutils_sources)) + +SOURCES = $(patsubst %, %.c, $(TESTPROGRAMS) $(OTHERPROGRAMS)) +OBJECTS = $(patsubst %.c, $(CFG_VAR)/%.obj, $(SOURCES)) +TESTS = $(patsubst %, $(CFG_VAR)/%.exe, $(TESTPROGRAMS)) +OTHERS = $(patsubst %, $(CFG_VAR)/%.exe, $(OTHERPROGRAMS)) + +all: pixman inform $(TESTS) $(OTHERS) + +check: pixman inform $(TESTS) + @failures=0 ; \ + total=0 ; \ + for test in $(TESTS) ; \ + do \ + total=`expr $$total + 1` ; \ + if ./$$test ; \ + then echo "PASS: $$test" ; \ + else echo "FAIL: $$test" ; \ + failures=`expr $$failures + 1` ; \ + fi ; \ + done ; \ + if test $$failures -eq 0 ; \ + then banner="All $$total tests passed" ; \ + else banner="$$failures of $$total tests failed" ; \ + fi ; \ + dashes=`echo "$$banner" | sed s/./=/g`; \ + echo "$$dashes" ; \ + echo "$$banner" ; \ + echo "$$dashes" ; \ + test $$failures -eq 0 + +$(CFG_VAR)/libutils.lib: $(libutils_OBJECTS) + @$(AR) $(PIXMAN_ARFLAGS) -OUT:$@ $^ + +$(CFG_VAR)/%.exe: $(CFG_VAR)/%.obj $(TEST_LDADD) + @$(LD) $(PIXMAN_LDFLAGS) -OUT:$@ $^ + +$(top_builddir)/pixman/$(CFG_VAR)/$(LIBRARY).lib: pixman + +pixman: + @$(MAKE) -C $(top_builddir)/pixman -f Makefile.win32 + +.PHONY: all check pixman diff --git a/src/pixman/test/a1-trap-test.c b/src/pixman/test/a1-trap-test.c new file mode 100644 index 0000000..c2b4883 --- /dev/null +++ b/src/pixman/test/a1-trap-test.c @@ -0,0 +1,58 @@ +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 20 +#define HEIGHT 20 + + pixman_image_t *src_img; + pixman_image_t *mask_img; + pixman_image_t *dest_img; + pixman_trap_t trap; + pixman_color_t red = { 0xffff, 0x0000, 0x0000, 0xffff }; + uint32_t *bits = malloc (WIDTH * HEIGHT * 4); + uint32_t *mbits = malloc (WIDTH * HEIGHT); + + memset (mbits, 0, WIDTH * HEIGHT); + memset (bits, 0xff, WIDTH * HEIGHT * 4); + + trap.top.l = pixman_double_to_fixed (0.5); + trap.top.r = pixman_double_to_fixed (1.5); + trap.top.y = pixman_double_to_fixed (0.5); + + trap.bot.l = pixman_double_to_fixed (0.5); + trap.bot.r = pixman_double_to_fixed (1.5); + trap.bot.y = pixman_double_to_fixed (1.5); + + mask_img = pixman_image_create_bits ( + PIXMAN_a1, WIDTH, HEIGHT, mbits, WIDTH); + src_img = pixman_image_create_solid_fill (&red); + dest_img = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, WIDTH, HEIGHT, bits, WIDTH * 4); + + pixman_add_traps (mask_img, 0, 0, 1, &trap); + + pixman_image_composite (PIXMAN_OP_OVER, + src_img, mask_img, dest_img, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + + assert (bits[0] == 0xffff0000); + assert (bits[1] == 0xffffffff); + assert (bits[1 * WIDTH + 0] == 0xffffffff); + assert (bits[1 * WIDTH + 1] == 0xffffffff); + + /* The check-formats test depends on operator_name() and format_name() returning + * these precise formats, so if those change, check-formats.c must be updated too. + */ + assert ( + strcmp (operator_name (PIXMAN_OP_DISJOINT_OVER), "PIXMAN_OP_DISJOINT_OVER") == 0); + assert ( + strcmp (format_name (PIXMAN_r5g6b5), "r5g6b5") == 0); + + return 0; +} diff --git a/src/pixman/test/affine-test.c b/src/pixman/test/affine-test.c new file mode 100644 index 0000000..8e19023 --- /dev/null +++ b/src/pixman/test/affine-test.c @@ -0,0 +1,324 @@ +/* + * Test program, which can detect some problems with affine transformations + * in pixman. Testing is done by running lots of random SRC and OVER + * compositing operations a8r8g8b8, x8a8r8g8b8, r5g6b5 and a8 color formats + * with random scaled, rotated and translated transforms. + * + * Script 'fuzzer-find-diff.pl' can be used to narrow down the problem in + * the case of test failure. + */ +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include "utils.h" + +#define MAX_SRC_WIDTH 16 +#define MAX_SRC_HEIGHT 16 +#define MAX_DST_WIDTH 16 +#define MAX_DST_HEIGHT 16 +#define MAX_STRIDE 4 + +/* + * Composite operation with pseudorandom images + */ +uint32_t +test_composite (int testnum, + int verbose) +{ + int i; + pixman_image_t * src_img; + pixman_image_t * dst_img; + pixman_transform_t transform; + pixman_region16_t clip; + int src_width, src_height; + int dst_width, dst_height; + int src_stride, dst_stride; + int src_x, src_y; + int dst_x, dst_y; + int src_bpp; + int dst_bpp; + int w, h; + pixman_fixed_t scale_x = 65536, scale_y = 65536; + pixman_fixed_t translate_x = 0, translate_y = 0; + pixman_op_t op; + pixman_repeat_t repeat = PIXMAN_REPEAT_NONE; + pixman_format_code_t src_fmt, dst_fmt; + uint32_t * srcbuf; + uint32_t * dstbuf; + uint32_t crc32; + FLOAT_REGS_CORRUPTION_DETECTOR_START (); + + prng_srand (testnum); + + src_bpp = (prng_rand_n (2) == 0) ? 2 : 4; + dst_bpp = (prng_rand_n (2) == 0) ? 2 : 4; + op = (prng_rand_n (2) == 0) ? PIXMAN_OP_SRC : PIXMAN_OP_OVER; + + src_width = prng_rand_n (MAX_SRC_WIDTH) + 1; + src_height = prng_rand_n (MAX_SRC_HEIGHT) + 1; + dst_width = prng_rand_n (MAX_DST_WIDTH) + 1; + dst_height = prng_rand_n (MAX_DST_HEIGHT) + 1; + src_stride = src_width * src_bpp + prng_rand_n (MAX_STRIDE) * src_bpp; + dst_stride = dst_width * dst_bpp + prng_rand_n (MAX_STRIDE) * dst_bpp; + + if (src_stride & 3) + src_stride += 2; + + if (dst_stride & 3) + dst_stride += 2; + + src_x = -(src_width / 4) + prng_rand_n (src_width * 3 / 2); + src_y = -(src_height / 4) + prng_rand_n (src_height * 3 / 2); + dst_x = -(dst_width / 4) + prng_rand_n (dst_width * 3 / 2); + dst_y = -(dst_height / 4) + prng_rand_n (dst_height * 3 / 2); + w = prng_rand_n (dst_width * 3 / 2 - dst_x); + h = prng_rand_n (dst_height * 3 / 2 - dst_y); + + srcbuf = (uint32_t *)malloc (src_stride * src_height); + dstbuf = (uint32_t *)malloc (dst_stride * dst_height); + + prng_randmemset (srcbuf, src_stride * src_height, 0); + prng_randmemset (dstbuf, dst_stride * dst_height, 0); + + if (prng_rand_n (2) == 0) + { + srcbuf += (src_stride / 4) * (src_height - 1); + src_stride = - src_stride; + } + + if (prng_rand_n (2) == 0) + { + dstbuf += (dst_stride / 4) * (dst_height - 1); + dst_stride = - dst_stride; + } + + src_fmt = src_bpp == 4 ? (prng_rand_n (2) == 0 ? + PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; + + dst_fmt = dst_bpp == 4 ? (prng_rand_n (2) == 0 ? + PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; + + src_img = pixman_image_create_bits ( + src_fmt, src_width, src_height, srcbuf, src_stride); + + dst_img = pixman_image_create_bits ( + dst_fmt, dst_width, dst_height, dstbuf, dst_stride); + + image_endian_swap (src_img); + image_endian_swap (dst_img); + + pixman_transform_init_identity (&transform); + + if (prng_rand_n (3) > 0) + { + scale_x = -65536 * 3 + prng_rand_n (65536 * 6); + if (prng_rand_n (2)) + scale_y = -65536 * 3 + prng_rand_n (65536 * 6); + else + scale_y = scale_x; + pixman_transform_init_scale (&transform, scale_x, scale_y); + } + if (prng_rand_n (3) > 0) + { + translate_x = -65536 * 3 + prng_rand_n (6 * 65536); + if (prng_rand_n (2)) + translate_y = -65536 * 3 + prng_rand_n (6 * 65536); + else + translate_y = translate_x; + pixman_transform_translate (&transform, NULL, translate_x, translate_y); + } + + if (prng_rand_n (4) > 0) + { + int c, s, tx = 0, ty = 0; + switch (prng_rand_n (4)) + { + case 0: + /* 90 degrees */ + c = 0; + s = pixman_fixed_1; + tx = pixman_int_to_fixed (MAX_SRC_HEIGHT); + break; + case 1: + /* 180 degrees */ + c = -pixman_fixed_1; + s = 0; + tx = pixman_int_to_fixed (MAX_SRC_WIDTH); + ty = pixman_int_to_fixed (MAX_SRC_HEIGHT); + break; + case 2: + /* 270 degrees */ + c = 0; + s = -pixman_fixed_1; + ty = pixman_int_to_fixed (MAX_SRC_WIDTH); + break; + default: + /* arbitrary rotation */ + c = prng_rand_n (2 * 65536) - 65536; + s = prng_rand_n (2 * 65536) - 65536; + break; + } + pixman_transform_rotate (&transform, NULL, c, s); + pixman_transform_translate (&transform, NULL, tx, ty); + } + + if (prng_rand_n (8) == 0) + { + /* Flip random bits */ + int maxflipcount = 8; + while (maxflipcount--) + { + int i = prng_rand_n (2); + int j = prng_rand_n (3); + int bitnum = prng_rand_n (32); + transform.matrix[i][j] ^= 1 << bitnum; + if (prng_rand_n (2)) + break; + } + } + + pixman_image_set_transform (src_img, &transform); + + switch (prng_rand_n (4)) + { + case 0: + repeat = PIXMAN_REPEAT_NONE; + break; + + case 1: + repeat = PIXMAN_REPEAT_NORMAL; + break; + + case 2: + repeat = PIXMAN_REPEAT_PAD; + break; + + case 3: + repeat = PIXMAN_REPEAT_REFLECT; + break; + + default: + break; + } + pixman_image_set_repeat (src_img, repeat); + + if (prng_rand_n (2)) + pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0); + else + pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0); + + if (verbose) + { +#define M(r,c) \ + transform.matrix[r][c] + + printf ("src_fmt=%s, dst_fmt=%s\n", format_name (src_fmt), format_name (dst_fmt)); + printf ("op=%s, repeat=%d, transform=\n", + operator_name (op), repeat); + printf (" { { { 0x%08x, 0x%08x, 0x%08x },\n" + " { 0x%08x, 0x%08x, 0x%08x },\n" + " { 0x%08x, 0x%08x, 0x%08x },\n" + " } };\n", + M(0,0), M(0,1), M(0,2), + M(1,0), M(1,1), M(1,2), + M(2,0), M(2,1), M(2,2)); + printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n", + src_width, src_height, dst_width, dst_height); + printf ("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n", + src_x, src_y, dst_x, dst_y); + printf ("w=%d, h=%d\n", w, h); + } + + if (prng_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = prng_rand_n (2) + 1; + + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = prng_rand_n (src_width); + clip_boxes[i].y1 = prng_rand_n (src_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + prng_rand_n (src_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + prng_rand_n (src_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("source clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (src_img, &clip); + pixman_image_set_source_clipping (src_img, 1); + pixman_region_fini (&clip); + } + + if (prng_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = prng_rand_n (2) + 1; + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = prng_rand_n (dst_width); + clip_boxes[i].y1 = prng_rand_n (dst_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + prng_rand_n (dst_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + prng_rand_n (dst_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("destination clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (dst_img, &clip); + pixman_region_fini (&clip); + } + + pixman_image_composite (op, src_img, NULL, dst_img, + src_x, src_y, 0, 0, dst_x, dst_y, w, h); + + crc32 = compute_crc32_for_image (0, dst_img); + + if (verbose) + print_image (dst_img); + + pixman_image_unref (src_img); + pixman_image_unref (dst_img); + + if (src_stride < 0) + srcbuf += (src_stride / 4) * (src_height - 1); + + if (dst_stride < 0) + dstbuf += (dst_stride / 4) * (dst_height - 1); + + free (srcbuf); + free (dstbuf); + + FLOAT_REGS_CORRUPTION_DETECTOR_FINISH (); + return crc32; +} + +#if BILINEAR_INTERPOLATION_BITS == 7 +#define CHECKSUM 0xBE724CFE +#elif BILINEAR_INTERPOLATION_BITS == 4 +#define CHECKSUM 0x79BBE501 +#else +#define CHECKSUM 0x00000000 +#endif + +int +main (int argc, const char *argv[]) +{ + pixman_disable_out_of_bounds_workaround (); + + return fuzzer_test_main ("affine", 8000000, CHECKSUM, + test_composite, argc, argv); +} diff --git a/src/pixman/test/alpha-loop.c b/src/pixman/test/alpha-loop.c new file mode 100644 index 0000000..4d4384d --- /dev/null +++ b/src/pixman/test/alpha-loop.c @@ -0,0 +1,35 @@ +#include <stdio.h> +#include <stdlib.h> +#include "utils.h" + +#define WIDTH 400 +#define HEIGHT 200 + +int +main (int argc, char **argv) +{ + pixman_image_t *a, *d, *s; + uint8_t *alpha; + uint32_t *src, *dest; + + prng_srand (0); + + alpha = make_random_bytes (WIDTH * HEIGHT); + src = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * 4); + dest = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * 4); + + a = pixman_image_create_bits (PIXMAN_a8, WIDTH, HEIGHT, (uint32_t *)alpha, WIDTH); + d = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, dest, WIDTH * 4); + s = pixman_image_create_bits (PIXMAN_a2r10g10b10, WIDTH, HEIGHT, src, WIDTH * 4); + + fail_after (5, "Infinite loop detected: 5 seconds without progress\n"); + + pixman_image_set_alpha_map (s, a, 0, 0); + pixman_image_set_alpha_map (a, s, 0, 0); + + pixman_image_composite (PIXMAN_OP_SRC, s, NULL, d, 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + + pixman_image_unref (s); + + return 0; +} diff --git a/src/pixman/test/alphamap.c b/src/pixman/test/alphamap.c new file mode 100644 index 0000000..4d09076 --- /dev/null +++ b/src/pixman/test/alphamap.c @@ -0,0 +1,315 @@ +#include <stdio.h> +#include <stdlib.h> +#include "utils.h" + +#define WIDTH 48 +#define HEIGHT 48 + +static const pixman_format_code_t formats[] = +{ + PIXMAN_a8r8g8b8, + PIXMAN_a2r10g10b10, + PIXMAN_a4r4g4b4, + PIXMAN_a8 +}; + +static const pixman_format_code_t alpha_formats[] = +{ + PIXMAN_null, + PIXMAN_a8, + PIXMAN_a2r10g10b10, + PIXMAN_a4r4g4b4 +}; + +static const int origins[] = +{ + 0, 10, -100 +}; + +static void +on_destroy (pixman_image_t *image, void *data) +{ + uint32_t *bits = pixman_image_get_data (image); + + fence_free (bits); +} + +static pixman_image_t * +make_image (pixman_format_code_t format) +{ + uint32_t *bits; + uint8_t bpp = PIXMAN_FORMAT_BPP (format) / 8; + pixman_image_t *image; + + bits = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * bpp); + + image = pixman_image_create_bits (format, WIDTH, HEIGHT, bits, WIDTH * bpp); + + if (image && bits) + pixman_image_set_destroy_function (image, on_destroy, NULL); + + return image; +} + +static uint8_t +get_alpha (pixman_image_t *image, int x, int y, int orig_x, int orig_y) +{ + uint8_t *bits; + uint8_t r; + + if (image->common.alpha_map) + { + if (x - orig_x >= 0 && x - orig_x < WIDTH && + y - orig_y >= 0 && y - orig_y < HEIGHT) + { + image = (pixman_image_t *)image->common.alpha_map; + + x -= orig_x; + y -= orig_y; + } + else + { + return 0; + } + } + + bits = (uint8_t *)image->bits.bits; + + if (image->bits.format == PIXMAN_a8) + { + r = bits[y * WIDTH + x]; + } + else if (image->bits.format == PIXMAN_a2r10g10b10) + { + r = ((uint32_t *)bits)[y * WIDTH + x] >> 30; + r |= r << 2; + r |= r << 4; + } + else if (image->bits.format == PIXMAN_a8r8g8b8) + { + r = ((uint32_t *)bits)[y * WIDTH + x] >> 24; + } + else if (image->bits.format == PIXMAN_a4r4g4b4) + { + r = ((uint16_t *)bits)[y * WIDTH + x] >> 12; + r |= r << 4; + } + else + { + assert (0); + } + + return r; +} + +static uint16_t +get_red (pixman_image_t *image, int x, int y, int orig_x, int orig_y) +{ + uint8_t *bits; + uint16_t r; + + bits = (uint8_t *)image->bits.bits; + + if (image->bits.format == PIXMAN_a8) + { + r = 0x00; + } + else if (image->bits.format == PIXMAN_a2r10g10b10) + { + r = ((uint32_t *)bits)[y * WIDTH + x] >> 14; + r &= 0xffc0; + r |= (r >> 10); + } + else if (image->bits.format == PIXMAN_a8r8g8b8) + { + r = ((uint32_t *)bits)[y * WIDTH + x] >> 16; + r &= 0xff; + r |= r << 8; + } + else if (image->bits.format == PIXMAN_a4r4g4b4) + { + r = ((uint16_t *)bits)[y * WIDTH + x] >> 8; + r &= 0xf; + r |= r << 4; + r |= r << 8; + } + else + { + assert (0); + } + + return r; +} + +static int +run_test (int s, int d, int sa, int da, int soff, int doff) +{ + pixman_format_code_t sf = formats[s]; + pixman_format_code_t df = formats[d]; + pixman_format_code_t saf = alpha_formats[sa]; + pixman_format_code_t daf = alpha_formats[da]; + pixman_image_t *src, *dst, *orig_dst, *alpha, *orig_alpha; + pixman_transform_t t1; + int j, k; + int n_alpha_bits, n_red_bits; + + soff = origins[soff]; + doff = origins[doff]; + + n_alpha_bits = PIXMAN_FORMAT_A (df); + if (daf != PIXMAN_null) + n_alpha_bits = PIXMAN_FORMAT_A (daf); + + n_red_bits = PIXMAN_FORMAT_R (df); + + /* Source */ + src = make_image (sf); + if (saf != PIXMAN_null) + { + alpha = make_image (saf); + pixman_image_set_alpha_map (src, alpha, soff, soff); + pixman_image_unref (alpha); + } + + /* Destination */ + orig_dst = make_image (df); + dst = make_image (df); + pixman_image_composite (PIXMAN_OP_SRC, orig_dst, NULL, dst, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + + if (daf != PIXMAN_null) + { + orig_alpha = make_image (daf); + alpha = make_image (daf); + + pixman_image_composite (PIXMAN_OP_SRC, orig_alpha, NULL, alpha, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + + pixman_image_set_alpha_map (orig_dst, orig_alpha, doff, doff); + pixman_image_set_alpha_map (dst, alpha, doff, doff); + + pixman_image_unref (orig_alpha); + pixman_image_unref (alpha); + } + + /* Transformations, repeats and filters on destinations should be ignored, + * so just set some random ones. + */ + pixman_transform_init_identity (&t1); + pixman_transform_scale (&t1, NULL, pixman_int_to_fixed (100), pixman_int_to_fixed (11)); + pixman_transform_rotate (&t1, NULL, pixman_double_to_fixed (0.5), pixman_double_to_fixed (0.11)); + pixman_transform_translate (&t1, NULL, pixman_int_to_fixed (11), pixman_int_to_fixed (17)); + + pixman_image_set_transform (dst, &t1); + pixman_image_set_filter (dst, PIXMAN_FILTER_BILINEAR, NULL, 0); + pixman_image_set_repeat (dst, PIXMAN_REPEAT_REFLECT); + + pixman_image_composite (PIXMAN_OP_ADD, src, NULL, dst, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + + for (j = MAX (doff, 0); j < MIN (HEIGHT, HEIGHT + doff); ++j) + { + for (k = MAX (doff, 0); k < MIN (WIDTH, WIDTH + doff); ++k) + { + uint8_t sa, da, oda, refa; + uint16_t sr, dr, odr, refr; + + sa = get_alpha (src, k, j, soff, soff); + da = get_alpha (dst, k, j, doff, doff); + oda = get_alpha (orig_dst, k, j, doff, doff); + + if (sa + oda > 255) + refa = 255; + else + refa = sa + oda; + + if (da >> (8 - n_alpha_bits) != refa >> (8 - n_alpha_bits)) + { + printf ("\nWrong alpha value at (%d, %d). Should be 0x%x; got 0x%x. Source was 0x%x, original dest was 0x%x\n", + k, j, refa, da, sa, oda); + + printf ("src: %s, alpha: %s, origin %d %d\ndst: %s, alpha: %s, origin: %d %d\n\n", + format_name (sf), + format_name (saf), + soff, soff, + format_name (df), + format_name (daf), + doff, doff); + return 1; + } + + /* There are cases where we go through the 8 bit compositing + * path even with 10bpc formats. This results in incorrect + * results here, so only do the red check for narrow formats + */ + if (n_red_bits <= 8) + { + sr = get_red (src, k, j, soff, soff); + dr = get_red (dst, k, j, doff, doff); + odr = get_red (orig_dst, k, j, doff, doff); + + if (sr + odr > 0xffff) + refr = 0xffff; + else + refr = sr + odr; + + if (abs ((dr >> (16 - n_red_bits)) - (refr >> (16 - n_red_bits))) > 1) + { + printf ("%d red bits\n", n_red_bits); + printf ("\nWrong red value at (%d, %d). Should be 0x%x; got 0x%x. Source was 0x%x, original dest was 0x%x\n", + k, j, refr, dr, sr, odr); + + printf ("src: %s, alpha: %s, origin %d %d\ndst: %s, alpha: %s, origin: %d %d\n\n", + format_name (sf), + format_name (saf), + soff, soff, + format_name (df), + format_name (daf), + doff, doff); + return 1; + } + } + } + } + + pixman_image_set_alpha_map (src, NULL, 0, 0); + pixman_image_set_alpha_map (dst, NULL, 0, 0); + pixman_image_set_alpha_map (orig_dst, NULL, 0, 0); + + pixman_image_unref (src); + pixman_image_unref (dst); + pixman_image_unref (orig_dst); + + return 0; +} + +int +main (int argc, char **argv) +{ + int i, j, a, b, x, y; + + prng_srand (0); + + for (i = 0; i < ARRAY_LENGTH (formats); ++i) + { + for (j = 0; j < ARRAY_LENGTH (formats); ++j) + { + for (a = 0; a < ARRAY_LENGTH (alpha_formats); ++a) + { + for (b = 0; b < ARRAY_LENGTH (alpha_formats); ++b) + { + for (x = 0; x < ARRAY_LENGTH (origins); ++x) + { + for (y = 0; y < ARRAY_LENGTH (origins); ++y) + { + if (run_test (i, j, a, b, x, y) != 0) + return 1; + } + } + } + } + } + } + + return 0; +} diff --git a/src/pixman/test/blitters-test.c b/src/pixman/test/blitters-test.c new file mode 100644 index 0000000..ea03f47 --- /dev/null +++ b/src/pixman/test/blitters-test.c @@ -0,0 +1,399 @@ +/* + * Test program, which stresses the use of different color formats and + * compositing operations. + * + * Script 'fuzzer-find-diff.pl' can be used to narrow down the problem in + * the case of test failure. + */ +#include <stdlib.h> +#include <stdio.h> +#include "utils.h" + +static pixman_indexed_t rgb_palette[9]; +static pixman_indexed_t y_palette[9]; + +/* The first eight format in the list are by far the most widely + * used formats, so we test those more than the others + */ +#define N_MOST_LIKELY_FORMATS 8 + +/* Create random image for testing purposes */ +static pixman_image_t * +create_random_image (pixman_format_code_t *allowed_formats, + int max_width, + int max_height, + int max_extra_stride, + pixman_format_code_t *used_fmt) +{ + int n = 0, width, height, stride; + pixman_format_code_t fmt; + uint32_t *buf; + pixman_image_t *img; + + while (allowed_formats[n] != PIXMAN_null) + n++; + + if (n > N_MOST_LIKELY_FORMATS && prng_rand_n (4) != 0) + n = N_MOST_LIKELY_FORMATS; + fmt = allowed_formats[prng_rand_n (n)]; + + width = prng_rand_n (max_width) + 1; + height = prng_rand_n (max_height) + 1; + stride = (width * PIXMAN_FORMAT_BPP (fmt) + 7) / 8 + + prng_rand_n (max_extra_stride + 1); + stride = (stride + 3) & ~3; + + /* do the allocation */ + buf = aligned_malloc (64, stride * height); + + if (prng_rand_n (4) == 0) + { + /* uniform distribution */ + prng_randmemset (buf, stride * height, 0); + } + else + { + /* significantly increased probability for 0x00 and 0xFF */ + prng_randmemset (buf, stride * height, RANDMEMSET_MORE_00_AND_FF); + } + + /* test negative stride */ + if (prng_rand_n (4) == 0) + { + buf += (stride / 4) * (height - 1); + stride = - stride; + } + + img = pixman_image_create_bits (fmt, width, height, buf, stride); + + if (PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_COLOR) + { + pixman_image_set_indexed (img, &(rgb_palette[PIXMAN_FORMAT_BPP (fmt)])); + } + else if (PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_GRAY) + { + pixman_image_set_indexed (img, &(y_palette[PIXMAN_FORMAT_BPP (fmt)])); + } + + if (prng_rand_n (16) == 0) + pixman_image_set_filter (img, PIXMAN_FILTER_BILINEAR, NULL, 0); + + image_endian_swap (img); + + if (used_fmt) *used_fmt = fmt; + return img; +} + +/* Free random image, and optionally update crc32 based on its data */ +static uint32_t +free_random_image (uint32_t initcrc, + pixman_image_t *img, + pixman_format_code_t fmt) +{ + uint32_t crc32 = 0; + uint32_t *data = pixman_image_get_data (img); + + if (fmt != PIXMAN_null) + crc32 = compute_crc32_for_image (initcrc, img); + + if (img->bits.rowstride < 0) + data += img->bits.rowstride * (img->bits.height - 1); + + pixman_image_unref (img); + free (data); + + return crc32; +} + +static pixman_op_t op_list[] = { + PIXMAN_OP_SRC, + PIXMAN_OP_OVER, + PIXMAN_OP_ADD, + PIXMAN_OP_CLEAR, + PIXMAN_OP_SRC, + PIXMAN_OP_DST, + PIXMAN_OP_OVER, + PIXMAN_OP_OVER_REVERSE, + PIXMAN_OP_IN, + PIXMAN_OP_IN_REVERSE, + PIXMAN_OP_OUT, + PIXMAN_OP_OUT_REVERSE, + PIXMAN_OP_ATOP, + PIXMAN_OP_ATOP_REVERSE, + PIXMAN_OP_XOR, + PIXMAN_OP_ADD, + PIXMAN_OP_SATURATE, + PIXMAN_OP_DISJOINT_CLEAR, + PIXMAN_OP_DISJOINT_SRC, + PIXMAN_OP_DISJOINT_DST, + PIXMAN_OP_DISJOINT_OVER, + PIXMAN_OP_DISJOINT_OVER_REVERSE, + PIXMAN_OP_DISJOINT_IN, + PIXMAN_OP_DISJOINT_IN_REVERSE, + PIXMAN_OP_DISJOINT_OUT, + PIXMAN_OP_DISJOINT_OUT_REVERSE, + PIXMAN_OP_DISJOINT_ATOP, + PIXMAN_OP_DISJOINT_ATOP_REVERSE, + PIXMAN_OP_DISJOINT_XOR, + PIXMAN_OP_CONJOINT_CLEAR, + PIXMAN_OP_CONJOINT_SRC, + PIXMAN_OP_CONJOINT_DST, + PIXMAN_OP_CONJOINT_OVER, + PIXMAN_OP_CONJOINT_OVER_REVERSE, + PIXMAN_OP_CONJOINT_IN, + PIXMAN_OP_CONJOINT_IN_REVERSE, + PIXMAN_OP_CONJOINT_OUT, + PIXMAN_OP_CONJOINT_OUT_REVERSE, + PIXMAN_OP_CONJOINT_ATOP, + PIXMAN_OP_CONJOINT_ATOP_REVERSE, + PIXMAN_OP_CONJOINT_XOR, + PIXMAN_OP_MULTIPLY, + PIXMAN_OP_SCREEN, + PIXMAN_OP_OVERLAY, + PIXMAN_OP_DARKEN, + PIXMAN_OP_LIGHTEN, + PIXMAN_OP_COLOR_DODGE, + PIXMAN_OP_COLOR_BURN, + PIXMAN_OP_HARD_LIGHT, + PIXMAN_OP_DIFFERENCE, + PIXMAN_OP_EXCLUSION, +#if 0 /* these use floating point math and are not always bitexact on different platforms */ + PIXMAN_OP_SOFT_LIGHT, + PIXMAN_OP_HSL_HUE, + PIXMAN_OP_HSL_SATURATION, + PIXMAN_OP_HSL_COLOR, + PIXMAN_OP_HSL_LUMINOSITY, +#endif +}; + +static pixman_format_code_t img_fmt_list[] = { + PIXMAN_a8r8g8b8, + PIXMAN_a8b8g8r8, + PIXMAN_x8r8g8b8, + PIXMAN_x8b8g8r8, + PIXMAN_r5g6b5, + PIXMAN_b5g6r5, + PIXMAN_a8, + PIXMAN_a1, + PIXMAN_r3g3b2, + PIXMAN_b8g8r8a8, + PIXMAN_b8g8r8x8, + PIXMAN_r8g8b8a8, + PIXMAN_r8g8b8x8, + PIXMAN_x14r6g6b6, + PIXMAN_r8g8b8, + PIXMAN_b8g8r8, +#if 0 /* These are going to use floating point in the near future */ + PIXMAN_x2r10g10b10, + PIXMAN_a2r10g10b10, + PIXMAN_x2b10g10r10, + PIXMAN_a2b10g10r10, +#endif + PIXMAN_a1r5g5b5, + PIXMAN_x1r5g5b5, + PIXMAN_a1b5g5r5, + PIXMAN_x1b5g5r5, + PIXMAN_a4r4g4b4, + PIXMAN_x4r4g4b4, + PIXMAN_a4b4g4r4, + PIXMAN_x4b4g4r4, + PIXMAN_r3g3b2, + PIXMAN_b2g3r3, + PIXMAN_a2r2g2b2, + PIXMAN_a2b2g2r2, + PIXMAN_c8, + PIXMAN_g8, + PIXMAN_x4c4, + PIXMAN_x4g4, + PIXMAN_c4, + PIXMAN_g4, + PIXMAN_g1, + PIXMAN_x4a4, + PIXMAN_a4, + PIXMAN_r1g2b1, + PIXMAN_b1g2r1, + PIXMAN_a1r1g1b1, + PIXMAN_a1b1g1r1, + PIXMAN_null +}; + +static pixman_format_code_t mask_fmt_list[] = { + PIXMAN_a8r8g8b8, + PIXMAN_a8, + PIXMAN_a4, + PIXMAN_a1, + PIXMAN_null +}; + + +/* + * Composite operation with pseudorandom images + */ +uint32_t +test_composite (int testnum, int verbose) +{ + pixman_image_t *src_img = NULL; + pixman_image_t *dst_img = NULL; + pixman_image_t *mask_img = NULL; + int src_width, src_height; + int dst_width, dst_height; + int src_stride, dst_stride; + int src_x, src_y; + int dst_x, dst_y; + int mask_x, mask_y; + int w, h; + pixman_op_t op; + pixman_format_code_t src_fmt, dst_fmt, mask_fmt; + uint32_t *srcbuf, *maskbuf; + uint32_t crc32; + int max_width, max_height, max_extra_stride; + FLOAT_REGS_CORRUPTION_DETECTOR_START (); + + max_width = max_height = 24 + testnum / 10000; + max_extra_stride = 4 + testnum / 1000000; + + if (max_width > 256) + max_width = 256; + + if (max_height > 16) + max_height = 16; + + if (max_extra_stride > 8) + max_extra_stride = 8; + + prng_srand (testnum); + + op = op_list[prng_rand_n (ARRAY_LENGTH (op_list))]; + + if (prng_rand_n (8)) + { + /* normal image */ + src_img = create_random_image (img_fmt_list, max_width, max_height, + max_extra_stride, &src_fmt); + } + else + { + /* solid case */ + src_img = create_random_image (img_fmt_list, 1, 1, + max_extra_stride, &src_fmt); + + pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL); + } + + dst_img = create_random_image (img_fmt_list, max_width, max_height, + max_extra_stride, &dst_fmt); + + src_width = pixman_image_get_width (src_img); + src_height = pixman_image_get_height (src_img); + src_stride = pixman_image_get_stride (src_img); + + dst_width = pixman_image_get_width (dst_img); + dst_height = pixman_image_get_height (dst_img); + dst_stride = pixman_image_get_stride (dst_img); + + srcbuf = pixman_image_get_data (src_img); + + src_x = prng_rand_n (src_width); + src_y = prng_rand_n (src_height); + dst_x = prng_rand_n (dst_width); + dst_y = prng_rand_n (dst_height); + + mask_img = NULL; + mask_fmt = PIXMAN_null; + mask_x = 0; + mask_y = 0; + maskbuf = NULL; + + if ((src_fmt == PIXMAN_x8r8g8b8 || src_fmt == PIXMAN_x8b8g8r8) && + (prng_rand_n (4) == 0)) + { + /* PIXBUF */ + mask_fmt = prng_rand_n (2) ? PIXMAN_a8r8g8b8 : PIXMAN_a8b8g8r8; + mask_img = pixman_image_create_bits (mask_fmt, + src_width, + src_height, + srcbuf, + src_stride); + mask_x = src_x; + mask_y = src_y; + maskbuf = srcbuf; + } + else if (prng_rand_n (2)) + { + if (prng_rand_n (2)) + { + mask_img = create_random_image (mask_fmt_list, max_width, max_height, + max_extra_stride, &mask_fmt); + } + else + { + /* solid case */ + mask_img = create_random_image (mask_fmt_list, 1, 1, + max_extra_stride, &mask_fmt); + pixman_image_set_repeat (mask_img, PIXMAN_REPEAT_NORMAL); + } + + if (prng_rand_n (2)) + pixman_image_set_component_alpha (mask_img, 1); + + mask_x = prng_rand_n (pixman_image_get_width (mask_img)); + mask_y = prng_rand_n (pixman_image_get_height (mask_img)); + } + + + w = prng_rand_n (dst_width - dst_x + 1); + h = prng_rand_n (dst_height - dst_y + 1); + + if (verbose) + { + printf ("op=%s\n", operator_name (op)); + printf ("src_fmt=%s, dst_fmt=%s, mask_fmt=%s\n", + format_name (src_fmt), format_name (dst_fmt), + format_name (mask_fmt)); + printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n", + src_width, src_height, dst_width, dst_height); + printf ("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n", + src_x, src_y, dst_x, dst_y); + printf ("src_stride=%d, dst_stride=%d\n", + src_stride, dst_stride); + printf ("w=%d, h=%d\n", w, h); + } + + pixman_image_composite (op, src_img, mask_img, dst_img, + src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h); + + if (verbose) + print_image (dst_img); + + free_random_image (0, src_img, PIXMAN_null); + crc32 = free_random_image (0, dst_img, dst_fmt); + + if (mask_img) + { + if (srcbuf == maskbuf) + pixman_image_unref(mask_img); + else + free_random_image (0, mask_img, PIXMAN_null); + } + + FLOAT_REGS_CORRUPTION_DETECTOR_FINISH (); + return crc32; +} + +int +main (int argc, const char *argv[]) +{ + int i; + + prng_srand (0); + + for (i = 1; i <= 8; i++) + { + initialize_palette (&(rgb_palette[i]), i, TRUE); + initialize_palette (&(y_palette[i]), i, FALSE); + } + + return fuzzer_test_main("blitters", 2000000, + 0xE0A07495, + test_composite, argc, argv); +} diff --git a/src/pixman/test/check-formats.c b/src/pixman/test/check-formats.c new file mode 100644 index 0000000..7edc198 --- /dev/null +++ b/src/pixman/test/check-formats.c @@ -0,0 +1,352 @@ +#include <ctype.h> +#include "utils.h" + +static int +check_op (pixman_op_t op, + pixman_format_code_t src_format, + pixman_format_code_t dest_format) +{ + uint32_t src_alpha_mask, src_green_mask; + uint32_t dest_alpha_mask, dest_green_mask; + pixel_checker_t src_checker, dest_checker; + pixman_image_t *si, *di; + uint32_t sa, sg, da, dg; + uint32_t s, d; + int retval = 0; + + pixel_checker_init (&src_checker, src_format); + pixel_checker_init (&dest_checker, dest_format); + + pixel_checker_get_masks ( + &src_checker, &src_alpha_mask, NULL, &src_green_mask, NULL); + pixel_checker_get_masks ( + &dest_checker, &dest_alpha_mask, NULL, &dest_green_mask, NULL); + + /* printf ("masks: %x %x %x %x\n", */ + /* src_alpha_mask, src_green_mask, */ + /* dest_alpha_mask, dest_green_mask); */ + + si = pixman_image_create_bits (src_format, 1, 1, &s, 4); + di = pixman_image_create_bits (dest_format, 1, 1, &d, 4); + + sa = 0; + do + { + sg = 0; + do + { + da = 0; + do + { + dg = 0; + do + { + color_t src_color, dest_color, result_color; + uint32_t orig_d; + + s = sa | sg; + d = da | dg; + + orig_d = d; + + pixel_checker_convert_pixel_to_color (&src_checker, s, &src_color); + pixel_checker_convert_pixel_to_color (&dest_checker, d, &dest_color); + + do_composite (op, &src_color, NULL, &dest_color, &result_color, FALSE); + + + if (!is_little_endian()) + { + s <<= 32 - PIXMAN_FORMAT_BPP (src_format); + d <<= 32 - PIXMAN_FORMAT_BPP (dest_format); + } + + pixman_image_composite32 (op, si, NULL, di, + 0, 0, 0, 0, 0, 0, 1, 1); + + if (!is_little_endian()) + d >>= (32 - PIXMAN_FORMAT_BPP (dest_format)); + + if (!pixel_checker_check (&dest_checker, d, &result_color)) + { + printf ("---- test failed ----\n"); + printf ("operator: %-32s\n", operator_name (op)); + printf ("source: %-12s pixel: %08x\n", format_name (src_format), s); + printf ("dest: %-12s pixel: %08x\n", format_name (dest_format), orig_d); + printf ("got: %-12s pixel: %08x\n", format_name (dest_format), d); + + retval = 1; + } + + dg -= dest_green_mask; + dg &= dest_green_mask; + } + while (dg != 0); + + da -= dest_alpha_mask; + da &= dest_alpha_mask; + } + while (da != 0); + + sg -= src_green_mask; + sg &= src_green_mask; + } + while (sg != 0); + + sa -= src_alpha_mask; + sa &= src_alpha_mask; + } + while (sa != 0); + + pixman_image_unref (si); + pixman_image_unref (di); + + return retval; +} + +static const pixman_op_t op_list[] = +{ + PIXMAN_OP_CLEAR, + PIXMAN_OP_SRC, + PIXMAN_OP_DST, + PIXMAN_OP_OVER, + PIXMAN_OP_OVER_REVERSE, + PIXMAN_OP_IN, + PIXMAN_OP_IN_REVERSE, + PIXMAN_OP_OUT, + PIXMAN_OP_OUT_REVERSE, + PIXMAN_OP_ATOP, + PIXMAN_OP_ATOP_REVERSE, + PIXMAN_OP_XOR, + PIXMAN_OP_ADD, + PIXMAN_OP_SATURATE, + + PIXMAN_OP_DISJOINT_CLEAR, + PIXMAN_OP_DISJOINT_SRC, + PIXMAN_OP_DISJOINT_DST, + PIXMAN_OP_DISJOINT_OVER, + PIXMAN_OP_DISJOINT_OVER_REVERSE, + PIXMAN_OP_DISJOINT_IN, + PIXMAN_OP_DISJOINT_IN_REVERSE, + PIXMAN_OP_DISJOINT_OUT, + PIXMAN_OP_DISJOINT_OUT_REVERSE, + PIXMAN_OP_DISJOINT_ATOP, + PIXMAN_OP_DISJOINT_ATOP_REVERSE, + PIXMAN_OP_DISJOINT_XOR, + + PIXMAN_OP_CONJOINT_CLEAR, + PIXMAN_OP_CONJOINT_SRC, + PIXMAN_OP_CONJOINT_DST, + PIXMAN_OP_CONJOINT_OVER, + PIXMAN_OP_CONJOINT_OVER_REVERSE, + PIXMAN_OP_CONJOINT_IN, + PIXMAN_OP_CONJOINT_IN_REVERSE, + PIXMAN_OP_CONJOINT_OUT, + PIXMAN_OP_CONJOINT_OUT_REVERSE, + PIXMAN_OP_CONJOINT_ATOP, + PIXMAN_OP_CONJOINT_ATOP_REVERSE, + PIXMAN_OP_CONJOINT_XOR, +}; + +static const pixman_format_code_t format_list[] = +{ + PIXMAN_a8r8g8b8, + PIXMAN_x8r8g8b8, + PIXMAN_a8b8g8r8, + PIXMAN_x8b8g8r8, + PIXMAN_b8g8r8a8, + PIXMAN_b8g8r8x8, + PIXMAN_r8g8b8a8, + PIXMAN_r8g8b8x8, + PIXMAN_x14r6g6b6, + PIXMAN_x2r10g10b10, + PIXMAN_a2r10g10b10, + PIXMAN_x2b10g10r10, + PIXMAN_a2b10g10r10, + PIXMAN_a8r8g8b8_sRGB, + PIXMAN_r8g8b8, + PIXMAN_b8g8r8, + PIXMAN_r5g6b5, + PIXMAN_b5g6r5, + PIXMAN_a1r5g5b5, + PIXMAN_x1r5g5b5, + PIXMAN_a1b5g5r5, + PIXMAN_x1b5g5r5, + PIXMAN_a4r4g4b4, + PIXMAN_x4r4g4b4, + PIXMAN_a4b4g4r4, + PIXMAN_x4b4g4r4, + PIXMAN_a8, + PIXMAN_r3g3b2, + PIXMAN_b2g3r3, + PIXMAN_a2r2g2b2, + PIXMAN_a2b2g2r2, + PIXMAN_x4a4, + PIXMAN_a4, + PIXMAN_r1g2b1, + PIXMAN_b1g2r1, + PIXMAN_a1r1g1b1, + PIXMAN_a1b1g1r1, + PIXMAN_a1, +}; + +static pixman_format_code_t +format_from_string (const char *s) +{ + int i; + + for (i = 0; i < ARRAY_LENGTH (format_list); ++i) + { + if (strcasecmp (format_name (format_list[i]), s) == 0) + return format_list[i]; + } + + return PIXMAN_null; +} + +static void +emit (const char *s, int *n_chars) +{ + *n_chars += printf ("%s,", s); + if (*n_chars > 60) + { + printf ("\n "); + *n_chars = 0; + } + else + { + printf (" "); + (*n_chars)++; + } +} + +static void +list_formats (void) +{ + int n_chars; + int i; + + printf ("Formats:\n "); + + n_chars = 0; + for (i = 0; i < ARRAY_LENGTH (format_list); ++i) + emit (format_name (format_list[i]), &n_chars); + + printf ("\n\n"); +} + +static void +list_operators (void) +{ + char short_name [128] = { 0 }; + int i, n_chars; + + printf ("Operators:\n "); + + n_chars = 0; + for (i = 0; i < ARRAY_LENGTH (op_list); ++i) + { + pixman_op_t op = op_list[i]; + int j; + + snprintf (short_name, sizeof (short_name) - 1, "%s", + operator_name (op) + strlen ("PIXMAN_OP_")); + + for (j = 0; short_name[j] != '\0'; ++j) + short_name[j] = tolower (short_name[j]); + + emit (short_name, &n_chars); + } + + printf ("\n\n"); +} + +static pixman_op_t +operator_from_string (const char *s) +{ + char full_name[128] = { 0 }; + int i; + + snprintf (full_name, (sizeof full_name) - 1, "PIXMAN_OP_%s", s); + + for (i = 0; i < ARRAY_LENGTH (op_list); ++i) + { + pixman_op_t op = op_list[i]; + + if (strcasecmp (operator_name (op), full_name) == 0) + return op; + } + + return PIXMAN_OP_NONE; +} + +int +main (int argc, char **argv) +{ + enum { OPTION_OP, OPTION_SRC, OPTION_DEST, LAST_OPTION } option; + pixman_format_code_t src_fmt, dest_fmt; + pixman_op_t op; + + op = PIXMAN_OP_NONE; + src_fmt = PIXMAN_null; + dest_fmt = PIXMAN_null; + + argc--; + argv++; + + for (option = OPTION_OP; option < LAST_OPTION; ++option) + { + char *arg = NULL; + + if (argc) + { + argc--; + arg = *argv++; + } + + switch (option) + { + case OPTION_OP: + if (!arg) + printf (" - missing operator\n"); + else if ((op = operator_from_string (arg)) == PIXMAN_OP_NONE) + printf (" - unknown operator %s\n", arg); + break; + + case OPTION_SRC: + if (!arg) + printf (" - missing source format\n"); + else if ((src_fmt = format_from_string (arg)) == PIXMAN_null) + printf (" - unknown source format %s\n", arg); + break; + + case OPTION_DEST: + if (!arg) + printf (" - missing destination format\n"); + else if ((dest_fmt = format_from_string (arg)) == PIXMAN_null) + printf (" - unknown destination format %s\n", arg); + break; + + default: + assert (0); + break; + } + } + + while (argc--) + { + op = PIXMAN_OP_NONE; + printf (" - unexpected argument: %s\n", *argv++); + } + + if (op == PIXMAN_OP_NONE || src_fmt == PIXMAN_null || dest_fmt == PIXMAN_null) + { + printf ("\nUsage:\n check-formats <operator> <src-format> <dest-format>\n\n"); + list_operators(); + list_formats(); + + return -1; + } + + return check_op (op, src_fmt, dest_fmt); +} diff --git a/src/pixman/test/combiner-test.c b/src/pixman/test/combiner-test.c new file mode 100644 index 0000000..01f63a5 --- /dev/null +++ b/src/pixman/test/combiner-test.c @@ -0,0 +1,151 @@ +#include <stdio.h> +#include <stdlib.h> +#include "utils.h" +#include <sys/types.h> +#include "pixman-private.h" + +static const pixman_op_t op_list[] = +{ + PIXMAN_OP_SRC, + PIXMAN_OP_OVER, + PIXMAN_OP_ADD, + PIXMAN_OP_CLEAR, + PIXMAN_OP_SRC, + PIXMAN_OP_DST, + PIXMAN_OP_OVER, + PIXMAN_OP_OVER_REVERSE, + PIXMAN_OP_IN, + PIXMAN_OP_IN_REVERSE, + PIXMAN_OP_OUT, + PIXMAN_OP_OUT_REVERSE, + PIXMAN_OP_ATOP, + PIXMAN_OP_ATOP_REVERSE, + PIXMAN_OP_XOR, + PIXMAN_OP_ADD, + PIXMAN_OP_SATURATE, + PIXMAN_OP_DISJOINT_CLEAR, + PIXMAN_OP_DISJOINT_SRC, + PIXMAN_OP_DISJOINT_DST, + PIXMAN_OP_DISJOINT_OVER, + PIXMAN_OP_DISJOINT_OVER_REVERSE, + PIXMAN_OP_DISJOINT_IN, + PIXMAN_OP_DISJOINT_IN_REVERSE, + PIXMAN_OP_DISJOINT_OUT, + PIXMAN_OP_DISJOINT_OUT_REVERSE, + PIXMAN_OP_DISJOINT_ATOP, + PIXMAN_OP_DISJOINT_ATOP_REVERSE, + PIXMAN_OP_DISJOINT_XOR, + PIXMAN_OP_CONJOINT_CLEAR, + PIXMAN_OP_CONJOINT_SRC, + PIXMAN_OP_CONJOINT_DST, + PIXMAN_OP_CONJOINT_OVER, + PIXMAN_OP_CONJOINT_OVER_REVERSE, + PIXMAN_OP_CONJOINT_IN, + PIXMAN_OP_CONJOINT_IN_REVERSE, + PIXMAN_OP_CONJOINT_OUT, + PIXMAN_OP_CONJOINT_OUT_REVERSE, + PIXMAN_OP_CONJOINT_ATOP, + PIXMAN_OP_CONJOINT_ATOP_REVERSE, + PIXMAN_OP_CONJOINT_XOR, + PIXMAN_OP_MULTIPLY, + PIXMAN_OP_SCREEN, + PIXMAN_OP_OVERLAY, + PIXMAN_OP_DARKEN, + PIXMAN_OP_LIGHTEN, + PIXMAN_OP_COLOR_DODGE, + PIXMAN_OP_COLOR_BURN, + PIXMAN_OP_HARD_LIGHT, + PIXMAN_OP_DIFFERENCE, + PIXMAN_OP_EXCLUSION, + PIXMAN_OP_SOFT_LIGHT, + PIXMAN_OP_HSL_HUE, + PIXMAN_OP_HSL_SATURATION, + PIXMAN_OP_HSL_COLOR, + PIXMAN_OP_HSL_LUMINOSITY, +}; + +static float +rand_float (void) +{ + uint32_t u = prng_rand(); + + return *(float *)&u; +} + +static void +random_floats (argb_t *argb, int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + argb_t *p = argb + i; + + p->a = rand_float(); + p->r = rand_float(); + p->g = rand_float(); + p->b = rand_float(); + } +} + +#define WIDTH 512 + +static pixman_combine_float_func_t +lookup_combiner (pixman_implementation_t *imp, pixman_op_t op, + pixman_bool_t component_alpha) +{ + pixman_combine_float_func_t f; + + do + { + if (component_alpha) + f = imp->combine_float_ca[op]; + else + f = imp->combine_float[op]; + + imp = imp->fallback; + } + while (!f); + + return f; +} + +int +main () +{ + pixman_implementation_t *impl; + argb_t *src_bytes = malloc (WIDTH * sizeof (argb_t)); + argb_t *mask_bytes = malloc (WIDTH * sizeof (argb_t)); + argb_t *dest_bytes = malloc (WIDTH * sizeof (argb_t)); + int i; + + enable_divbyzero_exceptions(); + + impl = _pixman_internal_only_get_implementation(); + + prng_srand (0); + + for (i = 0; i < ARRAY_LENGTH (op_list); ++i) + { + pixman_op_t op = op_list[i]; + pixman_combine_float_func_t combiner; + int ca; + + for (ca = 0; ca < 2; ++ca) + { + combiner = lookup_combiner (impl, op, ca); + + random_floats (src_bytes, WIDTH); + random_floats (mask_bytes, WIDTH); + random_floats (dest_bytes, WIDTH); + + combiner (impl, op, + (float *)dest_bytes, + (float *)mask_bytes, + (float *)src_bytes, + WIDTH); + } + } + + return 0; +} diff --git a/src/pixman/test/composite-traps-test.c b/src/pixman/test/composite-traps-test.c new file mode 100644 index 0000000..86a0355 --- /dev/null +++ b/src/pixman/test/composite-traps-test.c @@ -0,0 +1,252 @@ +/* Based loosely on scaling-test */ + +#include <stdlib.h> +#include <stdio.h> +#include "utils.h" + +#define MAX_SRC_WIDTH 48 +#define MAX_SRC_HEIGHT 48 +#define MAX_DST_WIDTH 48 +#define MAX_DST_HEIGHT 48 +#define MAX_STRIDE 4 + +static pixman_format_code_t formats[] = +{ + PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_r5g6b5, PIXMAN_a1, PIXMAN_a4 +}; + +static pixman_format_code_t mask_formats[] = +{ + PIXMAN_a1, PIXMAN_a4, PIXMAN_a8, +}; + +static pixman_op_t operators[] = +{ + PIXMAN_OP_OVER, PIXMAN_OP_ADD, PIXMAN_OP_SRC, PIXMAN_OP_IN +}; + +#define RANDOM_ELT(array) \ + ((array)[prng_rand_n(ARRAY_LENGTH((array)))]) + +static void +destroy_bits (pixman_image_t *image, void *data) +{ + fence_free (data); +} + +static pixman_fixed_t +random_fixed (int n) +{ + return prng_rand_n (n << 16); +} + +/* + * Composite operation with pseudorandom images + */ +uint32_t +test_composite (int testnum, + int verbose) +{ + int i; + pixman_image_t * src_img; + pixman_image_t * dst_img; + pixman_region16_t clip; + int dst_width, dst_height; + int dst_stride; + int dst_x, dst_y; + int dst_bpp; + pixman_op_t op; + uint32_t * dst_bits; + uint32_t crc32; + pixman_format_code_t mask_format, dst_format; + pixman_trapezoid_t *traps; + int src_x, src_y; + int n_traps; + + static pixman_color_t colors[] = + { + { 0xffff, 0xffff, 0xffff, 0xffff }, + { 0x0000, 0x0000, 0x0000, 0x0000 }, + { 0xabcd, 0xabcd, 0x0000, 0xabcd }, + { 0x0000, 0x0000, 0x0000, 0xffff }, + { 0x0101, 0x0101, 0x0101, 0x0101 }, + { 0x7777, 0x6666, 0x5555, 0x9999 }, + }; + + FLOAT_REGS_CORRUPTION_DETECTOR_START (); + + prng_srand (testnum); + + op = RANDOM_ELT (operators); + mask_format = RANDOM_ELT (mask_formats); + + /* Create source image */ + + if (prng_rand_n (4) == 0) + { + src_img = pixman_image_create_solid_fill ( + &(colors[prng_rand_n (ARRAY_LENGTH (colors))])); + + src_x = 10; + src_y = 234; + } + else + { + pixman_format_code_t src_format = RANDOM_ELT(formats); + int src_bpp = (PIXMAN_FORMAT_BPP (src_format) + 7) / 8; + int src_width = prng_rand_n (MAX_SRC_WIDTH) + 1; + int src_height = prng_rand_n (MAX_SRC_HEIGHT) + 1; + int src_stride = src_width * src_bpp + prng_rand_n (MAX_STRIDE) * src_bpp; + uint32_t *bits, *orig; + + src_x = -(src_width / 4) + prng_rand_n (src_width * 3 / 2); + src_y = -(src_height / 4) + prng_rand_n (src_height * 3 / 2); + + src_stride = (src_stride + 3) & ~3; + + orig = bits = (uint32_t *)make_random_bytes (src_stride * src_height); + + if (prng_rand_n (2) == 0) + { + bits += (src_stride / 4) * (src_height - 1); + src_stride = - src_stride; + } + + src_img = pixman_image_create_bits ( + src_format, src_width, src_height, bits, src_stride); + + pixman_image_set_destroy_function (src_img, destroy_bits, orig); + + if (prng_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = prng_rand_n (2) + 1; + + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = prng_rand_n (src_width); + clip_boxes[i].y1 = prng_rand_n (src_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + prng_rand_n (src_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + prng_rand_n (src_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("source clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (src_img, &clip); + pixman_image_set_source_clipping (src_img, 1); + pixman_region_fini (&clip); + } + + image_endian_swap (src_img); + } + + /* Create destination image */ + { + dst_format = RANDOM_ELT(formats); + dst_bpp = (PIXMAN_FORMAT_BPP (dst_format) + 7) / 8; + dst_width = prng_rand_n (MAX_DST_WIDTH) + 1; + dst_height = prng_rand_n (MAX_DST_HEIGHT) + 1; + dst_stride = dst_width * dst_bpp + prng_rand_n (MAX_STRIDE) * dst_bpp; + dst_stride = (dst_stride + 3) & ~3; + + dst_bits = (uint32_t *)make_random_bytes (dst_stride * dst_height); + + if (prng_rand_n (2) == 0) + { + dst_bits += (dst_stride / 4) * (dst_height - 1); + dst_stride = - dst_stride; + } + + dst_x = -(dst_width / 4) + prng_rand_n (dst_width * 3 / 2); + dst_y = -(dst_height / 4) + prng_rand_n (dst_height * 3 / 2); + + dst_img = pixman_image_create_bits ( + dst_format, dst_width, dst_height, dst_bits, dst_stride); + + image_endian_swap (dst_img); + } + + /* Create traps */ + { + int i; + + n_traps = prng_rand_n (25); + traps = fence_malloc (n_traps * sizeof (pixman_trapezoid_t)); + + for (i = 0; i < n_traps; ++i) + { + pixman_trapezoid_t *t = &(traps[i]); + + t->top = random_fixed (MAX_DST_HEIGHT) - MAX_DST_HEIGHT / 2; + t->bottom = t->top + random_fixed (MAX_DST_HEIGHT); + t->left.p1.x = random_fixed (MAX_DST_WIDTH) - MAX_DST_WIDTH / 2; + t->left.p1.y = t->top - random_fixed (50); + t->left.p2.x = random_fixed (MAX_DST_WIDTH) - MAX_DST_WIDTH / 2; + t->left.p2.y = t->bottom + random_fixed (50); + t->right.p1.x = t->left.p1.x + random_fixed (MAX_DST_WIDTH); + t->right.p1.y = t->top - random_fixed (50); + t->right.p2.x = t->left.p2.x + random_fixed (MAX_DST_WIDTH); + t->right.p2.y = t->bottom - random_fixed (50); + } + } + + if (prng_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = prng_rand_n (2) + 1; + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = prng_rand_n (dst_width); + clip_boxes[i].y1 = prng_rand_n (dst_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + prng_rand_n (dst_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + prng_rand_n (dst_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("destination clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (dst_img, &clip); + pixman_region_fini (&clip); + } + + pixman_composite_trapezoids (op, src_img, dst_img, mask_format, + src_x, src_y, dst_x, dst_y, n_traps, traps); + + crc32 = compute_crc32_for_image (0, dst_img); + + if (verbose) + print_image (dst_img); + + if (dst_stride < 0) + dst_bits += (dst_stride / 4) * (dst_height - 1); + + fence_free (dst_bits); + + pixman_image_unref (src_img); + pixman_image_unref (dst_img); + fence_free (traps); + + FLOAT_REGS_CORRUPTION_DETECTOR_FINISH (); + return crc32; +} + +int +main (int argc, const char *argv[]) +{ + return fuzzer_test_main("composite traps", 40000, 0xAF41D210, + test_composite, argc, argv); +} diff --git a/src/pixman/test/composite.c b/src/pixman/test/composite.c new file mode 100644 index 0000000..9e51a8f --- /dev/null +++ b/src/pixman/test/composite.c @@ -0,0 +1,536 @@ +/* + * Copyright © 2005 Eric Anholt + * Copyright © 2009 Chris Wilson + * Copyright © 2010 Soeren Sandmann + * Copyright © 2010 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Eric Anholt not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Eric Anholt makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * ERIC ANHOLT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL ERIC ANHOLT BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ +#include <stdio.h> +#include <stdlib.h> /* abort() */ +#include <math.h> +#include <time.h> +#include "utils.h" + +typedef struct image_t image_t; + +static const color_t colors[] = +{ + { 1.0, 1.0, 1.0, 1.0 }, + { 1.0, 1.0, 1.0, 0.0 }, + { 0.0, 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 0.0, 0.0 }, + { 1.0, 0.0, 0.0, 1.0 }, + { 0.0, 1.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0, 1.0 }, + { 0.5, 0.0, 0.0, 0.5 }, +}; + +static uint16_t +_color_double_to_short (double d) +{ + uint32_t i; + + i = (uint32_t) (d * 65536); + i -= (i >> 16); + + return i; +} + +static void +compute_pixman_color (const color_t *color, + pixman_color_t *out) +{ + out->red = _color_double_to_short (color->r); + out->green = _color_double_to_short (color->g); + out->blue = _color_double_to_short (color->b); + out->alpha = _color_double_to_short (color->a); +} + +#define REPEAT 0x01000000 +#define FLAGS 0xff000000 + +static const int sizes[] = +{ + 0, + 1, + 1 | REPEAT, + 10 +}; + +static const pixman_format_code_t formats[] = +{ + /* 32 bpp formats */ + PIXMAN_a8r8g8b8, + PIXMAN_x8r8g8b8, + PIXMAN_a8b8g8r8, + PIXMAN_x8b8g8r8, + PIXMAN_b8g8r8a8, + PIXMAN_b8g8r8x8, + PIXMAN_r8g8b8a8, + PIXMAN_r8g8b8x8, + PIXMAN_x2r10g10b10, + PIXMAN_x2b10g10r10, + PIXMAN_a2r10g10b10, + PIXMAN_a2b10g10r10, + + /* sRGB formats */ + PIXMAN_a8r8g8b8_sRGB, + + /* 24 bpp formats */ + PIXMAN_r8g8b8, + PIXMAN_b8g8r8, + PIXMAN_r5g6b5, + PIXMAN_b5g6r5, + + /* 16 bpp formats */ + PIXMAN_x1r5g5b5, + PIXMAN_x1b5g5r5, + PIXMAN_a1r5g5b5, + PIXMAN_a1b5g5r5, + PIXMAN_a4b4g4r4, + PIXMAN_x4b4g4r4, + PIXMAN_a4r4g4b4, + PIXMAN_x4r4g4b4, + + /* 8 bpp formats */ + PIXMAN_a8, + PIXMAN_r3g3b2, + PIXMAN_b2g3r3, + PIXMAN_a2r2g2b2, + PIXMAN_a2b2g2r2, + PIXMAN_x4a4, + + /* 4 bpp formats */ + PIXMAN_a4, + PIXMAN_r1g2b1, + PIXMAN_b1g2r1, + PIXMAN_a1r1g1b1, + PIXMAN_a1b1g1r1, + + /* 1 bpp formats */ + PIXMAN_a1, +}; + +struct image_t +{ + pixman_image_t *image; + pixman_format_code_t format; + const color_t *color; + pixman_repeat_t repeat; + int size; +}; + +static const pixman_op_t operators[] = +{ + PIXMAN_OP_CLEAR, + PIXMAN_OP_SRC, + PIXMAN_OP_DST, + PIXMAN_OP_OVER, + PIXMAN_OP_OVER_REVERSE, + PIXMAN_OP_IN, + PIXMAN_OP_IN_REVERSE, + PIXMAN_OP_OUT, + PIXMAN_OP_OUT_REVERSE, + PIXMAN_OP_ATOP, + PIXMAN_OP_ATOP_REVERSE, + PIXMAN_OP_XOR, + PIXMAN_OP_ADD, + PIXMAN_OP_SATURATE, + + PIXMAN_OP_DISJOINT_CLEAR, + PIXMAN_OP_DISJOINT_SRC, + PIXMAN_OP_DISJOINT_DST, + PIXMAN_OP_DISJOINT_OVER, + PIXMAN_OP_DISJOINT_OVER_REVERSE, + PIXMAN_OP_DISJOINT_IN, + PIXMAN_OP_DISJOINT_IN_REVERSE, + PIXMAN_OP_DISJOINT_OUT, + PIXMAN_OP_DISJOINT_OUT_REVERSE, + PIXMAN_OP_DISJOINT_ATOP, + PIXMAN_OP_DISJOINT_ATOP_REVERSE, + PIXMAN_OP_DISJOINT_XOR, + + PIXMAN_OP_CONJOINT_CLEAR, + PIXMAN_OP_CONJOINT_SRC, + PIXMAN_OP_CONJOINT_DST, + PIXMAN_OP_CONJOINT_OVER, + PIXMAN_OP_CONJOINT_OVER_REVERSE, + PIXMAN_OP_CONJOINT_IN, + PIXMAN_OP_CONJOINT_IN_REVERSE, + PIXMAN_OP_CONJOINT_OUT, + PIXMAN_OP_CONJOINT_OUT_REVERSE, + PIXMAN_OP_CONJOINT_ATOP, + PIXMAN_OP_CONJOINT_ATOP_REVERSE, + PIXMAN_OP_CONJOINT_XOR, +}; + +static uint32_t +get_value (pixman_image_t *image) +{ + uint32_t value = *(uint32_t *)pixman_image_get_data (image); + +#ifdef WORDS_BIGENDIAN + { + pixman_format_code_t format = pixman_image_get_format (image); + value >>= 8 * sizeof(value) - PIXMAN_FORMAT_BPP (format); + } +#endif + + return value; +} + +static char * +describe_image (image_t *info, char *buf) +{ + if (info->size) + { + sprintf (buf, "%s, %dx%d%s", + format_name (info->format), + info->size, info->size, + info->repeat ? " R" :""); + } + else + { + sprintf (buf, "solid"); + } + + return buf; +} + +static char * +describe_color (const color_t *color, char *buf) +{ + sprintf (buf, "%.3f %.3f %.3f %.3f", + color->r, color->g, color->b, color->a); + + return buf; +} + +static pixman_bool_t +composite_test (image_t *dst, + pixman_op_t op, + image_t *src, + image_t *mask, + pixman_bool_t component_alpha, + int testno) +{ + color_t expected, tdst, tsrc, tmsk; + pixel_checker_t checker; + + if (mask) + { + pixman_image_set_component_alpha (mask->image, component_alpha); + + pixman_image_composite (op, src->image, mask->image, dst->image, + 0, 0, 0, 0, 0, 0, dst->size, dst->size); + } + else + { + pixman_image_composite (op, src->image, NULL, dst->image, + 0, 0, + 0, 0, + 0, 0, + dst->size, dst->size); + } + + tdst = *dst->color; + tsrc = *src->color; + + if (mask) + { + tmsk = *mask->color; + } + + /* It turns out that by construction all source, mask etc. colors are + * linear because they are made from fills, and fills are always in linear + * color space. However, if they have been converted to bitmaps, we need + * to simulate the sRGB approximation to pass the test cases. + */ + if (src->size) + { + if (PIXMAN_FORMAT_TYPE (src->format) == PIXMAN_TYPE_ARGB_SRGB) + { + tsrc.r = convert_linear_to_srgb (tsrc.r); + tsrc.g = convert_linear_to_srgb (tsrc.g); + tsrc.b = convert_linear_to_srgb (tsrc.b); + round_color (src->format, &tsrc); + tsrc.r = convert_srgb_to_linear (tsrc.r); + tsrc.g = convert_srgb_to_linear (tsrc.g); + tsrc.b = convert_srgb_to_linear (tsrc.b); + } + else + { + round_color (src->format, &tsrc); + } + } + + if (mask && mask->size) + { + if (PIXMAN_FORMAT_TYPE (mask->format) == PIXMAN_TYPE_ARGB_SRGB) + { + tmsk.r = convert_linear_to_srgb (tmsk.r); + tmsk.g = convert_linear_to_srgb (tmsk.g); + tmsk.b = convert_linear_to_srgb (tmsk.b); + round_color (mask->format, &tmsk); + tmsk.r = convert_srgb_to_linear (tmsk.r); + tmsk.g = convert_srgb_to_linear (tmsk.g); + tmsk.b = convert_srgb_to_linear (tmsk.b); + } + else + { + round_color (mask->format, &tmsk); + } + } + + if (mask) + { + if (component_alpha && PIXMAN_FORMAT_R (mask->format) == 0) + { + /* Ax component-alpha masks expand alpha into + * all color channels. + */ + tmsk.r = tmsk.g = tmsk.b = tmsk.a; + } + } + + if (PIXMAN_FORMAT_TYPE (dst->format) == PIXMAN_TYPE_ARGB_SRGB) + { + tdst.r = convert_linear_to_srgb (tdst.r); + tdst.g = convert_linear_to_srgb (tdst.g); + tdst.b = convert_linear_to_srgb (tdst.b); + round_color (dst->format, &tdst); + tdst.r = convert_srgb_to_linear (tdst.r); + tdst.g = convert_srgb_to_linear (tdst.g); + tdst.b = convert_srgb_to_linear (tdst.b); + } + else + { + round_color (dst->format, &tdst); + } + + do_composite (op, + &tsrc, + mask? &tmsk : NULL, + &tdst, + &expected, + component_alpha); + + pixel_checker_init (&checker, dst->format); + + if (!pixel_checker_check (&checker, get_value (dst->image), &expected)) + { + char buf[40], buf2[40]; + int a, r, g, b; + uint32_t pixel; + + printf ("---- Test %d failed ----\n", testno); + printf ("Operator: %s %s\n", + operator_name (op), component_alpha ? "CA" : ""); + + printf ("Source: %s\n", describe_image (src, buf)); + if (mask != NULL) + printf ("Mask: %s\n", describe_image (mask, buf)); + + printf ("Destination: %s\n\n", describe_image (dst, buf)); + printf (" R G B A Rounded\n"); + printf ("Source color: %s %s\n", + describe_color (src->color, buf), + describe_color (&tsrc, buf2)); + if (mask) + { + printf ("Mask color: %s %s\n", + describe_color (mask->color, buf), + describe_color (&tmsk, buf2)); + } + printf ("Dest. color: %s %s\n", + describe_color (dst->color, buf), + describe_color (&tdst, buf2)); + + pixel = get_value (dst->image); + + printf ("Expected: %s\n", describe_color (&expected, buf)); + + pixel_checker_split_pixel (&checker, pixel, &a, &r, &g, &b); + + printf ("Got: %5d %5d %5d %5d [pixel: 0x%08x]\n", r, g, b, a, pixel); + pixel_checker_get_min (&checker, &expected, &a, &r, &g, &b); + printf ("Min accepted: %5d %5d %5d %5d\n", r, g, b, a); + pixel_checker_get_max (&checker, &expected, &a, &r, &g, &b); + printf ("Max accepted: %5d %5d %5d %5d\n", r, g, b, a); + + return FALSE; + } + return TRUE; +} + +static void +image_init (image_t *info, + int color, + int format, + int size) +{ + pixman_color_t fill; + + info->color = &colors[color]; + compute_pixman_color (info->color, &fill); + + info->format = formats[format]; + info->size = sizes[size] & ~FLAGS; + info->repeat = PIXMAN_REPEAT_NONE; + + if (info->size) + { + pixman_image_t *solid; + + info->image = pixman_image_create_bits (info->format, + info->size, info->size, + NULL, 0); + + solid = pixman_image_create_solid_fill (&fill); + pixman_image_composite32 (PIXMAN_OP_SRC, solid, NULL, info->image, + 0, 0, 0, 0, 0, 0, info->size, info->size); + pixman_image_unref (solid); + + if (sizes[size] & REPEAT) + { + pixman_image_set_repeat (info->image, PIXMAN_REPEAT_NORMAL); + info->repeat = PIXMAN_REPEAT_NORMAL; + } + } + else + { + info->image = pixman_image_create_solid_fill (&fill); + } +} + +static void +image_fini (image_t *info) +{ + pixman_image_unref (info->image); +} + +static int +random_size (void) +{ + return prng_rand_n (ARRAY_LENGTH (sizes)); +} + +static int +random_color (void) +{ + return prng_rand_n (ARRAY_LENGTH (colors)); +} + +static int +random_format (void) +{ + return prng_rand_n (ARRAY_LENGTH (formats)); +} + +static pixman_bool_t +run_test (uint32_t seed) +{ + image_t src, mask, dst; + pixman_op_t op; + int ca; + int ok; + + prng_srand (seed); + + image_init (&dst, random_color(), random_format(), 1); + image_init (&src, random_color(), random_format(), random_size()); + image_init (&mask, random_color(), random_format(), random_size()); + + op = operators [prng_rand_n (ARRAY_LENGTH (operators))]; + + ca = prng_rand_n (3); + + switch (ca) + { + case 0: + ok = composite_test (&dst, op, &src, NULL, FALSE, seed); + break; + case 1: + ok = composite_test (&dst, op, &src, &mask, FALSE, seed); + break; + case 2: + ok = composite_test (&dst, op, &src, &mask, + mask.size? TRUE : FALSE, seed); + break; + default: + ok = FALSE; + break; + } + + image_fini (&src); + image_fini (&mask); + image_fini (&dst); + + return ok; +} + +int +main (int argc, char **argv) +{ +#define N_TESTS (8 * 1024 * 1024) + int result = 0; + uint32_t seed; + int32_t i; + + if (argc > 1) + { + char *end; + + i = strtol (argv[1], &end, 0); + + if (end != argv[1]) + { + if (!run_test (i)) + return 1; + else + return 0; + } + else + { + printf ("Usage:\n\n %s <number>\n\n", argv[0]); + return -1; + } + } + + if (getenv ("PIXMAN_RANDOMIZE_TESTS")) + seed = get_random_seed(); + else + seed = 1; + +#ifdef USE_OPENMP +# pragma omp parallel for default(none) shared(result, argv, seed) +#endif + for (i = 0; i <= N_TESTS; ++i) + { + if (!result && !run_test (i + seed)) + { + printf ("Test 0x%08X failed.\n", seed + i); + + result = seed + i; + } + } + + return result; +} diff --git a/src/pixman/test/fetch-test.c b/src/pixman/test/fetch-test.c new file mode 100644 index 0000000..04e8cc5 --- /dev/null +++ b/src/pixman/test/fetch-test.c @@ -0,0 +1,205 @@ +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include "utils.h" + +#define SIZE 1024 + +static pixman_indexed_t mono_palette = +{ + 0, { 0x00000000, 0x00ffffff }, +}; + + +typedef struct { + pixman_format_code_t format; + int width, height; + int stride; + uint32_t src[SIZE]; + uint32_t dst[SIZE]; + pixman_indexed_t *indexed; +} testcase_t; + +static testcase_t testcases[] = +{ + { + PIXMAN_a8r8g8b8, + 2, 2, + 8, + { 0x00112233, 0x44556677, + 0x8899aabb, 0xccddeeff }, + { 0x00112233, 0x44556677, + 0x8899aabb, 0xccddeeff }, + NULL, + }, + { + PIXMAN_r8g8b8a8, + 2, 2, + 8, + { 0x11223300, 0x55667744, + 0x99aabb88, 0xddeeffcc }, + { 0x00112233, 0x44556677, + 0x8899aabb, 0xccddeeff }, + NULL, + }, + { + PIXMAN_g1, + 8, 2, + 4, +#ifdef WORDS_BIGENDIAN + { + 0xaa000000, + 0x55000000 + }, +#else + { + 0x00000055, + 0x000000aa + }, +#endif + { + 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, + 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff + }, + &mono_palette, + }, +#if 0 + { + PIXMAN_g8, + 4, 2, + 4, + { 0x01234567, + 0x89abcdef }, + { 0x00010101, 0x00232323, 0x00454545, 0x00676767, + 0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, }, + }, +#endif + /* FIXME: make this work on big endian */ + { + PIXMAN_yv12, + 8, 2, + 8, +#ifdef WORDS_BIGENDIAN + { + 0x00ff00ff, 0x00ff00ff, + 0xff00ff00, 0xff00ff00, + 0x80ff8000, + 0x800080ff + }, +#else + { + 0xff00ff00, 0xff00ff00, + 0x00ff00ff, 0x00ff00ff, + 0x0080ff80, + 0xff800080 + }, +#endif + { + 0xff000000, 0xffffffff, 0xffb80000, 0xffffe113, + 0xff000000, 0xffffffff, 0xff0023ee, 0xff4affff, + 0xffffffff, 0xff000000, 0xffffe113, 0xffb80000, + 0xffffffff, 0xff000000, 0xff4affff, 0xff0023ee, + }, + }, +}; + +int n_test_cases = ARRAY_LENGTH (testcases); + + +static uint32_t +reader (const void *src, int size) +{ + switch (size) + { + case 1: + return *(uint8_t *)src; + case 2: + return *(uint16_t *)src; + case 4: + return *(uint32_t *)src; + default: + assert(0); + return 0; /* silence MSVC */ + } +} + + +static void +writer (void *src, uint32_t value, int size) +{ + switch (size) + { + case 1: + *(uint8_t *)src = value; + break; + case 2: + *(uint16_t *)src = value; + break; + case 4: + *(uint32_t *)src = value; + break; + default: + assert(0); + } +} + + +int +main (int argc, char **argv) +{ + uint32_t dst[SIZE]; + pixman_image_t *src_img; + pixman_image_t *dst_img; + int i, j, x, y; + int ret = 0; + + for (i = 0; i < n_test_cases; ++i) + { + for (j = 0; j < 2; ++j) + { + src_img = pixman_image_create_bits (testcases[i].format, + testcases[i].width, + testcases[i].height, + testcases[i].src, + testcases[i].stride); + pixman_image_set_indexed(src_img, testcases[i].indexed); + + dst_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + testcases[i].width, + testcases[i].height, + dst, + testcases[i].width*4); + + if (j) + { + pixman_image_set_accessors (src_img, reader, writer); + pixman_image_set_accessors (dst_img, reader, writer); + } + + pixman_image_composite (PIXMAN_OP_SRC, src_img, NULL, dst_img, + 0, 0, 0, 0, 0, 0, testcases[i].width, testcases[i].height); + + pixman_image_unref (src_img); + pixman_image_unref (dst_img); + + for (y = 0; y < testcases[i].height; ++y) + { + for (x = 0; x < testcases[i].width; ++x) + { + int offset = y * testcases[i].width + x; + + if (dst[offset] != testcases[i].dst[offset]) + { + printf ("test %i%c: pixel mismatch at (x=%d,y=%d): %08x expected, %08x obtained\n", + i + 1, 'a' + j, + x, y, + testcases[i].dst[offset], dst[offset]); + ret = 1; + } + } + } + } + } + + return ret; +} diff --git a/src/pixman/test/fuzzer-find-diff.pl b/src/pixman/test/fuzzer-find-diff.pl new file mode 100755 index 0000000..e1d67fb --- /dev/null +++ b/src/pixman/test/fuzzer-find-diff.pl @@ -0,0 +1,75 @@ +#!/usr/bin/env perl + +$usage = "Usage: + fuzzer-find-diff.pl reference_binary new_binary [number_of_tests_to_run] + +The first two input arguments are the commands to run the test programs +based on fuzzer_test_main() function from 'util.c' (preferably they should +be statically compiled, this can be achieved via '--disable-shared' pixman +configure option). The third optional argument is the number of test rounds +to run (if not specified, then testing runs infinitely or until some problem +is detected). + +Usage examples: + fuzzer-find-diff.pl ./blitters-test-with-sse-disabled ./blitters-test 9000000 + fuzzer-find-diff.pl ./blitters-test \"ssh ppc64_host /path/to/blitters-test\" +"; + +$#ARGV >= 1 or die $usage; + +$batch_size = 10000; + +if ($#ARGV >= 2) { + $number_of_tests = int($ARGV[2]); +} else { + $number_of_tests = -1 +} + +sub test_range { + my $min = shift; + my $max = shift; + + # check that [$min, $max] range is "bad", otherwise return + if (`$ARGV[0] $min $max 2>/dev/null` eq `$ARGV[1] $min $max 2>/dev/null`) { + return; + } + + # check that $min itself is "good", otherwise return + if (`$ARGV[0] $min 2>/dev/null` ne `$ARGV[1] $min 2>/dev/null`) { + return $min; + } + + # start bisecting + while ($max != $min + 1) { + my $avg = int(($min + $max) / 2); + my $res1 = `$ARGV[0] $min $avg 2>/dev/null`; + my $res2 = `$ARGV[1] $min $avg 2>/dev/null`; + if ($res1 ne $res2) { + $max = $avg; + } else { + $min = $avg; + } + } + return $max; +} + +$base = 1; +while ($number_of_tests <= 0 || $base <= $number_of_tests) { + printf("testing %-12d\r", $base + $batch_size - 1); + my $res = test_range($base, $base + $batch_size - 1); + if ($res) { + printf("Failure: results are different for test %d:\n", $res); + + printf("\n-- ref --\n"); + print `$ARGV[0] $res`; + printf("-- new --\n"); + print `$ARGV[1] $res`; + + printf("The problematic conditions can be reproduced by running:\n"); + printf("$ARGV[1] %d\n", $res); + + exit(1); + } + $base += $batch_size; +} +printf("Success: %d tests finished\n", $base - 1); diff --git a/src/pixman/test/glyph-test.c b/src/pixman/test/glyph-test.c new file mode 100644 index 0000000..1811add --- /dev/null +++ b/src/pixman/test/glyph-test.c @@ -0,0 +1,332 @@ +#include <stdlib.h> +#include "utils.h" + +static const pixman_format_code_t glyph_formats[] = +{ + PIXMAN_a8r8g8b8, + PIXMAN_a8, + PIXMAN_a4, + PIXMAN_a1, + PIXMAN_x8r8g8b8, + PIXMAN_r3g3b2, + PIXMAN_null, +}; + +static const pixman_format_code_t formats[] = +{ + PIXMAN_a8r8g8b8, + PIXMAN_a8b8g8r8, + PIXMAN_x8r8g8b8, + PIXMAN_x8b8g8r8, + PIXMAN_r5g6b5, + PIXMAN_b5g6r5, + PIXMAN_a8, + PIXMAN_a1, + PIXMAN_r3g3b2, + PIXMAN_b8g8r8a8, + PIXMAN_b8g8r8x8, + PIXMAN_r8g8b8a8, + PIXMAN_r8g8b8x8, + PIXMAN_x14r6g6b6, + PIXMAN_r8g8b8, + PIXMAN_b8g8r8, +#if 0 + /* These use floating point */ + PIXMAN_x2r10g10b10, + PIXMAN_a2r10g10b10, + PIXMAN_x2b10g10r10, + PIXMAN_a2b10g10r10, +#endif + PIXMAN_a1r5g5b5, + PIXMAN_x1r5g5b5, + PIXMAN_a1b5g5r5, + PIXMAN_x1b5g5r5, + PIXMAN_a4r4g4b4, + PIXMAN_x4r4g4b4, + PIXMAN_a4b4g4r4, + PIXMAN_x4b4g4r4, + PIXMAN_r3g3b2, + PIXMAN_b2g3r3, + PIXMAN_a2r2g2b2, + PIXMAN_a2b2g2r2, + PIXMAN_x4a4, + PIXMAN_a4, + PIXMAN_r1g2b1, + PIXMAN_b1g2r1, + PIXMAN_a1r1g1b1, + PIXMAN_a1b1g1r1, + PIXMAN_null, +}; + +static const pixman_op_t operators[] = +{ + PIXMAN_OP_SRC, + PIXMAN_OP_OVER, + PIXMAN_OP_ADD, + PIXMAN_OP_CLEAR, + PIXMAN_OP_SRC, + PIXMAN_OP_DST, + PIXMAN_OP_OVER, + PIXMAN_OP_OVER_REVERSE, + PIXMAN_OP_IN, + PIXMAN_OP_IN_REVERSE, + PIXMAN_OP_OUT, + PIXMAN_OP_OUT_REVERSE, + PIXMAN_OP_ATOP, + PIXMAN_OP_ATOP_REVERSE, + PIXMAN_OP_XOR, + PIXMAN_OP_ADD +}; + +enum +{ + ALLOW_CLIPPED = (1 << 0), + ALLOW_ALPHA_MAP = (1 << 1), + ALLOW_SOURCE_CLIPPING = (1 << 2), + ALLOW_REPEAT = (1 << 3), + ALLOW_SOLID = (1 << 4), + ALLOW_FENCED_MEMORY = (1 << 5), +}; + +static void +destroy_fenced (pixman_image_t *image, void *data) +{ + fence_free (data); +} + +static void +destroy_malloced (pixman_image_t *image, void *data) +{ + free (data); +} + +static pixman_format_code_t +random_format (const pixman_format_code_t *formats) +{ + int i; + i = 0; + while (formats[i] != PIXMAN_null) + ++i; + return formats[prng_rand_n (i)]; +} + +static pixman_image_t * +create_image (int max_size, const pixman_format_code_t *formats, uint32_t flags) +{ + int width, height; + pixman_image_t *image; + pixman_format_code_t format; + uint32_t *data; + int bpp; + int stride; + int i; + pixman_image_destroy_func_t destroy; + + if ((flags & ALLOW_SOLID) && prng_rand_n (4) == 0) + { + pixman_color_t color; + + color.alpha = prng_rand(); + color.red = prng_rand(); + color.green = prng_rand(); + color.blue = prng_rand(); + + return pixman_image_create_solid_fill (&color); + } + + width = prng_rand_n (max_size) + 1; + height = prng_rand_n (max_size) + 1; + format = random_format (formats); + + bpp = PIXMAN_FORMAT_BPP (format); + stride = (width * bpp + 7) / 8 + prng_rand_n (17); + stride = (stride + 3) & ~3; + + if (prng_rand_n (64) == 0) + { + if (!(data = (uint32_t *)make_random_bytes (stride * height))) + { + fprintf (stderr, "Out of memory\n"); + abort (); + } + destroy = destroy_fenced; + } + else + { + data = malloc (stride * height); + prng_randmemset (data, height * stride, 0); + destroy = destroy_malloced; + } + + image = pixman_image_create_bits (format, width, height, data, stride); + pixman_image_set_destroy_function (image, destroy, data); + + if ((flags & ALLOW_CLIPPED) && prng_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[8]; + pixman_region16_t clip; + int n = prng_rand_n (8) + 1; + + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = prng_rand_n (width); + clip_boxes[i].y1 = prng_rand_n (height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + prng_rand_n (width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + prng_rand_n (height - clip_boxes[i].y1); + } + + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (image, &clip); + pixman_region_fini (&clip); + } + + if ((flags & ALLOW_SOURCE_CLIPPING) && prng_rand_n (4) == 0) + { + pixman_image_set_source_clipping (image, TRUE); + pixman_image_set_has_client_clip (image, TRUE); + } + + if ((flags & ALLOW_ALPHA_MAP) && prng_rand_n (16) == 0) + { + pixman_image_t *alpha_map; + int alpha_x, alpha_y; + + alpha_x = prng_rand_n (width); + alpha_y = prng_rand_n (height); + alpha_map = + create_image (max_size, formats, (flags & ~(ALLOW_ALPHA_MAP | ALLOW_SOLID))); + pixman_image_set_alpha_map (image, alpha_map, alpha_x, alpha_y); + pixman_image_unref (alpha_map); + } + + if ((flags & ALLOW_REPEAT) && prng_rand_n (2) == 0) + pixman_image_set_repeat (image, prng_rand_n (4)); + + image_endian_swap (image); + + return image; +} + +#define KEY1(p) ((void *)(((uintptr_t)p) ^ (0xa7e23dfaUL))) +#define KEY2(p) ((void *)(((uintptr_t)p) ^ (0xabcd9876UL))) + +#define MAX_GLYPHS 32 + +uint32_t +test_glyphs (int testnum, int verbose) +{ + pixman_image_t *glyph_images[MAX_GLYPHS]; + pixman_glyph_t glyphs[4 * MAX_GLYPHS]; + uint32_t crc32 = 0; + pixman_image_t *source, *dest; + int n_glyphs, i; + pixman_glyph_cache_t *cache; + + prng_srand (testnum); + + cache = pixman_glyph_cache_create (); + + source = create_image (300, formats, + ALLOW_CLIPPED | ALLOW_ALPHA_MAP | + ALLOW_SOURCE_CLIPPING | + ALLOW_REPEAT | ALLOW_SOLID); + + dest = create_image (128, formats, + ALLOW_CLIPPED | ALLOW_ALPHA_MAP | + ALLOW_SOURCE_CLIPPING); + + pixman_glyph_cache_freeze (cache); + + n_glyphs = prng_rand_n (MAX_GLYPHS); + for (i = 0; i < n_glyphs; ++i) + glyph_images[i] = create_image (32, glyph_formats, 0); + + for (i = 0; i < 4 * n_glyphs; ++i) + { + int g = prng_rand_n (n_glyphs); + pixman_image_t *glyph_img = glyph_images[g]; + void *key1 = KEY1 (glyph_img); + void *key2 = KEY2 (glyph_img); + const void *glyph; + + if (!(glyph = pixman_glyph_cache_lookup (cache, key1, key2))) + { + glyph = + pixman_glyph_cache_insert (cache, key1, key2, 5, 8, glyph_img); + } + + glyphs[i].glyph = glyph; + glyphs[i].x = prng_rand_n (128); + glyphs[i].y = prng_rand_n (128); + } + + if (prng_rand_n (2) == 0) + { + int src_x = prng_rand_n (300) - 150; + int src_y = prng_rand_n (300) - 150; + int mask_x = prng_rand_n (64) - 32; + int mask_y = prng_rand_n (64) - 32; + int dest_x = prng_rand_n (64) - 32; + int dest_y = prng_rand_n (64) - 32; + int width = prng_rand_n (64); + int height = prng_rand_n (64); + pixman_op_t op = operators[prng_rand_n (ARRAY_LENGTH (operators))]; + pixman_format_code_t format = random_format (glyph_formats); + + pixman_composite_glyphs ( + op, + source, dest, format, + src_x, src_y, + mask_x, mask_y, + dest_x, dest_y, + width, height, + cache, 4 * n_glyphs, glyphs); + } + else + { + pixman_op_t op = operators[prng_rand_n (ARRAY_LENGTH (operators))]; + int src_x = prng_rand_n (300) - 150; + int src_y = prng_rand_n (300) - 150; + int dest_x = prng_rand_n (64) - 32; + int dest_y = prng_rand_n (64) - 32; + + pixman_composite_glyphs_no_mask ( + op, source, dest, + src_x, src_y, + dest_x, dest_y, + cache, 4 * n_glyphs, glyphs); + } + + pixman_glyph_cache_thaw (cache); + + for (i = 0; i < n_glyphs; ++i) + { + pixman_image_t *img = glyph_images[i]; + void *key1, *key2; + + key1 = KEY1 (img); + key2 = KEY2 (img); + + pixman_glyph_cache_remove (cache, key1, key2); + pixman_image_unref (glyph_images[i]); + } + + crc32 = compute_crc32_for_image (0, dest); + + pixman_image_unref (source); + pixman_image_unref (dest); + + pixman_glyph_cache_destroy (cache); + + return crc32; +} + +int +main (int argc, const char *argv[]) +{ + return fuzzer_test_main ("glyph", 30000, + 0xFA478A79, + test_glyphs, argc, argv); +} diff --git a/src/pixman/test/gradient-crash-test.c b/src/pixman/test/gradient-crash-test.c new file mode 100644 index 0000000..962d1cb --- /dev/null +++ b/src/pixman/test/gradient-crash-test.c @@ -0,0 +1,158 @@ +#include <stdio.h> +#include <stdlib.h> +#include "utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 400 +#define HEIGHT 200 + + uint32_t *dest = malloc (WIDTH * HEIGHT * 4); + pixman_image_t *src_img; + pixman_image_t *dest_img; + int i, j, k, p; + + typedef struct + { + pixman_point_fixed_t p0; + pixman_point_fixed_t p1; + } point_pair_t; + + pixman_gradient_stop_t onestop[1] = + { + { pixman_int_to_fixed (1), { 0xffff, 0xeeee, 0xeeee, 0xeeee } }, + }; + + pixman_gradient_stop_t subsetstops[2] = + { + { pixman_int_to_fixed (1), { 0xffff, 0xeeee, 0xeeee, 0xeeee } }, + { pixman_int_to_fixed (1), { 0xffff, 0xeeee, 0xeeee, 0xeeee } }, + }; + + pixman_gradient_stop_t stops01[2] = + { + { pixman_int_to_fixed (0), { 0xffff, 0xeeee, 0xeeee, 0xeeee } }, + { pixman_int_to_fixed (1), { 0xffff, 0x1111, 0x1111, 0x1111 } } + }; + + point_pair_t point_pairs [] = + { { { pixman_double_to_fixed (0), 0 }, + { pixman_double_to_fixed (WIDTH / 8.), pixman_int_to_fixed (0) } }, + { { pixman_double_to_fixed (WIDTH / 2.0), pixman_double_to_fixed (HEIGHT / 2.0) }, + { pixman_double_to_fixed (WIDTH / 2.0), pixman_double_to_fixed (HEIGHT / 2.0) } } + }; + + pixman_transform_t transformations[] = { + { + { { pixman_double_to_fixed (2), pixman_double_to_fixed (0.5), pixman_double_to_fixed (-100), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (3), pixman_double_to_fixed (0), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (0.000), pixman_double_to_fixed (1.0) } + } + }, + { + { { pixman_double_to_fixed (1), pixman_double_to_fixed (0), pixman_double_to_fixed (0), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (1), pixman_double_to_fixed (0), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (0.000), pixman_double_to_fixed (1.0) } + } + }, + { + { { pixman_double_to_fixed (2), pixman_double_to_fixed (1), pixman_double_to_fixed (0), }, + { pixman_double_to_fixed (1), pixman_double_to_fixed (1), pixman_double_to_fixed (0), }, + { pixman_double_to_fixed (2), pixman_double_to_fixed (1.000), pixman_double_to_fixed (1.0) } + } + }, + { + { { pixman_double_to_fixed (2), pixman_double_to_fixed (1), pixman_double_to_fixed (0), }, + { pixman_double_to_fixed (1), pixman_double_to_fixed (1), pixman_double_to_fixed (0), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (0), pixman_double_to_fixed (0) } + } + }, + { + { { pixman_double_to_fixed (2), pixman_double_to_fixed (1), pixman_double_to_fixed (0), }, + { pixman_double_to_fixed (1), pixman_double_to_fixed (1), pixman_double_to_fixed (0), }, + { pixman_double_to_fixed (2), pixman_double_to_fixed (-1), pixman_double_to_fixed (0) } + } + }, + { + { { pixman_double_to_fixed (2), pixman_double_to_fixed (1), pixman_double_to_fixed (3), }, + { pixman_double_to_fixed (1), pixman_double_to_fixed (1), pixman_double_to_fixed (0), }, + { pixman_double_to_fixed (2), pixman_double_to_fixed (-1), pixman_double_to_fixed (0) } + } + }, + }; + + pixman_fixed_t r_inner; + pixman_fixed_t r_outer; + + enable_divbyzero_exceptions(); + + for (i = 0; i < WIDTH * HEIGHT; ++i) + dest[i] = 0x4f00004f; /* pale blue */ + + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + dest, + WIDTH * 4); + + r_inner = 0; + r_outer = pixman_double_to_fixed (50.0); + + for (i = 0; i < 3; ++i) + { + pixman_gradient_stop_t *stops; + int num_stops; + + if (i == 0) + { + stops = onestop; + num_stops = ARRAY_LENGTH (onestop); + } + else if (i == 1) + { + stops = subsetstops; + num_stops = ARRAY_LENGTH (subsetstops); + } + else + { + stops = stops01; + num_stops = ARRAY_LENGTH (stops01); + } + + for (j = 0; j < 3; ++j) + { + for (p = 0; p < ARRAY_LENGTH (point_pairs); ++p) + { + point_pair_t *pair = &(point_pairs[p]); + + if (j == 0) + src_img = pixman_image_create_conical_gradient (&(pair->p0), r_inner, + stops, num_stops); + else if (j == 1) + src_img = pixman_image_create_radial_gradient (&(pair->p0), &(pair->p1), + r_inner, r_outer, + stops, num_stops); + else + src_img = pixman_image_create_linear_gradient (&(pair->p0), &(pair->p1), + stops, num_stops); + + for (k = 0; k < ARRAY_LENGTH (transformations); ++k) + { + pixman_image_set_transform (src_img, &transformations[k]); + + pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NONE); + pixman_image_composite (PIXMAN_OP_OVER, src_img, NULL, dest_img, + 0, 0, 0, 0, 0, 0, 10 * WIDTH, HEIGHT); + } + + pixman_image_unref (src_img); + } + + } + } + + pixman_image_unref (dest_img); + free (dest); + + return 0; +} diff --git a/src/pixman/test/infinite-loop.c b/src/pixman/test/infinite-loop.c new file mode 100644 index 0000000..02addaa --- /dev/null +++ b/src/pixman/test/infinite-loop.c @@ -0,0 +1,39 @@ +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "utils.h" + +int +main (int argc, char **argv) +{ +#define SRC_WIDTH 16 +#define SRC_HEIGHT 12 +#define DST_WIDTH 7 +#define DST_HEIGHT 2 + + static const pixman_transform_t transform = { + { { 0x200017bd, 0x00000000, 0x000e6465 }, + { 0x00000000, 0x000a42fd, 0x000e6465 }, + { 0x00000000, 0x00000000, 0x00010000 }, + } + }; + pixman_image_t *src, *dest; + + src = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, SRC_WIDTH, SRC_HEIGHT, NULL, -1); + dest = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, DST_WIDTH, DST_HEIGHT, NULL, -1); + + pixman_image_set_transform (src, &transform); + pixman_image_set_repeat (src, PIXMAN_REPEAT_NORMAL); + pixman_image_set_filter (src, PIXMAN_FILTER_BILINEAR, NULL, 0); + + if (argc == 1 || strcmp (argv[1], "-nf") != 0) + fail_after (1, "infinite loop detected"); + + pixman_image_composite ( + PIXMAN_OP_OVER, src, NULL, dest, -3, -3, 0, 0, 0, 0, 6, 2); + + return 0; +} diff --git a/src/pixman/test/lowlevel-blt-bench.c b/src/pixman/test/lowlevel-blt-bench.c new file mode 100644 index 0000000..1049e21 --- /dev/null +++ b/src/pixman/test/lowlevel-blt-bench.c @@ -0,0 +1,820 @@ +/* + * Copyright © 2009 Nokia Corporation + * Copyright © 2010 Movial Creative Technologies Oy + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "utils.h" + +#define SOLID_FLAG 1 +#define CA_FLAG 2 + +#define L1CACHE_SIZE (8 * 1024) +#define L2CACHE_SIZE (128 * 1024) + +/* This is applied to both L1 and L2 tests - alternatively, you could + * parameterise bench_L or split it into two functions. It could be + * read at runtime on some architectures, but it only really matters + * that it's a number that's an integer divisor of both cacheline + * lengths, and further, it only really matters for caches that don't + * do allocate0on-write. */ +#define CACHELINE_LENGTH (32) /* bytes */ + +#define WIDTH 1920 +#define HEIGHT 1080 +#define BUFSIZE (WIDTH * HEIGHT * 4) +#define XWIDTH 256 +#define XHEIGHT 256 +#define TILEWIDTH 32 +#define TINYWIDTH 8 + +#define EXCLUDE_OVERHEAD 1 + +uint32_t *dst; +uint32_t *src; +uint32_t *mask; + +double bandwidth = 0; + +double +bench_memcpy () +{ + int64_t n = 0, total; + double t1, t2; + int x = 0; + + t1 = gettime (); + while (1) + { + memcpy (dst, src, BUFSIZE - 64); + memcpy (src, dst, BUFSIZE - 64); + n += 4 * (BUFSIZE - 64); + t2 = gettime (); + if (t2 - t1 > 0.5) + break; + } + n = total = n * 5; + t1 = gettime (); + while (n > 0) + { + if (++x >= 64) + x = 0; + memcpy ((char *)dst + 1, (char *)src + x, BUFSIZE - 64); + memcpy ((char *)src + 1, (char *)dst + x, BUFSIZE - 64); + n -= 4 * (BUFSIZE - 64); + } + t2 = gettime (); + return (double)total / (t2 - t1); +} + +static pixman_bool_t use_scaling = FALSE; +static pixman_filter_t filter = PIXMAN_FILTER_NEAREST; + +/* nearly 1x scale factor */ +static pixman_transform_t m = +{ + { + { pixman_fixed_1 + 1, 0, 0 }, + { 0, pixman_fixed_1, 0 }, + { 0, 0, pixman_fixed_1 } + } +}; + +static void +pixman_image_composite_wrapper (pixman_implementation_t *impl, + pixman_composite_info_t *info) +{ + if (use_scaling) + { + pixman_image_set_filter (info->src_image, filter, NULL, 0); + pixman_image_set_transform(info->src_image, &m); + } + pixman_image_composite (info->op, + info->src_image, info->mask_image, info->dest_image, + info->src_x, info->src_y, + info->mask_x, info->mask_y, + info->dest_x, info->dest_y, + info->width, info->height); +} + +static void +pixman_image_composite_empty (pixman_implementation_t *impl, + pixman_composite_info_t *info) +{ + if (use_scaling) + { + pixman_image_set_filter (info->src_image, filter, NULL, 0); + pixman_image_set_transform(info->src_image, &m); + } + pixman_image_composite (info->op, + info->src_image, info->mask_image, info->dest_image, + 0, 0, 0, 0, 0, 0, 1, 1); +} + +static inline void +call_func (pixman_composite_func_t func, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dest_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + pixman_composite_info_t info; + + info.op = op; + info.src_image = src_image; + info.mask_image = mask_image; + info.dest_image = dest_image; + info.src_x = src_x; + info.src_y = src_y; + info.mask_x = mask_x; + info.mask_y = mask_y; + info.dest_x = dest_x; + info.dest_y = dest_y; + info.width = width; + info.height = height; + + func (0, &info); +} + +void +noinline +bench_L (pixman_op_t op, + pixman_image_t * src_img, + pixman_image_t * mask_img, + pixman_image_t * dst_img, + int64_t n, + pixman_composite_func_t func, + int width, + int lines_count) +{ + int64_t i, j, k; + int x = 0; + int q = 0; + volatile int qx; + + for (i = 0; i < n; i++) + { + /* For caches without allocate-on-write, we need to force the + * destination buffer back into the cache on each iteration, + * otherwise if they are evicted during the test, they remain + * uncached. This doesn't matter for tests which read the + * destination buffer, or for caches that do allocate-on-write, + * but in those cases this loop just adds constant time, which + * should be successfully cancelled out. + */ + for (j = 0; j < lines_count; j++) + { + for (k = 0; k < width + 62; k += CACHELINE_LENGTH / sizeof *dst) + { + q += dst[j * WIDTH + k]; + } + q += dst[j * WIDTH + width + 62]; + } + if (++x >= 64) + x = 0; + call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 63 - x, 0, width, lines_count); + } + qx = q; +} + +void +noinline +bench_M (pixman_op_t op, + pixman_image_t * src_img, + pixman_image_t * mask_img, + pixman_image_t * dst_img, + int64_t n, + pixman_composite_func_t func) +{ + int64_t i; + int x = 0; + + for (i = 0; i < n; i++) + { + if (++x >= 64) + x = 0; + call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 1, 0, WIDTH - 64, HEIGHT); + } +} + +double +noinline +bench_HT (pixman_op_t op, + pixman_image_t * src_img, + pixman_image_t * mask_img, + pixman_image_t * dst_img, + int64_t n, + pixman_composite_func_t func) +{ + double pix_cnt = 0; + int x = 0; + int y = 0; + int64_t i; + + srand (0); + for (i = 0; i < n; i++) + { + int w = (rand () % (TILEWIDTH * 2)) + 1; + int h = (rand () % (TILEWIDTH * 2)) + 1; + if (x + w > WIDTH) + { + x = 0; + y += TILEWIDTH * 2; + } + if (y + h > HEIGHT) + { + y = 0; + } + call_func (func, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h); + x += w; + pix_cnt += w * h; + } + return pix_cnt; +} + +double +noinline +bench_VT (pixman_op_t op, + pixman_image_t * src_img, + pixman_image_t * mask_img, + pixman_image_t * dst_img, + int64_t n, + pixman_composite_func_t func) +{ + double pix_cnt = 0; + int x = 0; + int y = 0; + int64_t i; + + srand (0); + for (i = 0; i < n; i++) + { + int w = (rand () % (TILEWIDTH * 2)) + 1; + int h = (rand () % (TILEWIDTH * 2)) + 1; + if (y + h > HEIGHT) + { + y = 0; + x += TILEWIDTH * 2; + } + if (x + w > WIDTH) + { + x = 0; + } + call_func (func, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h); + y += h; + pix_cnt += w * h; + } + return pix_cnt; +} + +double +noinline +bench_R (pixman_op_t op, + pixman_image_t * src_img, + pixman_image_t * mask_img, + pixman_image_t * dst_img, + int64_t n, + pixman_composite_func_t func, + int maxw, + int maxh) +{ + double pix_cnt = 0; + int64_t i; + + if (maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2) + { + printf("error: maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2\n"); + return 0; + } + + srand (0); + for (i = 0; i < n; i++) + { + int w = (rand () % (TILEWIDTH * 2)) + 1; + int h = (rand () % (TILEWIDTH * 2)) + 1; + int sx = rand () % (maxw - TILEWIDTH * 2); + int sy = rand () % (maxh - TILEWIDTH * 2); + int dx = rand () % (maxw - TILEWIDTH * 2); + int dy = rand () % (maxh - TILEWIDTH * 2); + call_func (func, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h); + pix_cnt += w * h; + } + return pix_cnt; +} + +double +noinline +bench_RT (pixman_op_t op, + pixman_image_t * src_img, + pixman_image_t * mask_img, + pixman_image_t * dst_img, + int64_t n, + pixman_composite_func_t func, + int maxw, + int maxh) +{ + double pix_cnt = 0; + int64_t i; + + if (maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2) + { + printf("error: maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2\n"); + return 0; + } + + srand (0); + for (i = 0; i < n; i++) + { + int w = (rand () % (TINYWIDTH * 2)) + 1; + int h = (rand () % (TINYWIDTH * 2)) + 1; + int sx = rand () % (maxw - TINYWIDTH * 2); + int sy = rand () % (maxh - TINYWIDTH * 2); + int dx = rand () % (maxw - TINYWIDTH * 2); + int dy = rand () % (maxh - TINYWIDTH * 2); + call_func (func, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h); + pix_cnt += w * h; + } + return pix_cnt; +} + +void +bench_composite (char * testname, + int src_fmt, + int src_flags, + int op, + int mask_fmt, + int mask_flags, + int dst_fmt, + double npix) +{ + pixman_image_t * src_img; + pixman_image_t * dst_img; + pixman_image_t * mask_img; + pixman_image_t * xsrc_img; + pixman_image_t * xdst_img; + pixman_image_t * xmask_img; + double t1, t2, t3, pix_cnt; + int64_t n, l1test_width, nlines; + double bytes_per_pix = 0; + pixman_bool_t bench_pixbuf = FALSE; + + pixman_composite_func_t func = pixman_image_composite_wrapper; + + if (!(src_flags & SOLID_FLAG)) + { + bytes_per_pix += (src_fmt >> 24) / 8.0; + src_img = pixman_image_create_bits (src_fmt, + WIDTH, HEIGHT, + src, + WIDTH * 4); + xsrc_img = pixman_image_create_bits (src_fmt, + XWIDTH, XHEIGHT, + src, + XWIDTH * 4); + } + else + { + src_img = pixman_image_create_bits (src_fmt, + 1, 1, + src, + 4); + xsrc_img = pixman_image_create_bits (src_fmt, + 1, 1, + src, + 4); + pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL); + pixman_image_set_repeat (xsrc_img, PIXMAN_REPEAT_NORMAL); + } + + bytes_per_pix += (dst_fmt >> 24) / 8.0; + dst_img = pixman_image_create_bits (dst_fmt, + WIDTH, HEIGHT, + dst, + WIDTH * 4); + + mask_img = NULL; + xmask_img = NULL; + if (strcmp (testname, "pixbuf") == 0 || strcmp (testname, "rpixbuf") == 0) + { + bench_pixbuf = TRUE; + } + if (!(mask_flags & SOLID_FLAG) && mask_fmt != PIXMAN_null) + { + bytes_per_pix += (mask_fmt >> 24) / ((op == PIXMAN_OP_SRC) ? 8.0 : 4.0); + mask_img = pixman_image_create_bits (mask_fmt, + WIDTH, HEIGHT, + bench_pixbuf ? src : mask, + WIDTH * 4); + xmask_img = pixman_image_create_bits (mask_fmt, + XWIDTH, XHEIGHT, + bench_pixbuf ? src : mask, + XWIDTH * 4); + } + else if (mask_fmt != PIXMAN_null) + { + mask_img = pixman_image_create_bits (mask_fmt, + 1, 1, + mask, + 4); + xmask_img = pixman_image_create_bits (mask_fmt, + 1, 1, + mask, + 4 * 4); + pixman_image_set_repeat (mask_img, PIXMAN_REPEAT_NORMAL); + pixman_image_set_repeat (xmask_img, PIXMAN_REPEAT_NORMAL); + } + if ((mask_flags & CA_FLAG) && mask_fmt != PIXMAN_null) + { + pixman_image_set_component_alpha (mask_img, 1); + } + xdst_img = pixman_image_create_bits (dst_fmt, + XWIDTH, XHEIGHT, + dst, + XWIDTH * 4); + + + printf ("%24s %c", testname, func != pixman_image_composite_wrapper ? + '-' : '='); + + memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); + + l1test_width = L1CACHE_SIZE / 8 - 64; + if (l1test_width < 1) + l1test_width = 1; + if (l1test_width > WIDTH - 64) + l1test_width = WIDTH - 64; + n = 1 + npix / (l1test_width * 8); + t1 = gettime (); +#if EXCLUDE_OVERHEAD + bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, 1); +#endif + t2 = gettime (); + bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, 1); + t3 = gettime (); + printf (" L1:%7.2f", (double)n * l1test_width * 1 / + ((t3 - t2) - (t2 - t1)) / 1000000.); + fflush (stdout); + + memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); + + nlines = (L2CACHE_SIZE / l1test_width) / + ((PIXMAN_FORMAT_BPP(src_fmt) + PIXMAN_FORMAT_BPP(dst_fmt)) / 8); + if (nlines < 1) + nlines = 1; + n = 1 + npix / (l1test_width * nlines); + t1 = gettime (); +#if EXCLUDE_OVERHEAD + bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, nlines); +#endif + t2 = gettime (); + bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, nlines); + t3 = gettime (); + printf (" L2:%7.2f", (double)n * l1test_width * nlines / + ((t3 - t2) - (t2 - t1)) / 1000000.); + fflush (stdout); + + memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); + + n = 1 + npix / (WIDTH * HEIGHT); + t1 = gettime (); +#if EXCLUDE_OVERHEAD + bench_M (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty); +#endif + t2 = gettime (); + bench_M (op, src_img, mask_img, dst_img, n, func); + t3 = gettime (); + printf (" M:%6.2f (%6.2f%%)", + ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1))) / 1000000., + ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1)) * bytes_per_pix) * (100.0 / bandwidth) ); + fflush (stdout); + + memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); + + n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH); + t1 = gettime (); +#if EXCLUDE_OVERHEAD + pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty); +#endif + t2 = gettime (); + pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, func); + t3 = gettime (); + printf (" HT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.); + fflush (stdout); + + memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); + + n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH); + t1 = gettime (); +#if EXCLUDE_OVERHEAD + pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty); +#endif + t2 = gettime (); + pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, func); + t3 = gettime (); + printf (" VT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.); + fflush (stdout); + + memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); + + n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH); + t1 = gettime (); +#if EXCLUDE_OVERHEAD + pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT); +#endif + t2 = gettime (); + pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT); + t3 = gettime (); + printf (" R:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.); + fflush (stdout); + + memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); + + n = 1 + npix / (16 * TINYWIDTH * TINYWIDTH); + t1 = gettime (); +#if EXCLUDE_OVERHEAD + pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT); +#endif + t2 = gettime (); + pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT); + t3 = gettime (); + printf (" RT:%6.2f (%4.0fKops/s)\n", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000., (double) n / ((t3 - t2) * 1000)); + + if (mask_img) { + pixman_image_unref (mask_img); + pixman_image_unref (xmask_img); + } + pixman_image_unref (src_img); + pixman_image_unref (dst_img); + pixman_image_unref (xsrc_img); + pixman_image_unref (xdst_img); +} + +#define PIXMAN_OP_OUT_REV (PIXMAN_OP_OUT_REVERSE) + +struct +{ + char *testname; + int src_fmt; + int src_flags; + int op; + int mask_fmt; + int mask_flags; + int dst_fmt; +} +tests_tbl[] = +{ + { "add_8_8_8", PIXMAN_a8, 0, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8 }, + { "add_n_8_8", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8 }, + { "add_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 }, + { "add_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 }, + { "add_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, + { "add_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 }, + { "add_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 }, + { "add_n_8_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a2r2g2b2 }, + { "add_n_8_2x10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_x2r10g10b10 }, + { "add_n_8_2a10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a2r10g10b10 }, + { "add_n_8", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8 }, + { "add_n_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, + { "add_n_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, + { "add_n_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_r5g6b5 }, + { "add_n_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, + { "add_n_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a4r4g4b4 }, + { "add_n_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r2g2b2 }, + { "add_n_2x10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x2r10g10b10 }, + { "add_n_2a10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r10g10b10 }, + { "add_8_8", PIXMAN_a8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8 }, + { "add_x888_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, + { "add_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, + { "add_8888_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_r5g6b5 }, + { "add_8888_1555", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, + { "add_8888_4444", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a4r4g4b4 }, + { "add_8888_2222", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r2g2b2 }, + { "add_0565_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_r5g6b5 }, + { "add_1555_1555", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, + { "add_0565_2x10", PIXMAN_r5g6b5, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x2r10g10b10 }, + { "add_2a10_2a10", PIXMAN_a2r10g10b10, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r10g10b10 }, + { "in_n_8_8", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_IN, PIXMAN_a8, 0, PIXMAN_a8 }, + { "in_8_8", PIXMAN_a8, 0, PIXMAN_OP_IN, PIXMAN_null, 0, PIXMAN_a8 }, + { "src_n_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r2g2b2 }, + { "src_n_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, + { "src_n_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, + { "src_n_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a4r4g4b4 }, + { "src_n_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, + { "src_n_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, + { "src_n_2x10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x2r10g10b10 }, + { "src_n_2a10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r10g10b10 }, + { "src_8888_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, + { "src_0565_8888", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, + { "src_8888_4444", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a4r4g4b4 }, + { "src_8888_2222", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r2g2b2 }, + { "src_8888_2x10", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x2r10g10b10 }, + { "src_8888_2a10", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r10g10b10 }, + { "src_0888_0565", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, + { "src_0888_8888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, + { "src_0888_x888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, + { "src_0888_8888_rev", PIXMAN_b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, + { "src_0888_0565_rev", PIXMAN_b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, + { "src_x888_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, + { "src_x888_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, + { "src_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, + { "src_0565_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, + { "src_1555_0565", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, + { "src_0565_1555", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, + { "src_8_8", PIXMAN_a8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 }, + { "src_n_8", PIXMAN_a8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 }, + { "src_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, + { "src_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 }, + { "src_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 }, + { "src_n_8_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a2r2g2b2 }, + { "src_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 }, + { "src_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 }, + { "src_n_8_2x10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x2r10g10b10 }, + { "src_n_8_2a10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a2r10g10b10 }, + { "src_8888_8_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, + { "src_0888_8_0565", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, + { "src_0888_8_8888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 }, + { "src_0888_8_x888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 }, + { "src_x888_8_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 }, + { "src_x888_8_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 }, + { "src_0565_8_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, + { "src_1555_8_0565", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, + { "src_0565_8_1555", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 }, + { "over_n_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, + { "over_n_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, + { "over_n_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_r5g6b5 }, + { "over_n_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, + { "over_8888_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_r5g6b5 }, + { "over_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, + { "over_8888_x888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, + { "over_x888_8_0565", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, + { "over_x888_8_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 }, + { "over_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, + { "over_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 }, + { "over_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 }, + { "over_n_8_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a2r2g2b2 }, + { "over_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 }, + { "over_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 }, + { "over_n_8_2x10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_x2r10g10b10 }, + { "over_n_8_2a10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a2r10g10b10 }, + { "over_n_8888_8888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 }, + { "over_n_8888_x888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 }, + { "over_n_8888_0565_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 }, + { "over_n_8888_1555_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 }, + { "over_n_8888_4444_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a4r4g4b4 }, + { "over_n_8888_2222_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a2r2g2b2 }, + { "over_n_8888_2x10_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_x2r10g10b10 }, + { "over_n_8888_2a10_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a2r10g10b10 }, + { "over_8888_n_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_a8r8g8b8 }, + { "over_8888_n_x888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_x8r8g8b8 }, + { "over_8888_n_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_r5g6b5 }, + { "over_8888_n_1555", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_a1r5g5b5 }, + { "over_x888_n_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_a8r8g8b8 }, + { "outrev_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, + { "outrev_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 }, + { "outrev_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 }, + { "outrev_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 }, + { "outrev_n_8888_0565_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 }, + { "outrev_n_8888_1555_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 }, + { "outrev_n_8888_x888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 }, + { "outrev_n_8888_8888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 }, + { "over_reverse_n_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER_REVERSE, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, + { "pixbuf", PIXMAN_x8b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, 0, PIXMAN_a8r8g8b8 }, + { "rpixbuf", PIXMAN_x8b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, 0, PIXMAN_a8b8g8r8 }, +}; + +int +main (int argc, char *argv[]) +{ + double x; + int i; + const char *pattern = NULL; + for (i = 1; i < argc; i++) + { + if (argv[i][0] == '-') + { + if (strchr (argv[i] + 1, 'b')) + { + use_scaling = TRUE; + filter = PIXMAN_FILTER_BILINEAR; + } + else if (strchr (argv[i] + 1, 'n')) + { + use_scaling = TRUE; + filter = PIXMAN_FILTER_NEAREST; + } + } + else + { + pattern = argv[i]; + } + } + + if (!pattern) + { + printf ("Usage: lowlevel-blt-bench [-b] [-n] pattern\n"); + printf (" -n : benchmark nearest scaling\n"); + printf (" -b : benchmark bilinear scaling\n"); + return 1; + } + + src = aligned_malloc (4096, BUFSIZE * 3); + memset (src, 0xCC, BUFSIZE * 3); + dst = src + (BUFSIZE / 4); + mask = dst + (BUFSIZE / 4); + + printf ("Benchmark for a set of most commonly used functions\n"); + printf ("---\n"); + printf ("All results are presented in millions of pixels per second\n"); + printf ("L1 - small Xx1 rectangle (fitting L1 cache), always blitted at the same\n"); + printf (" memory location with small drift in horizontal direction\n"); + printf ("L2 - small XxY rectangle (fitting L2 cache), always blitted at the same\n"); + printf (" memory location with small drift in horizontal direction\n"); + printf ("M - large %dx%d rectangle, always blitted at the same\n", + WIDTH - 64, HEIGHT); + printf (" memory location with small drift in horizontal direction\n"); + printf ("HT - random rectangles with %dx%d average size are copied from\n", + TILEWIDTH, TILEWIDTH); + printf (" one %dx%d buffer to another, traversing from left to right\n", + WIDTH, HEIGHT); + printf (" and from top to bottom\n"); + printf ("VT - random rectangles with %dx%d average size are copied from\n", + TILEWIDTH, TILEWIDTH); + printf (" one %dx%d buffer to another, traversing from top to bottom\n", + WIDTH, HEIGHT); + printf (" and from left to right\n"); + printf ("R - random rectangles with %dx%d average size are copied from\n", + TILEWIDTH, TILEWIDTH); + printf (" random locations of one %dx%d buffer to another\n", + WIDTH, HEIGHT); + printf ("RT - as R, but %dx%d average sized rectangles are copied\n", + TINYWIDTH, TINYWIDTH); + printf ("---\n"); + bandwidth = x = bench_memcpy (); + printf ("reference memcpy speed = %.1fMB/s (%.1fMP/s for 32bpp fills)\n", + x / 1000000., x / 4000000); + if (use_scaling) + { + printf ("---\n"); + if (filter == PIXMAN_FILTER_BILINEAR) + printf ("BILINEAR scaling\n"); + else if (filter == PIXMAN_FILTER_NEAREST) + printf ("NEAREST scaling\n"); + else + printf ("UNKNOWN scaling\n"); + } + printf ("---\n"); + + for (i = 0; i < ARRAY_LENGTH (tests_tbl); i++) + { + if (strcmp (pattern, "all") == 0 || strcmp (tests_tbl[i].testname, pattern) == 0) + { + bench_composite (tests_tbl[i].testname, + tests_tbl[i].src_fmt, + tests_tbl[i].src_flags, + tests_tbl[i].op, + tests_tbl[i].mask_fmt, + tests_tbl[i].mask_flags, + tests_tbl[i].dst_fmt, + bandwidth/8); + } + } + + free (src); + return 0; +} diff --git a/src/pixman/test/matrix-test.c b/src/pixman/test/matrix-test.c new file mode 100644 index 0000000..0a5f203 --- /dev/null +++ b/src/pixman/test/matrix-test.c @@ -0,0 +1,235 @@ +/* + * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "utils.h" +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> + +#ifdef HAVE_FLOAT128 + +#define pixman_fixed_to_float128(x) (((__float128)(x)) / 65536.0Q) + +typedef struct { __float128 v[3]; } pixman_vector_f128_t; +typedef struct { __float128 m[3][3]; } pixman_transform_f128_t; + +pixman_bool_t +pixman_transform_point_f128 (const pixman_transform_f128_t *t, + const pixman_vector_f128_t *v, + pixman_vector_f128_t *result) +{ + int i; + for (i = 0; i < 3; i++) + { + result->v[i] = t->m[i][0] * v->v[0] + + t->m[i][1] * v->v[1] + + t->m[i][2] * v->v[2]; + } + if (result->v[2] != 0) + { + result->v[0] /= result->v[2]; + result->v[1] /= result->v[2]; + result->v[2] = 1; + return TRUE; + } + else + { + return FALSE; + } +} + +pixman_bool_t does_it_fit_fixed_48_16 (__float128 x) +{ + if (x >= 65536.0Q * 65536.0Q * 32768.0Q) + return FALSE; + if (x <= -65536.0Q * 65536.0Q * 32768.0Q) + return FALSE; + return TRUE; +} + +#endif + +static inline uint32_t +byteswap32 (uint32_t x) +{ + return ((x & ((uint32_t)0xFF << 24)) >> 24) | + ((x & ((uint32_t)0xFF << 16)) >> 8) | + ((x & ((uint32_t)0xFF << 8)) << 8) | + ((x & ((uint32_t)0xFF << 0)) << 24); +} + +static inline uint64_t +byteswap64 (uint64_t x) +{ + return ((x & ((uint64_t)0xFF << 56)) >> 56) | + ((x & ((uint64_t)0xFF << 48)) >> 40) | + ((x & ((uint64_t)0xFF << 40)) >> 24) | + ((x & ((uint64_t)0xFF << 32)) >> 8) | + ((x & ((uint64_t)0xFF << 24)) << 8) | + ((x & ((uint64_t)0xFF << 16)) << 24) | + ((x & ((uint64_t)0xFF << 8)) << 40) | + ((x & ((uint64_t)0xFF << 0)) << 56); +} + +static void +byteswap_transform (pixman_transform_t *t) +{ + int i, j; + + if (is_little_endian ()) + return; + + for (i = 0; i < 3; i++) + for (j = 0; j < 3; j++) + t->matrix[i][j] = byteswap32 (t->matrix[i][j]); +} + +static void +byteswap_vector_48_16 (pixman_vector_48_16_t *v) +{ + int i; + + if (is_little_endian ()) + return; + + for (i = 0; i < 3; i++) + v->v[i] = byteswap64 (v->v[i]); +} + +uint32_t +test_matrix (int testnum, int verbose) +{ + uint32_t crc32 = 0; + int i, j, k; + pixman_bool_t is_affine; + + prng_srand (testnum); + + for (i = 0; i < 100; i++) + { + pixman_bool_t transform_ok; + pixman_transform_t ti; + pixman_vector_48_16_t vi, result_i; +#ifdef HAVE_FLOAT128 + pixman_transform_f128_t tf; + pixman_vector_f128_t vf, result_f; +#endif + prng_randmemset (&ti, sizeof(ti), 0); + prng_randmemset (&vi, sizeof(vi), 0); + byteswap_transform (&ti); + byteswap_vector_48_16 (&vi); + + for (j = 0; j < 3; j++) + { + /* make sure that "vi" contains 31.16 fixed point data */ + vi.v[j] >>= 17; + /* and apply random shift */ + if (prng_rand_n (3) == 0) + vi.v[j] >>= prng_rand_n (46); + } + + if (prng_rand_n (2)) + { + /* random shift for the matrix */ + for (j = 0; j < 3; j++) + for (k = 0; k < 3; k++) + ti.matrix[j][k] >>= prng_rand_n (30); + } + + if (prng_rand_n (2)) + { + /* affine matrix */ + ti.matrix[2][0] = 0; + ti.matrix[2][1] = 0; + ti.matrix[2][2] = pixman_fixed_1; + } + + if (prng_rand_n (2)) + { + /* cartesian coordinates */ + vi.v[2] = pixman_fixed_1; + } + + is_affine = (ti.matrix[2][0] == 0 && ti.matrix[2][1] == 0 && + ti.matrix[2][2] == pixman_fixed_1 && + vi.v[2] == pixman_fixed_1); + + transform_ok = TRUE; + if (is_affine && prng_rand_n (2)) + pixman_transform_point_31_16_affine (&ti, &vi, &result_i); + else + transform_ok = pixman_transform_point_31_16 (&ti, &vi, &result_i); + +#ifdef HAVE_FLOAT128 + /* compare with a reference 128-bit floating point implementation */ + for (j = 0; j < 3; j++) + { + vf.v[j] = pixman_fixed_to_float128 (vi.v[j]); + for (k = 0; k < 3; k++) + { + tf.m[j][k] = pixman_fixed_to_float128 (ti.matrix[j][k]); + } + } + + if (pixman_transform_point_f128 (&tf, &vf, &result_f)) + { + if (transform_ok || + (does_it_fit_fixed_48_16 (result_f.v[0]) && + does_it_fit_fixed_48_16 (result_f.v[1]) && + does_it_fit_fixed_48_16 (result_f.v[2]))) + { + for (j = 0; j < 3; j++) + { + double diff = fabs (result_f.v[j] - + pixman_fixed_to_float128 (result_i.v[j])); + + if (is_affine && diff > (0.51 / 65536.0)) + { + printf ("%d:%d: bad precision for affine (%.12f)\n", + testnum, i, diff); + abort (); + } + else if (diff > (0.71 / 65536.0)) + { + printf ("%d:%d: bad precision for projective (%.12f)\n", + testnum, i, diff); + abort (); + } + } + } + } +#endif + byteswap_vector_48_16 (&result_i); + crc32 = compute_crc32 (crc32, &result_i, sizeof (result_i)); + } + return crc32; +} + +int +main (int argc, const char *argv[]) +{ + return fuzzer_test_main ("matrix", 20000, + 0xBEBF98C3, + test_matrix, argc, argv); +} diff --git a/src/pixman/test/oob-test.c b/src/pixman/test/oob-test.c new file mode 100644 index 0000000..0d19b50 --- /dev/null +++ b/src/pixman/test/oob-test.c @@ -0,0 +1,101 @@ +#include <stdio.h> +#include <stdlib.h> +#include "utils.h" + +typedef struct +{ + int width; + int height; + int stride; + pixman_format_code_t format; + +} image_info_t; + +typedef struct +{ + pixman_op_t op; + + image_info_t src; + image_info_t dest; + + int src_x; + int src_y; + int dest_x; + int dest_y; + int width; + int height; +} composite_info_t; + +const composite_info_t info[] = +{ + { + PIXMAN_OP_SRC, + { 3, 6, 16, PIXMAN_a8r8g8b8 }, + { 5, 7, 20, PIXMAN_x8r8g8b8 }, + 1, 8, + 1, -1, + 1, 8 + }, + { + PIXMAN_OP_SRC, + { 7, 5, 36, PIXMAN_a8r8g8b8 }, + { 6, 5, 28, PIXMAN_x8r8g8b8 }, + 8, 5, + 5, 3, + 1, 2 + }, + { + PIXMAN_OP_OVER, + { 10, 10, 40, PIXMAN_a2b10g10r10 }, + { 10, 10, 40, PIXMAN_a2b10g10r10 }, + 0, 0, + 0, 0, + 10, 10 + }, + { + PIXMAN_OP_OVER, + { 10, 10, 40, PIXMAN_x2b10g10r10 }, + { 10, 10, 40, PIXMAN_x2b10g10r10 }, + 0, 0, + 0, 0, + 10, 10 + }, +}; + +static pixman_image_t * +make_image (const image_info_t *info) +{ + char *data = malloc (info->stride * info->height); + int i; + + for (i = 0; i < info->height * info->stride; ++i) + data[i] = (i % 255) ^ (((i % 16) << 4) | (i & 0xf0)); + + return pixman_image_create_bits (info->format, info->width, info->height, (uint32_t *)data, info->stride); +} + +static void +test_composite (const composite_info_t *info) +{ + pixman_image_t *src = make_image (&info->src); + pixman_image_t *dest = make_image (&info->dest); + + pixman_image_composite (PIXMAN_OP_SRC, src, NULL, dest, + info->src_x, info->src_y, + 0, 0, + info->dest_x, info->dest_y, + info->width, info->height); +} + + + +int +main (int argc, char **argv) +{ + int i; + + for (i = 0; i < ARRAY_LENGTH (info); ++i) + test_composite (&info[i]); + + return 0; +} diff --git a/src/pixman/test/pdf-op-test.c b/src/pixman/test/pdf-op-test.c new file mode 100644 index 0000000..dcb3a60 --- /dev/null +++ b/src/pixman/test/pdf-op-test.c @@ -0,0 +1,83 @@ +#include <stdlib.h> +#include "utils.h" + +static const pixman_op_t pdf_ops[] = +{ + PIXMAN_OP_MULTIPLY, + PIXMAN_OP_SCREEN, + PIXMAN_OP_OVERLAY, + PIXMAN_OP_DARKEN, + PIXMAN_OP_LIGHTEN, + PIXMAN_OP_COLOR_DODGE, + PIXMAN_OP_COLOR_BURN, + PIXMAN_OP_HARD_LIGHT, + PIXMAN_OP_SOFT_LIGHT, + PIXMAN_OP_DIFFERENCE, + PIXMAN_OP_EXCLUSION, + PIXMAN_OP_HSL_HUE, + PIXMAN_OP_HSL_SATURATION, + PIXMAN_OP_HSL_COLOR, + PIXMAN_OP_HSL_LUMINOSITY +}; + +static const uint32_t pixels[] = +{ + 0x00808080, + 0x80123456, + 0x00000000, + 0xffffffff, + 0x00ffffff, + 0x80808080, + 0x00123456, +}; + +int +main () +{ + int o, s, m, d; + + enable_divbyzero_exceptions(); + + for (o = 0; o < ARRAY_LENGTH (pdf_ops); ++o) + { + pixman_op_t op = pdf_ops[o]; + + for (s = 0; s < ARRAY_LENGTH (pixels); ++s) + { + pixman_image_t *src; + + src = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, 1, 1, (uint32_t *)&(pixels[s]), 4); + + for (m = -1; m < ARRAY_LENGTH (pixels); ++m) + { + pixman_image_t *msk = NULL; + if (m >= 0) + { + msk = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, 1, 1, (uint32_t *)&(pixels[m]), 4); + } + + for (d = 0; d < ARRAY_LENGTH (pixels); ++d) + { + pixman_image_t *dst; + uint32_t dp = pixels[d]; + + dst = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, 1, 1, &dp, 4); + + pixman_image_composite (op, src, msk, dst, + 0, 0, 0, 0, 0, 0, 1, 1); + + pixman_image_unref (dst); + } + if (msk) + pixman_image_unref (msk); + } + + pixman_image_unref (src); + } + } + + return 0; +} diff --git a/src/pixman/test/pixel-test.c b/src/pixman/test/pixel-test.c new file mode 100644 index 0000000..8c525d2 --- /dev/null +++ b/src/pixman/test/pixel-test.c @@ -0,0 +1,267 @@ +/* + * Copyright © 2013 Soeren Sandmann + * Copyright © 2013 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include <stdio.h> +#include <stdlib.h> /* abort() */ +#include <math.h> +#include <time.h> +#include "utils.h" + +typedef struct pixel_combination_t pixel_combination_t; +struct pixel_combination_t +{ + pixman_op_t op; + pixman_format_code_t src_format; + uint32_t src_pixel; + pixman_format_code_t dest_format; + uint32_t dest_pixel; +}; + +static const pixel_combination_t regressions[] = +{ + { PIXMAN_OP_OVER, + PIXMAN_a8r8g8b8, 0x0f00c300, + PIXMAN_x14r6g6b6, 0x003c0, + }, + { PIXMAN_OP_DISJOINT_XOR, + PIXMAN_a4r4g4b4, 0xd0c0, + PIXMAN_a8r8g8b8, 0x5300ea00, + }, + { PIXMAN_OP_OVER, + PIXMAN_a8r8g8b8, 0x20c6bf00, + PIXMAN_r5g6b5, 0xb9ff + }, + { PIXMAN_OP_OVER, + PIXMAN_a8r8g8b8, 0x204ac7ff, + PIXMAN_r5g6b5, 0xc1ff + }, + { PIXMAN_OP_OVER_REVERSE, + PIXMAN_r5g6b5, 0xffc3, + PIXMAN_a8r8g8b8, 0x102d00dd + }, + { PIXMAN_OP_OVER_REVERSE, + PIXMAN_r5g6b5, 0x1f00, + PIXMAN_a8r8g8b8, 0x1bdf0c89 + }, + { PIXMAN_OP_OVER_REVERSE, + PIXMAN_r5g6b5, 0xf9d2, + PIXMAN_a8r8g8b8, 0x1076bcf7 + }, + { PIXMAN_OP_OVER_REVERSE, + PIXMAN_r5g6b5, 0x00c3, + PIXMAN_a8r8g8b8, 0x1bfe9ae5 + }, + { PIXMAN_OP_OVER_REVERSE, + PIXMAN_r5g6b5, 0x09ff, + PIXMAN_a8r8g8b8, 0x0b00c16c + }, + { PIXMAN_OP_DISJOINT_ATOP, + PIXMAN_a2r2g2b2, 0xbc, + PIXMAN_a8r8g8b8, 0x9efff1ff + }, + { PIXMAN_OP_DISJOINT_ATOP, + PIXMAN_a4r4g4b4, 0xae5f, + PIXMAN_a8r8g8b8, 0xf215b675 + }, + { PIXMAN_OP_DISJOINT_ATOP_REVERSE, + PIXMAN_a8r8g8b8, 0xce007980, + PIXMAN_a8r8g8b8, 0x80ffe4ad + }, + { PIXMAN_OP_DISJOINT_XOR, + PIXMAN_a8r8g8b8, 0xb8b07bea, + PIXMAN_a4r4g4b4, 0x939c + }, + { PIXMAN_OP_CONJOINT_ATOP_REVERSE, + PIXMAN_r5g6b5, 0x0063, + PIXMAN_a8r8g8b8, 0x10bb1ed7, + }, +}; + +static void +fill (pixman_image_t *image, uint32_t pixel) +{ + uint8_t *data = (uint8_t *)pixman_image_get_data (image); + int bytes_per_pixel = PIXMAN_FORMAT_BPP (pixman_image_get_format (image)) / 8; + int n_bytes = pixman_image_get_stride (image) * pixman_image_get_height (image); + int i; + + switch (bytes_per_pixel) + { + case 4: + for (i = 0; i < n_bytes / 4; ++i) + ((uint32_t *)data)[i] = pixel; + break; + + case 2: + pixel &= 0xffff; + for (i = 0; i < n_bytes / 2; ++i) + ((uint16_t *)data)[i] = pixel; + break; + + case 1: + pixel &= 0xff; + for (i = 0; i < n_bytes; ++i) + ((uint8_t *)data)[i] = pixel; + break; + + default: + assert (0); + break; + } +} + +static uint32_t +access (pixman_image_t *image, int x, int y) +{ + int bytes_per_pixel; + int stride; + uint32_t result; + uint8_t *location; + + if (x < 0 || x >= image->bits.width || y < 0 || y >= image->bits.height) + return 0; + + bytes_per_pixel = PIXMAN_FORMAT_BPP (image->bits.format) / 8; + stride = image->bits.rowstride * 4; + + location = (uint8_t *)image->bits.bits + y * stride + x * bytes_per_pixel; + + if (bytes_per_pixel == 4) + result = *(uint32_t *)location; + else if (bytes_per_pixel == 2) + result = *(uint16_t *)location; + else if (bytes_per_pixel == 1) + result = *(uint8_t *)location; + else + assert (0); + + return result; +} + +static pixman_bool_t +verify (int test_no, const pixel_combination_t *combination, int size) +{ + pixman_image_t *src, *dest; + pixel_checker_t src_checker, dest_checker; + color_t source_color, dest_color, reference_color; + pixman_bool_t result = TRUE; + int i, j; + + /* Compute reference color */ + pixel_checker_init (&src_checker, combination->src_format); + pixel_checker_init (&dest_checker, combination->dest_format); + pixel_checker_convert_pixel_to_color ( + &src_checker, combination->src_pixel, &source_color); + pixel_checker_convert_pixel_to_color ( + &dest_checker, combination->dest_pixel, &dest_color); + do_composite (combination->op, + &source_color, NULL, &dest_color, + &reference_color, FALSE); + + src = pixman_image_create_bits ( + combination->src_format, size, size, NULL, -1); + dest = pixman_image_create_bits ( + combination->dest_format, size, size, NULL, -1); + + fill (src, combination->src_pixel); + fill (dest, combination->dest_pixel); + + pixman_image_composite32 ( + combination->op, src, NULL, dest, 0, 0, 0, 0, 0, 0, size, size); + + for (j = 0; j < size; ++j) + { + for (i = 0; i < size; ++i) + { + uint32_t computed = access (dest, i, j); + int32_t a, r, g, b; + + if (!pixel_checker_check (&dest_checker, computed, &reference_color)) + { + printf ("----------- Test %d failed ----------\n", test_no); + + printf (" operator: %s\n", operator_name (combination->op)); + printf (" src format: %s\n", format_name (combination->src_format)); + printf (" dest format: %s\n", format_name (combination->dest_format)); + printf (" - source ARGB: %f %f %f %f (pixel: %8x)\n", + source_color.a, source_color.r, source_color.g, source_color.b, + combination->src_pixel); + pixel_checker_split_pixel (&src_checker, combination->src_pixel, + &a, &r, &g, &b); + printf (" %8d %8d %8d %8d\n", a, r, g, b); + + printf (" - dest ARGB: %f %f %f %f (pixel: %8x)\n", + dest_color.a, dest_color.r, dest_color.g, dest_color.b, + combination->dest_pixel); + pixel_checker_split_pixel (&dest_checker, combination->dest_pixel, + &a, &r, &g, &b); + printf (" %8d %8d %8d %8d\n", a, r, g, b); + + pixel_checker_split_pixel (&dest_checker, computed, &a, &r, &g, &b); + printf (" - expected ARGB: %f %f %f %f\n", + reference_color.a, reference_color.r, reference_color.g, reference_color.b); + + pixel_checker_get_min (&dest_checker, &reference_color, &a, &r, &g, &b); + printf (" min acceptable: %8d %8d %8d %8d\n", a, r, g, b); + + pixel_checker_split_pixel (&dest_checker, computed, &a, &r, &g, &b); + printf (" got: %8d %8d %8d %8d (pixel: %8x)\n", a, r, g, b, computed); + + pixel_checker_get_max (&dest_checker, &reference_color, &a, &r, &g, &b); + printf (" max acceptable: %8d %8d %8d %8d\n", a, r, g, b); + + result = FALSE; + goto done; + } + } + } + +done: + pixman_image_unref (src); + pixman_image_unref (dest); + + return result; +} + +int +main (int argc, char **argv) +{ + int result = 0; + int i, j; + + for (i = 0; i < ARRAY_LENGTH (regressions); ++i) + { + const pixel_combination_t *combination = &(regressions[i]); + + for (j = 1; j < 34; ++j) + { + if (!verify (i, combination, j)) + { + result = 1; + break; + } + } + } + + return result; +} diff --git a/src/pixman/test/prng-test.c b/src/pixman/test/prng-test.c new file mode 100644 index 0000000..c1d9320 --- /dev/null +++ b/src/pixman/test/prng-test.c @@ -0,0 +1,175 @@ +/* + * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com> + * + * Based on the public domain implementation of small noncryptographic PRNG + * authored by Bob Jenkins: http://burtleburtle.net/bob/rand/smallprng.html + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdlib.h> +#include "utils-prng.h" +#include "utils.h" + +/* The original code from http://www.burtleburtle.net/bob/rand/smallprng.html */ + +typedef uint32_t u4; +typedef struct ranctx { u4 a; u4 b; u4 c; u4 d; } ranctx; + +#define rot(x,k) (((x)<<(k))|((x)>>(32-(k)))) +u4 ranval( ranctx *x ) { + u4 e = x->a - rot(x->b, 27); + x->a = x->b ^ rot(x->c, 17); + x->b = x->c + x->d; + x->c = x->d + e; + x->d = e + x->a; + return x->d; +} + +void raninit( ranctx *x, u4 seed ) { + u4 i; + x->a = 0xf1ea5eed, x->b = x->c = x->d = seed; + for (i=0; i<20; ++i) { + (void)ranval(x); + } +} + +/*****************************************************************************/ + +#define BUFSIZE (8 * 1024 * 1024) +#define N 50 + +void bench (void) +{ + double t1, t2; + int i; + prng_t prng; + uint8_t *buf = aligned_malloc (16, BUFSIZE + 1); + + prng_srand_r (&prng, 1234); + t1 = gettime(); + for (i = 0; i < N; i++) + prng_randmemset_r (&prng, buf, BUFSIZE, 0); + t2 = gettime(); + printf ("aligned randmemset : %.2f MB/s\n", + (double)BUFSIZE * N / 1000000. / (t2 - t1)); + + t1 = gettime(); + for (i = 0; i < N; i++) + prng_randmemset_r (&prng, buf + 1, BUFSIZE, 0); + t2 = gettime(); + printf ("unaligned randmemset : %.2f MB/s\n", + (double)BUFSIZE * N / 1000000. / (t2 - t1)); + + t1 = gettime(); + for (i = 0; i < N; i++) + { + prng_randmemset_r (&prng, buf, BUFSIZE, RANDMEMSET_MORE_00_AND_FF); + } + t2 = gettime (); + printf ("aligned randmemset (more 00 and FF) : %.2f MB/s\n", + (double)BUFSIZE * N / 1000000. / (t2 - t1)); + + t1 = gettime(); + for (i = 0; i < N; i++) + { + prng_randmemset_r (&prng, buf + 1, BUFSIZE, RANDMEMSET_MORE_00_AND_FF); + } + t2 = gettime (); + printf ("unaligned randmemset (more 00 and FF) : %.2f MB/s\n", + (double)BUFSIZE * N / 1000000. / (t2 - t1)); + + free (buf); +} + +#define SMALLBUFSIZE 100 + +int main (int argc, char *argv[]) +{ + const uint32_t ref_crc[RANDMEMSET_MORE_00_AND_FF + 1] = + { + 0xBA06763D, 0x103FC550, 0x8B59ABA5, 0xD82A0F39, + 0xD2321099, 0xFD8C5420, 0xD3B7C42A, 0xFC098093, + 0x85E01DE0, 0x6680F8F7, 0x4D32DD3C, 0xAE52382B, + 0x149E6CB5, 0x8B336987, 0x15DCB2B3, 0x8A71B781 + }; + uint32_t crc1, crc2; + uint32_t ref, seed, seed0, seed1, seed2, seed3; + prng_rand_128_data_t buf; + uint8_t *bytebuf = aligned_malloc(16, SMALLBUFSIZE + 1); + ranctx x; + prng_t prng; + prng_randmemset_flags_t flags; + + if (argc > 1 && strcmp(argv[1], "-bench") == 0) + { + bench (); + return 0; + } + + /* basic test */ + raninit (&x, 0); + prng_srand_r (&prng, 0); + assert (ranval (&x) == prng_rand_r (&prng)); + + /* test for simd code */ + seed = 0; + prng_srand_r (&prng, seed); + seed0 = (seed = seed * 1103515245 + 12345); + seed1 = (seed = seed * 1103515245 + 12345); + seed2 = (seed = seed * 1103515245 + 12345); + seed3 = (seed = seed * 1103515245 + 12345); + prng_rand_128_r (&prng, &buf); + + raninit (&x, seed0); + ref = ranval (&x); + assert (ref == buf.w[0]); + + raninit (&x, seed1); + ref = ranval (&x); + assert (ref == buf.w[1]); + + raninit (&x, seed2); + ref = ranval (&x); + assert (ref == buf.w[2]); + + raninit (&x, seed3); + ref = ranval (&x); + assert (ref == buf.w[3]); + + /* test for randmemset */ + for (flags = 0; flags <= RANDMEMSET_MORE_00_AND_FF; flags++) + { + prng_srand_r (&prng, 1234); + prng_randmemset_r (&prng, bytebuf, 16, flags); + prng_randmemset_r (&prng, bytebuf + 16, SMALLBUFSIZE - 17, flags); + crc1 = compute_crc32 (0, bytebuf, SMALLBUFSIZE - 1); + prng_srand_r (&prng, 1234); + prng_randmemset_r (&prng, bytebuf + 1, SMALLBUFSIZE - 1, flags); + crc2 = compute_crc32 (0, bytebuf + 1, SMALLBUFSIZE - 1); + assert (ref_crc[flags] == crc1); + assert (ref_crc[flags] == crc2); + } + + free (bytebuf); + + return 0; +} diff --git a/src/pixman/test/radial-perf-test.c b/src/pixman/test/radial-perf-test.c new file mode 100644 index 0000000..71092e2 --- /dev/null +++ b/src/pixman/test/radial-perf-test.c @@ -0,0 +1,58 @@ +#include "utils.h" +#include <stdio.h> + +int +main () +{ + static const pixman_point_fixed_t inner = { 0x0000, 0x0000 }; + static const pixman_point_fixed_t outer = { 0x0000, 0x0000 }; + static const pixman_fixed_t r_inner = 0; + static const pixman_fixed_t r_outer = 64 << 16; + static const pixman_gradient_stop_t stops[] = { + { 0x00000, { 0x6666, 0x6666, 0x6666, 0xffff } }, + { 0x10000, { 0x0000, 0x0000, 0x0000, 0xffff } } + }; + static const pixman_transform_t transform = { + { { 0x0, 0x26ee, 0x0}, + { 0xffffeeef, 0x0, 0x0}, + { 0x0, 0x0, 0x10000} + } + }; + static const pixman_color_t z = { 0x0000, 0x0000, 0x0000, 0x0000 }; + pixman_image_t *dest, *radial, *zero; + int i; + double before, after; + + dest = pixman_image_create_bits ( + PIXMAN_x8r8g8b8, 640, 429, NULL, -1); + zero = pixman_image_create_solid_fill (&z); + radial = pixman_image_create_radial_gradient ( + &inner, &outer, r_inner, r_outer, stops, ARRAY_LENGTH (stops)); + pixman_image_set_transform (radial, &transform); + pixman_image_set_repeat (radial, PIXMAN_REPEAT_PAD); + +#define N_COMPOSITE 500 + + before = gettime(); + for (i = 0; i < N_COMPOSITE; ++i) + { + before -= gettime(); + + pixman_image_composite ( + PIXMAN_OP_SRC, zero, NULL, dest, + 0, 0, 0, 0, 0, 0, 640, 429); + + before += gettime(); + + pixman_image_composite32 ( + PIXMAN_OP_OVER, radial, NULL, dest, + - 150, -158, 0, 0, 0, 0, 640, 361); + } + + after = gettime(); + + write_png (dest, "radial.png"); + + printf ("Average time to composite: %f\n", (after - before) / N_COMPOSITE); + return 0; +} diff --git a/src/pixman/test/region-contains-test.c b/src/pixman/test/region-contains-test.c new file mode 100644 index 0000000..096e651 --- /dev/null +++ b/src/pixman/test/region-contains-test.c @@ -0,0 +1,169 @@ +#include <stdlib.h> +#include <stdio.h> +#include "utils.h" + +static void +make_random_region (pixman_region32_t *region) +{ + int n_boxes; + + pixman_region32_init (region); + + n_boxes = prng_rand_n (64); + while (n_boxes--) + { + int32_t x, y; + uint32_t w, h; + + x = (int32_t)prng_rand() >> 2; + y = (int32_t)prng_rand() >> 2; + w = prng_rand() >> 2; + h = prng_rand() >> 2; + + pixman_region32_union_rect (region, region, x, y, w, h); + } +} + +static void +print_box (pixman_box32_t *box) +{ + printf (" %d %d %d %d\n", box->x1, box->y1, box->x2, box->y2); +} + +static int32_t +random_coord (pixman_region32_t *region, pixman_bool_t x) +{ + pixman_box32_t *b, *bb; + int n_boxes; + int begin, end; + + if (prng_rand_n (14)) + { + bb = pixman_region32_rectangles (region, &n_boxes); + if (n_boxes == 0) + goto use_extent; + b = bb + prng_rand_n (n_boxes); + } + else + { + use_extent: + b = pixman_region32_extents (region); + n_boxes = 1; + } + + if (x) + { + begin = b->x1; + end = b->x2; + } + else + { + begin = b->y1; + end = b->y2; + } + + switch (prng_rand_n (5)) + { + case 0: + return begin - prng_rand(); + case 1: + return end + prng_rand (); + case 2: + return end; + case 3: + return begin; + default: + return (end - begin) / 2 + begin; + } + return 0; +} + +static uint32_t +compute_crc32_u32 (uint32_t crc32, uint32_t v) +{ + if (!is_little_endian()) + { + v = ((v & 0xff000000) >> 24) | + ((v & 0x00ff0000) >> 8) | + ((v & 0x0000ff00) << 8) | + ((v & 0x000000ff) << 24); + } + + return compute_crc32 (crc32, &v, sizeof (int32_t)); +} + +static uint32_t +crc32_box32 (uint32_t crc32, pixman_box32_t *box) +{ + crc32 = compute_crc32_u32 (crc32, box->x1); + crc32 = compute_crc32_u32 (crc32, box->y1); + crc32 = compute_crc32_u32 (crc32, box->x2); + crc32 = compute_crc32_u32 (crc32, box->y2); + + return crc32; +} + +static uint32_t +test_region_contains_rectangle (int i, int verbose) +{ + pixman_box32_t box; + pixman_box32_t rbox = { 0, 0, 0, 0 }; + pixman_region32_t region; + uint32_t r, r1, r2, r3, r4, crc32; + + prng_srand (i); + + make_random_region (®ion); + + box.x1 = random_coord (®ion, TRUE); + box.x2 = box.x1 + prng_rand (); + box.y1 = random_coord (®ion, FALSE); + box.y2 = box.y1 + prng_rand (); + + if (verbose) + { + int n_rects; + pixman_box32_t *boxes; + + boxes = pixman_region32_rectangles (®ion, &n_rects); + + printf ("region:\n"); + while (n_rects--) + print_box (boxes++); + printf ("box:\n"); + print_box (&box); + } + + crc32 = 0; + + r1 = pixman_region32_contains_point (®ion, box.x1, box.y1, &rbox); + crc32 = crc32_box32 (crc32, &rbox); + r2 = pixman_region32_contains_point (®ion, box.x1, box.y2, &rbox); + crc32 = crc32_box32 (crc32, &rbox); + r3 = pixman_region32_contains_point (®ion, box.x2, box.y1, &rbox); + crc32 = crc32_box32 (crc32, &rbox); + r4 = pixman_region32_contains_point (®ion, box.x2, box.y2, &rbox); + crc32 = crc32_box32 (crc32, &rbox); + + r = pixman_region32_contains_rectangle (®ion, &box); + r = (i << 8) | (r << 4) | (r1 << 3) | (r2 << 2) | (r3 << 1) | (r4 << 0); + + crc32 = compute_crc32_u32 (crc32, r); + + if (verbose) + printf ("results: %d %d %d %d %d\n", (r & 0xf0) >> 4, r1, r2, r3, r4); + + pixman_region32_fini (®ion); + + return crc32; +} + +int +main (int argc, const char *argv[]) +{ + return fuzzer_test_main ("region_contains", + 1000000, + 0x548E0F3F, + test_region_contains_rectangle, + argc, argv); +} diff --git a/src/pixman/test/region-test.c b/src/pixman/test/region-test.c new file mode 100644 index 0000000..bfc219b --- /dev/null +++ b/src/pixman/test/region-test.c @@ -0,0 +1,125 @@ +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include "utils.h" + +int +main () +{ + pixman_region32_t r1; + pixman_region32_t r2; + pixman_region32_t r3; + pixman_box32_t boxes[] = { + { 10, 10, 20, 20 }, + { 30, 30, 30, 40 }, + { 50, 45, 60, 44 }, + }; + pixman_box32_t boxes2[] = { + { 2, 6, 7, 6 }, + { 4, 1, 6, 7 }, + }; + pixman_box32_t boxes3[] = { + { 2, 6, 7, 6 }, + { 4, 1, 6, 1 }, + }; + int i, j; + pixman_box32_t *b; + pixman_image_t *image, *fill; + pixman_color_t white = { + 0xffff, + 0xffff, + 0xffff, + 0xffff + }; + + prng_srand (0); + + /* This used to go into an infinite loop before pixman-region.c + * was fixed to not use explict "short" variables + */ + pixman_region32_init_rect (&r1, 0, 0, 20, 64000); + pixman_region32_init_rect (&r2, 0, 0, 20, 64000); + pixman_region32_init_rect (&r3, 0, 0, 20, 64000); + + pixman_region32_subtract (&r1, &r2, &r3); + + + /* This would produce a region containing an empty + * rectangle in it. Such regions are considered malformed, + * but using an empty rectangle for initialization should + * work. + */ + pixman_region32_init_rects (&r1, boxes, 3); + + b = pixman_region32_rectangles (&r1, &i); + + assert (i == 1); + + while (i--) + { + assert (b[i].x1 < b[i].x2); + assert (b[i].y1 < b[i].y2); + } + + /* This would produce a rectangle containing the bounding box + * of the two rectangles. The correct result is to eliminate + * the broken rectangle. + */ + pixman_region32_init_rects (&r1, boxes2, 2); + + b = pixman_region32_rectangles (&r1, &i); + + assert (i == 1); + + assert (b[0].x1 == 4); + assert (b[0].y1 == 1); + assert (b[0].x2 == 6); + assert (b[0].y2 == 7); + + /* This should produce an empty region */ + pixman_region32_init_rects (&r1, boxes3, 2); + + b = pixman_region32_rectangles (&r1, &i); + + assert (i == 0); + + fill = pixman_image_create_solid_fill (&white); + for (i = 0; i < 100; i++) + { + int image_size = 128; + + pixman_region32_init (&r1); + + /* Add some random rectangles */ + for (j = 0; j < 64; j++) + pixman_region32_union_rect (&r1, &r1, + prng_rand_n (image_size), + prng_rand_n (image_size), + prng_rand_n (25), + prng_rand_n (25)); + + /* Clip to image size */ + pixman_region32_init_rect (&r2, 0, 0, image_size, image_size); + pixman_region32_intersect (&r1, &r1, &r2); + pixman_region32_fini (&r2); + + /* render region to a1 mask */ + image = pixman_image_create_bits (PIXMAN_a1, image_size, image_size, NULL, 0); + pixman_image_set_clip_region32 (image, &r1); + pixman_image_composite32 (PIXMAN_OP_SRC, + fill, NULL, image, + 0, 0, 0, 0, 0, 0, + image_size, image_size); + pixman_region32_init_from_image (&r2, image); + + pixman_image_unref (image); + + assert (pixman_region32_equal (&r1, &r2)); + pixman_region32_fini (&r1); + pixman_region32_fini (&r2); + + } + pixman_image_unref (fill); + + return 0; +} diff --git a/src/pixman/test/region-translate-test.c b/src/pixman/test/region-translate-test.c new file mode 100644 index 0000000..5a03027 --- /dev/null +++ b/src/pixman/test/region-translate-test.c @@ -0,0 +1,30 @@ +#include <assert.h> +#include "utils.h" + +/* Pixman had a bug where 32bit regions where clipped to 16bit sizes when + * pixman_region32_translate() was called. This test exercises that bug. + */ + +#define LARGE 32000 + +int +main (int argc, char **argv) +{ + pixman_box32_t rect = { -LARGE, -LARGE, LARGE, LARGE }; + pixman_region32_t r1, r2; + + pixman_region32_init_rects (&r1, &rect, 1); + pixman_region32_init_rect (&r2, rect.x1, rect.y1, rect.x2 - rect.x1, rect.y2 - rect.y1); + + assert (pixman_region32_equal (&r1, &r2)); + + pixman_region32_translate (&r1, -LARGE, LARGE); + pixman_region32_translate (&r1, LARGE, -LARGE); + + assert (pixman_region32_equal (&r1, &r2)); + + pixman_region32_fini (&r1); + pixman_region32_fini (&r2); + + return 0; +} diff --git a/src/pixman/test/rotate-test.c b/src/pixman/test/rotate-test.c new file mode 100644 index 0000000..18ca60d --- /dev/null +++ b/src/pixman/test/rotate-test.c @@ -0,0 +1,120 @@ +#include <stdlib.h> +#include "utils.h" + +#define WIDTH 32 +#define HEIGHT 32 + +static const pixman_format_code_t formats[] = +{ + PIXMAN_a8r8g8b8, + PIXMAN_a8b8g8r8, + PIXMAN_x8r8g8b8, + PIXMAN_x8b8g8r8, + PIXMAN_r5g6b5, + PIXMAN_b5g6r5, + PIXMAN_a8, + PIXMAN_a1, +}; + +static const pixman_op_t ops[] = +{ + PIXMAN_OP_OVER, + PIXMAN_OP_SRC, + PIXMAN_OP_ADD, +}; + +#define TRANSFORM(v00, v01, v10, v11) \ + { { { v00, v01, WIDTH * pixman_fixed_1 / 2 }, \ + { v10, v11, HEIGHT * pixman_fixed_1 / 2 }, \ + { 0, 0, pixman_fixed_1 } } } + +#define F1 pixman_fixed_1 + +static const pixman_transform_t transforms[] = +{ + TRANSFORM (0, -1, 1, 0), /* wrong 90 degree rotation */ + TRANSFORM (0, 1, -1, 0), /* wrong 270 degree rotation */ + TRANSFORM (1, 0, 0, 1), /* wrong identity */ + TRANSFORM (-1, 0, 0, -1), /* wrong 180 degree rotation */ + TRANSFORM (0, -F1, F1, 0), /* correct 90 degree rotation */ + TRANSFORM (0, F1, -F1, 0), /* correct 270 degree rotation */ + TRANSFORM (F1, 0, 0, F1), /* correct identity */ + TRANSFORM (-F1, 0, 0, -F1), /* correct 180 degree rotation */ +}; + +#define RANDOM_FORMAT() \ + (formats[prng_rand_n (ARRAY_LENGTH (formats))]) + +#define RANDOM_OP() \ + (ops[prng_rand_n (ARRAY_LENGTH (ops))]) + +#define RANDOM_TRANSFORM() \ + (&(transforms[prng_rand_n (ARRAY_LENGTH (transforms))])) + +static void +on_destroy (pixman_image_t *image, void *data) +{ + free (data); +} + +static pixman_image_t * +make_image (void) +{ + pixman_format_code_t format = RANDOM_FORMAT(); + uint32_t *bytes, *orig; + pixman_image_t *image; + int stride; + + orig = bytes = malloc (WIDTH * HEIGHT * 4); + prng_randmemset (bytes, WIDTH * HEIGHT * 4, 0); + + stride = WIDTH * 4; + if (prng_rand_n (2) == 0) + { + bytes += (stride / 4) * (HEIGHT - 1); + stride = - stride; + } + + image = pixman_image_create_bits ( + format, WIDTH, HEIGHT, bytes, stride); + + pixman_image_set_transform (image, RANDOM_TRANSFORM()); + pixman_image_set_destroy_function (image, on_destroy, orig); + pixman_image_set_repeat (image, PIXMAN_REPEAT_NORMAL); + + image_endian_swap (image); + + return image; +} + +static uint32_t +test_transform (int testnum, int verbose) +{ + pixman_image_t *src, *dest; + uint32_t crc; + + prng_srand (testnum); + + src = make_image (); + dest = make_image (); + + pixman_image_composite (RANDOM_OP(), + src, NULL, dest, + 0, 0, 0, 0, WIDTH / 2, HEIGHT / 2, + WIDTH, HEIGHT); + + crc = compute_crc32_for_image (0, dest); + + pixman_image_unref (src); + pixman_image_unref (dest); + + return crc; +} + +int +main (int argc, const char *argv[]) +{ + return fuzzer_test_main ("rotate", 15000, + 0x81E9EC2F, + test_transform, argc, argv); +} diff --git a/src/pixman/test/scaling-bench.c b/src/pixman/test/scaling-bench.c new file mode 100644 index 0000000..365e798 --- /dev/null +++ b/src/pixman/test/scaling-bench.c @@ -0,0 +1,80 @@ +#include <stdlib.h> +#include "utils.h" + +#define SOURCE_WIDTH 320 +#define SOURCE_HEIGHT 240 +#define TEST_REPEATS 3 + +static pixman_image_t * +make_source (void) +{ + size_t n_bytes = (SOURCE_WIDTH + 2) * (SOURCE_HEIGHT + 2) * 4; + uint32_t *data = malloc (n_bytes); + pixman_image_t *source; + + prng_randmemset (data, n_bytes, 0); + + source = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, SOURCE_WIDTH + 2, SOURCE_HEIGHT + 2, + data, + (SOURCE_WIDTH + 2) * 4); + + pixman_image_set_filter (source, PIXMAN_FILTER_BILINEAR, NULL, 0); + + return source; +} + +int +main () +{ + double scale; + pixman_image_t *src; + + prng_srand (23874); + + src = make_source (); + printf ("# %-6s %-22s %-14s %-12s\n", + "ratio", + "resolutions", + "time / ms", + "time per pixel / ns"); + for (scale = 0.1; scale < 10.005; scale += 0.01) + { + int i; + int dest_width = SOURCE_WIDTH * scale + 0.5; + int dest_height = SOURCE_HEIGHT * scale + 0.5; + int dest_byte_stride = (dest_width * 4 + 15) & ~15; + pixman_fixed_t s = (1 / scale) * 65536.0 + 0.5; + pixman_transform_t transform; + pixman_image_t *dest; + double t1, t2, t = -1; + uint32_t *dest_buf = aligned_malloc (16, dest_byte_stride * dest_height); + memset (dest_buf, 0, dest_byte_stride * dest_height); + + pixman_transform_init_scale (&transform, s, s); + pixman_image_set_transform (src, &transform); + + dest = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, dest_width, dest_height, dest_buf, dest_byte_stride); + + for (i = 0; i < TEST_REPEATS; i++) + { + t1 = gettime(); + pixman_image_composite ( + PIXMAN_OP_OVER, src, NULL, dest, + scale, scale, 0, 0, 0, 0, dest_width, dest_height); + t2 = gettime(); + if (t < 0 || t2 - t1 < t) + t = t2 - t1; + } + + printf ("%6.2f : %4dx%-4d => %4dx%-4d : %12.4f : %12.4f\n", + scale, SOURCE_WIDTH, SOURCE_HEIGHT, dest_width, dest_height, + t * 1000, (t / (dest_width * dest_height)) * 1000000000); + + pixman_image_unref (dest); + free (dest_buf); + } + + return 0; +} diff --git a/src/pixman/test/scaling-crash-test.c b/src/pixman/test/scaling-crash-test.c new file mode 100644 index 0000000..0dac892 --- /dev/null +++ b/src/pixman/test/scaling-crash-test.c @@ -0,0 +1,219 @@ +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "utils.h" + +/* + * We have a source image filled with solid color, set NORMAL or PAD repeat, + * and some transform which results in nearest neighbour scaling. + * + * The expected result is either that the destination image filled with this solid + * color or, if the transformation is such that we can't composite anything at + * all, that nothing has changed in the destination. + * + * The surrounding memory of the source image is a different solid color so that + * we are sure to get failures if we access it. + */ +static int +run_test (int32_t dst_width, + int32_t dst_height, + int32_t src_width, + int32_t src_height, + int32_t src_x, + int32_t src_y, + int32_t scale_x, + int32_t scale_y, + pixman_filter_t filter, + pixman_repeat_t repeat) +{ + pixman_image_t * src_img; + pixman_image_t * dst_img; + pixman_transform_t transform; + uint32_t * srcbuf; + uint32_t * dstbuf; + pixman_color_t color_cc = { 0xcccc, 0xcccc, 0xcccc, 0xcccc }; + pixman_image_t * solid; + int result; + int i; + + static const pixman_fixed_t kernel[] = + { +#define D(f) (pixman_double_to_fixed (f) + 0x0001) + + pixman_int_to_fixed (5), + pixman_int_to_fixed (5), + D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), + D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), + D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), + D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), + D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0) + }; + + result = 0; + + srcbuf = (uint32_t *)malloc ((src_width + 10) * (src_height + 10) * 4); + dstbuf = (uint32_t *)malloc (dst_width * dst_height * 4); + + memset (srcbuf, 0x88, src_width * src_height * 4); + memset (dstbuf, 0x33, dst_width * dst_height * 4); + + src_img = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, src_width, src_height, + srcbuf + (src_width + 10) * 5 + 5, (src_width + 10) * 4); + + solid = pixman_image_create_solid_fill (&color_cc); + pixman_image_composite32 (PIXMAN_OP_SRC, solid, NULL, src_img, + 0, 0, 0, 0, 0, 0, src_width, src_height); + pixman_image_unref (solid); + + dst_img = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, dst_width, dst_height, dstbuf, dst_width * 4); + + pixman_transform_init_scale (&transform, scale_x, scale_y); + pixman_image_set_transform (src_img, &transform); + pixman_image_set_repeat (src_img, repeat); + if (filter == PIXMAN_FILTER_CONVOLUTION) + pixman_image_set_filter (src_img, filter, kernel, 27); + else + pixman_image_set_filter (src_img, filter, NULL, 0); + + pixman_image_composite (PIXMAN_OP_SRC, src_img, NULL, dst_img, + src_x, src_y, 0, 0, 0, 0, dst_width, dst_height); + + pixman_image_unref (src_img); + pixman_image_unref (dst_img); + + for (i = 0; i < dst_width * dst_height; i++) + { + if (dstbuf[i] != 0xCCCCCCCC && dstbuf[i] != 0x33333333) + { + result = 1; + break; + } + } + + free (srcbuf); + free (dstbuf); + return result; +} + +typedef struct filter_info_t filter_info_t; +struct filter_info_t +{ + pixman_filter_t value; + char name[28]; +}; + +static const filter_info_t filters[] = +{ + { PIXMAN_FILTER_NEAREST, "NEAREST" }, + { PIXMAN_FILTER_BILINEAR, "BILINEAR" }, + { PIXMAN_FILTER_CONVOLUTION, "CONVOLUTION" }, +}; + +typedef struct repeat_info_t repeat_info_t; +struct repeat_info_t +{ + pixman_repeat_t value; + char name[28]; +}; + + +static const repeat_info_t repeats[] = +{ + { PIXMAN_REPEAT_PAD, "PAD" }, + { PIXMAN_REPEAT_REFLECT, "REFLECT" }, + { PIXMAN_REPEAT_NORMAL, "NORMAL" } +}; + +static int +do_test (int32_t dst_size, + int32_t src_size, + int32_t src_offs, + int32_t scale_factor) +{ + int i, j; + + for (i = 0; i < ARRAY_LENGTH (filters); ++i) + { + for (j = 0; j < ARRAY_LENGTH (repeats); ++j) + { + /* horizontal test */ + if (run_test (dst_size, 1, + src_size, 1, + src_offs, 0, + scale_factor, 65536, + filters[i].value, + repeats[j].value) != 0) + { + printf ("Vertical test failed with %s filter and repeat mode %s\n", + filters[i].name, repeats[j].name); + + return 1; + } + + /* vertical test */ + if (run_test (1, dst_size, + 1, src_size, + 0, src_offs, + 65536, scale_factor, + filters[i].value, + repeats[j].value) != 0) + { + printf ("Vertical test failed with %s filter and repeat mode %s\n", + filters[i].name, repeats[j].name); + + return 1; + } + } + } + + return 0; +} + +int +main (int argc, char *argv[]) +{ + int i; + + pixman_disable_out_of_bounds_workaround (); + + /* can potentially crash */ + assert (do_test ( + 48000, 32767, 1, 65536 * 128) == 0); + + /* can potentially get into a deadloop */ + assert (do_test ( + 16384, 65536, 32, 32768) == 0); + + /* can potentially access memory outside source image buffer */ + assert (do_test ( + 10, 10, 0, 1) == 0); + assert (do_test ( + 10, 10, 0, 0) == 0); + + for (i = 0; i < 100; ++i) + { + pixman_fixed_t one_seventh = + (((pixman_fixed_48_16_t)pixman_fixed_1) << 16) / (7 << 16); + + assert (do_test ( + 1, 7, 3, one_seventh + i - 50) == 0); + } + + for (i = 0; i < 100; ++i) + { + pixman_fixed_t scale = + (((pixman_fixed_48_16_t)pixman_fixed_1) << 16) / (32767 << 16); + + assert (do_test ( + 1, 32767, 16383, scale + i - 50) == 0); + } + + /* can potentially provide invalid results (out of range matrix stuff) */ + assert (do_test ( + 48000, 32767, 16384, 65536 * 128) == 0); + + return 0; +} diff --git a/src/pixman/test/scaling-helpers-test.c b/src/pixman/test/scaling-helpers-test.c new file mode 100644 index 0000000..cd5ace0 --- /dev/null +++ b/src/pixman/test/scaling-helpers-test.c @@ -0,0 +1,92 @@ +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include "utils.h" +#include "pixman-inlines.h" + +/* A trivial reference implementation for + * 'bilinear_pad_repeat_get_scanline_bounds' + */ +static void +bilinear_pad_repeat_get_scanline_bounds_ref (int32_t source_image_width, + pixman_fixed_t vx_, + pixman_fixed_t unit_x, + int32_t * left_pad, + int32_t * left_tz, + int32_t * width, + int32_t * right_tz, + int32_t * right_pad) +{ + int w = *width; + int64_t vx = vx_; + *left_pad = 0; + *left_tz = 0; + *width = 0; + *right_tz = 0; + *right_pad = 0; + while (--w >= 0) + { + if (vx < 0) + { + if (vx + pixman_fixed_1 < 0) + *left_pad += 1; + else + *left_tz += 1; + } + else if (vx + pixman_fixed_1 >= pixman_int_to_fixed (source_image_width)) + { + if (vx >= pixman_int_to_fixed (source_image_width)) + *right_pad += 1; + else + *right_tz += 1; + } + else + { + *width += 1; + } + vx += unit_x; + } +} + +int +main (void) +{ + int i; + prng_srand (0); + for (i = 0; i < 10000; i++) + { + int32_t left_pad1, left_tz1, width1, right_tz1, right_pad1; + int32_t left_pad2, left_tz2, width2, right_tz2, right_pad2; + pixman_fixed_t vx = prng_rand_n(10000 << 16) - (3000 << 16); + int32_t width = prng_rand_n(10000); + int32_t source_image_width = prng_rand_n(10000) + 1; + pixman_fixed_t unit_x = prng_rand_n(10 << 16) + 1; + width1 = width2 = width; + + bilinear_pad_repeat_get_scanline_bounds_ref (source_image_width, + vx, + unit_x, + &left_pad1, + &left_tz1, + &width1, + &right_tz1, + &right_pad1); + + bilinear_pad_repeat_get_scanline_bounds (source_image_width, + vx, + unit_x, + &left_pad2, + &left_tz2, + &width2, + &right_tz2, + &right_pad2); + + assert (left_pad1 == left_pad2); + assert (left_tz1 == left_tz2); + assert (width1 == width2); + assert (right_tz1 == right_tz2); + assert (right_pad1 == right_pad2); + } + + return 0; +} diff --git a/src/pixman/test/scaling-test.c b/src/pixman/test/scaling-test.c new file mode 100644 index 0000000..e2f7fa9 --- /dev/null +++ b/src/pixman/test/scaling-test.c @@ -0,0 +1,402 @@ +/* + * Test program, which can detect some problems with nearest neighbour + * and bilinear scaling in pixman. Testing is done by running lots + * of random SRC and OVER compositing operations a8r8g8b8, x8a8r8g8b8 + * and r5g6b5 color formats. + * + * Script 'fuzzer-find-diff.pl' can be used to narrow down the problem in + * the case of test failure. + */ +#include <stdlib.h> +#include <stdio.h> +#include "utils.h" + +#define MAX_SRC_WIDTH 48 +#define MAX_SRC_HEIGHT 8 +#define MAX_DST_WIDTH 48 +#define MAX_DST_HEIGHT 8 +#define MAX_STRIDE 4 + +/* + * Composite operation with pseudorandom images + */ + +static pixman_format_code_t +get_format (int bpp) +{ + if (bpp == 4) + { + switch (prng_rand_n (4)) + { + default: + case 0: + return PIXMAN_a8r8g8b8; + case 1: + return PIXMAN_x8r8g8b8; + case 2: + return PIXMAN_a8b8g8r8; + case 3: + return PIXMAN_x8b8g8r8; + } + } + else + { + return PIXMAN_r5g6b5; + } +} + +uint32_t +test_composite (int testnum, + int verbose) +{ + int i; + pixman_image_t * src_img; + pixman_image_t * mask_img; + pixman_image_t * dst_img; + pixman_transform_t transform; + pixman_region16_t clip; + int src_width, src_height; + int mask_width, mask_height; + int dst_width, dst_height; + int src_stride, mask_stride, dst_stride; + int src_x, src_y; + int mask_x, mask_y; + int dst_x, dst_y; + int src_bpp; + int mask_bpp = 1; + int dst_bpp; + int w, h; + pixman_fixed_t scale_x = 65536, scale_y = 65536; + pixman_fixed_t translate_x = 0, translate_y = 0; + pixman_fixed_t mask_scale_x = 65536, mask_scale_y = 65536; + pixman_fixed_t mask_translate_x = 0, mask_translate_y = 0; + pixman_op_t op; + pixman_repeat_t repeat = PIXMAN_REPEAT_NONE; + pixman_repeat_t mask_repeat = PIXMAN_REPEAT_NONE; + pixman_format_code_t src_fmt, dst_fmt; + uint32_t * srcbuf; + uint32_t * dstbuf; + uint32_t * maskbuf; + uint32_t crc32; + FLOAT_REGS_CORRUPTION_DETECTOR_START (); + + prng_srand (testnum); + + src_bpp = (prng_rand_n (2) == 0) ? 2 : 4; + dst_bpp = (prng_rand_n (2) == 0) ? 2 : 4; + switch (prng_rand_n (3)) + { + case 0: + op = PIXMAN_OP_SRC; + break; + case 1: + op = PIXMAN_OP_OVER; + break; + default: + op = PIXMAN_OP_ADD; + break; + } + + src_width = prng_rand_n (MAX_SRC_WIDTH) + 1; + src_height = prng_rand_n (MAX_SRC_HEIGHT) + 1; + + if (prng_rand_n (2)) + { + mask_width = prng_rand_n (MAX_SRC_WIDTH) + 1; + mask_height = prng_rand_n (MAX_SRC_HEIGHT) + 1; + } + else + { + mask_width = mask_height = 1; + } + + dst_width = prng_rand_n (MAX_DST_WIDTH) + 1; + dst_height = prng_rand_n (MAX_DST_HEIGHT) + 1; + src_stride = src_width * src_bpp + prng_rand_n (MAX_STRIDE) * src_bpp; + mask_stride = mask_width * mask_bpp + prng_rand_n (MAX_STRIDE) * mask_bpp; + dst_stride = dst_width * dst_bpp + prng_rand_n (MAX_STRIDE) * dst_bpp; + + if (src_stride & 3) + src_stride += 2; + + if (mask_stride & 1) + mask_stride += 1; + if (mask_stride & 2) + mask_stride += 2; + + if (dst_stride & 3) + dst_stride += 2; + + src_x = -(src_width / 4) + prng_rand_n (src_width * 3 / 2); + src_y = -(src_height / 4) + prng_rand_n (src_height * 3 / 2); + mask_x = -(mask_width / 4) + prng_rand_n (mask_width * 3 / 2); + mask_y = -(mask_height / 4) + prng_rand_n (mask_height * 3 / 2); + dst_x = -(dst_width / 4) + prng_rand_n (dst_width * 3 / 2); + dst_y = -(dst_height / 4) + prng_rand_n (dst_height * 3 / 2); + w = prng_rand_n (dst_width * 3 / 2 - dst_x); + h = prng_rand_n (dst_height * 3 / 2 - dst_y); + + srcbuf = (uint32_t *)malloc (src_stride * src_height); + maskbuf = (uint32_t *)malloc (mask_stride * mask_height); + dstbuf = (uint32_t *)malloc (dst_stride * dst_height); + + prng_randmemset (srcbuf, src_stride * src_height, 0); + prng_randmemset (maskbuf, mask_stride * mask_height, 0); + prng_randmemset (dstbuf, dst_stride * dst_height, 0); + + src_fmt = get_format (src_bpp); + dst_fmt = get_format (dst_bpp); + + if (prng_rand_n (2)) + { + srcbuf += (src_stride / 4) * (src_height - 1); + src_stride = - src_stride; + } + + if (prng_rand_n (2)) + { + maskbuf += (mask_stride / 4) * (mask_height - 1); + mask_stride = - mask_stride; + } + + if (prng_rand_n (2)) + { + dstbuf += (dst_stride / 4) * (dst_height - 1); + dst_stride = - dst_stride; + } + + src_img = pixman_image_create_bits ( + src_fmt, src_width, src_height, srcbuf, src_stride); + + mask_img = pixman_image_create_bits ( + PIXMAN_a8, mask_width, mask_height, maskbuf, mask_stride); + + dst_img = pixman_image_create_bits ( + dst_fmt, dst_width, dst_height, dstbuf, dst_stride); + + image_endian_swap (src_img); + image_endian_swap (dst_img); + + if (prng_rand_n (4) > 0) + { + scale_x = -32768 * 3 + prng_rand_n (65536 * 5); + scale_y = -32768 * 3 + prng_rand_n (65536 * 5); + translate_x = prng_rand_n (65536); + translate_y = prng_rand_n (65536); + pixman_transform_init_scale (&transform, scale_x, scale_y); + pixman_transform_translate (&transform, NULL, translate_x, translate_y); + pixman_image_set_transform (src_img, &transform); + } + + if (prng_rand_n (2) > 0) + { + mask_scale_x = -32768 * 3 + prng_rand_n (65536 * 5); + mask_scale_y = -32768 * 3 + prng_rand_n (65536 * 5); + mask_translate_x = prng_rand_n (65536); + mask_translate_y = prng_rand_n (65536); + pixman_transform_init_scale (&transform, mask_scale_x, mask_scale_y); + pixman_transform_translate (&transform, NULL, mask_translate_x, mask_translate_y); + pixman_image_set_transform (mask_img, &transform); + } + + switch (prng_rand_n (4)) + { + case 0: + mask_repeat = PIXMAN_REPEAT_NONE; + break; + + case 1: + mask_repeat = PIXMAN_REPEAT_NORMAL; + break; + + case 2: + mask_repeat = PIXMAN_REPEAT_PAD; + break; + + case 3: + mask_repeat = PIXMAN_REPEAT_REFLECT; + break; + + default: + break; + } + pixman_image_set_repeat (mask_img, mask_repeat); + + switch (prng_rand_n (4)) + { + case 0: + repeat = PIXMAN_REPEAT_NONE; + break; + + case 1: + repeat = PIXMAN_REPEAT_NORMAL; + break; + + case 2: + repeat = PIXMAN_REPEAT_PAD; + break; + + case 3: + repeat = PIXMAN_REPEAT_REFLECT; + break; + + default: + break; + } + pixman_image_set_repeat (src_img, repeat); + + if (prng_rand_n (2)) + pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0); + else + pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0); + + if (prng_rand_n (2)) + pixman_image_set_filter (mask_img, PIXMAN_FILTER_NEAREST, NULL, 0); + else + pixman_image_set_filter (mask_img, PIXMAN_FILTER_BILINEAR, NULL, 0); + + if (verbose) + { + printf ("src_fmt=%s, dst_fmt=%s\n", + format_name (src_fmt), format_name (dst_fmt)); + printf ("op=%s, scale_x=%d, scale_y=%d, repeat=%d\n", + operator_name (op), scale_x, scale_y, repeat); + printf ("translate_x=%d, translate_y=%d\n", + translate_x, translate_y); + printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n", + src_width, src_height, dst_width, dst_height); + printf ("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n", + src_x, src_y, dst_x, dst_y); + printf ("w=%d, h=%d\n", w, h); + } + + if (prng_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = prng_rand_n (2) + 1; + + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = prng_rand_n (src_width); + clip_boxes[i].y1 = prng_rand_n (src_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + prng_rand_n (src_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + prng_rand_n (src_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("source clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (src_img, &clip); + pixman_image_set_source_clipping (src_img, 1); + pixman_region_fini (&clip); + } + + if (prng_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = prng_rand_n (2) + 1; + + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = prng_rand_n (mask_width); + clip_boxes[i].y1 = prng_rand_n (mask_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + prng_rand_n (mask_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + prng_rand_n (mask_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("mask clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (mask_img, &clip); + pixman_image_set_source_clipping (mask_img, 1); + pixman_region_fini (&clip); + } + + if (prng_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = prng_rand_n (2) + 1; + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = prng_rand_n (dst_width); + clip_boxes[i].y1 = prng_rand_n (dst_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + prng_rand_n (dst_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + prng_rand_n (dst_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("destination clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (dst_img, &clip); + pixman_region_fini (&clip); + } + + if (prng_rand_n (2) == 0) + pixman_image_composite (op, src_img, NULL, dst_img, + src_x, src_y, 0, 0, dst_x, dst_y, w, h); + else + pixman_image_composite (op, src_img, mask_img, dst_img, + src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h); + + crc32 = compute_crc32_for_image (0, dst_img); + + if (verbose) + print_image (dst_img); + + pixman_image_unref (src_img); + pixman_image_unref (mask_img); + pixman_image_unref (dst_img); + + if (src_stride < 0) + srcbuf += (src_stride / 4) * (src_height - 1); + + if (mask_stride < 0) + maskbuf += (mask_stride / 4) * (mask_height - 1); + + if (dst_stride < 0) + dstbuf += (dst_stride / 4) * (dst_height - 1); + + free (srcbuf); + free (maskbuf); + free (dstbuf); + + FLOAT_REGS_CORRUPTION_DETECTOR_FINISH (); + return crc32; +} + +#if BILINEAR_INTERPOLATION_BITS == 7 +#define CHECKSUM 0x92E0F068 +#elif BILINEAR_INTERPOLATION_BITS == 4 +#define CHECKSUM 0x8EFFA1E5 +#else +#define CHECKSUM 0x00000000 +#endif + +int +main (int argc, const char *argv[]) +{ + pixman_disable_out_of_bounds_workaround (); + + return fuzzer_test_main("scaling", 8000000, CHECKSUM, + test_composite, argc, argv); +} diff --git a/src/pixman/test/stress-test.c b/src/pixman/test/stress-test.c new file mode 100644 index 0000000..1f03c75 --- /dev/null +++ b/src/pixman/test/stress-test.c @@ -0,0 +1,1040 @@ +#include <stdio.h> +#include <stdlib.h> +#include "utils.h" +#include <sys/types.h> + +#if 0 +#define fence_malloc malloc +#define fence_free free +#define make_random_bytes malloc +#endif + +static const pixman_format_code_t image_formats[] = +{ + PIXMAN_a8r8g8b8, + PIXMAN_x8r8g8b8, + PIXMAN_r5g6b5, + PIXMAN_r3g3b2, + PIXMAN_a8, + PIXMAN_a8b8g8r8, + PIXMAN_x8b8g8r8, + PIXMAN_b8g8r8a8, + PIXMAN_b8g8r8x8, + PIXMAN_r8g8b8a8, + PIXMAN_r8g8b8x8, + PIXMAN_x14r6g6b6, + PIXMAN_r8g8b8, + PIXMAN_b8g8r8, + PIXMAN_a8r8g8b8_sRGB, + PIXMAN_r5g6b5, + PIXMAN_b5g6r5, + PIXMAN_x2r10g10b10, + PIXMAN_a2r10g10b10, + PIXMAN_x2b10g10r10, + PIXMAN_a2b10g10r10, + PIXMAN_a1r5g5b5, + PIXMAN_x1r5g5b5, + PIXMAN_a1b5g5r5, + PIXMAN_x1b5g5r5, + PIXMAN_a4r4g4b4, + PIXMAN_x4r4g4b4, + PIXMAN_a4b4g4r4, + PIXMAN_x4b4g4r4, + PIXMAN_a8, + PIXMAN_r3g3b2, + PIXMAN_b2g3r3, + PIXMAN_a2r2g2b2, + PIXMAN_a2b2g2r2, + PIXMAN_c8, + PIXMAN_g8, + PIXMAN_x4c4, + PIXMAN_x4g4, + PIXMAN_c4, + PIXMAN_g4, + PIXMAN_g1, + PIXMAN_x4a4, + PIXMAN_a4, + PIXMAN_r1g2b1, + PIXMAN_b1g2r1, + PIXMAN_a1r1g1b1, + PIXMAN_a1b1g1r1, + PIXMAN_a1 +}; + +static pixman_filter_t filters[] = +{ + PIXMAN_FILTER_NEAREST, + PIXMAN_FILTER_BILINEAR, + PIXMAN_FILTER_FAST, + PIXMAN_FILTER_GOOD, + PIXMAN_FILTER_BEST, + PIXMAN_FILTER_CONVOLUTION +}; + +static int +get_size (void) +{ + switch (prng_rand_n (28)) + { + case 0: + return 1; + + case 1: + return 2; + + default: + case 2: + return prng_rand_n (100); + + case 4: + return prng_rand_n (2000) + 1000; + + case 5: + return 65535; + + case 6: + return 65536; + + case 7: + return prng_rand_n (64000) + 63000; + } +} + +static void +destroy (pixman_image_t *image, void *data) +{ + if (image->type == BITS && image->bits.free_me != image->bits.bits) + { + uint32_t *bits; + + if (image->bits.bits != (void *)0x01) + { + bits = image->bits.bits; + + if (image->bits.rowstride < 0) + bits -= (- image->bits.rowstride * (image->bits.height - 1)); + + fence_free (bits); + } + } + + free (data); +} + +static uint32_t +real_reader (const void *src, int size) +{ + switch (size) + { + case 1: + return *(uint8_t *)src; + case 2: + return *(uint16_t *)src; + case 4: + return *(uint32_t *)src; + default: + assert (0); + return 0; /* silence MSVC */ + } +} + +static void +real_writer (void *src, uint32_t value, int size) +{ + switch (size) + { + case 1: + *(uint8_t *)src = value; + break; + + case 2: + *(uint16_t *)src = value; + break; + + case 4: + *(uint32_t *)src = value; + break; + + default: + assert (0); + break; + } +} + +static uint32_t +fake_reader (const void *src, int size) +{ + uint32_t r = prng_rand (); + + assert (size == 1 || size == 2 || size == 4); + + return r >> (32 - (size * 8)); +} + +static void +fake_writer (void *src, uint32_t value, int size) +{ + assert (size == 1 || size == 2 || size == 4); +} + +static int32_t +log_rand (void) +{ + uint32_t mask; + + mask = (1 << prng_rand_n (10)) - 1; + + return (prng_rand () & mask) - (mask >> 1); +} + +static int32_t +rand_x (pixman_image_t *image) +{ + if (image->type == BITS) + return prng_rand_n (image->bits.width); + else + return log_rand (); +} + +static int32_t +rand_y (pixman_image_t *image) +{ + if (image->type == BITS) + return prng_rand_n (image->bits.height); + else + return log_rand (); +} + +typedef enum +{ + DONT_CARE, + PREFER_ALPHA, + REQUIRE_ALPHA +} alpha_preference_t; + +static pixman_format_code_t +random_format (alpha_preference_t alpha) +{ + pixman_format_code_t format; + int n = prng_rand_n (ARRAY_LENGTH (image_formats)); + + if (alpha >= PREFER_ALPHA && + (alpha == REQUIRE_ALPHA || prng_rand_n (4) != 0)) + { + do + { + format = image_formats[n++ % ARRAY_LENGTH (image_formats)]; + } while (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_A); + } + else + { + format = image_formats[n]; + } + + return format; +} + +static pixman_image_t * +create_random_bits_image (alpha_preference_t alpha_preference) +{ + pixman_format_code_t format; + pixman_indexed_t *indexed; + pixman_image_t *image; + int width, height, stride; + uint32_t *bits; + pixman_read_memory_func_t read_func = NULL; + pixman_write_memory_func_t write_func = NULL; + pixman_filter_t filter; + pixman_fixed_t *coefficients = NULL; + int n_coefficients = 0; + + /* format */ + format = random_format (alpha_preference); + + indexed = NULL; + if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR) + { + indexed = malloc (sizeof (pixman_indexed_t)); + + initialize_palette (indexed, PIXMAN_FORMAT_BPP (format), TRUE); + } + else if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY) + { + indexed = malloc (sizeof (pixman_indexed_t)); + + initialize_palette (indexed, PIXMAN_FORMAT_BPP (format), FALSE); + } + else + { + indexed = NULL; + } + + /* size */ + width = get_size (); + height = get_size (); + + while ((uint64_t)width * height > 200000) + { + if (prng_rand_n(2) == 0) + height = 200000 / width; + else + width = 200000 / height; + } + + if (height == 0) + height = 1; + if (width == 0) + width = 1; + + /* bits */ + switch (prng_rand_n (7)) + { + default: + case 0: + stride = width * PIXMAN_FORMAT_BPP (format) + prng_rand_n (17); + stride = (stride + 3) & (~3); + bits = (uint32_t *)make_random_bytes (height * stride); + break; + + case 1: + stride = 0; + bits = NULL; + break; + + case 2: /* Zero-filled */ + stride = width * PIXMAN_FORMAT_BPP (format) + prng_rand_n (17); + stride = (stride + 3) & (~3); + bits = fence_malloc (height * stride); + if (!bits) + return NULL; + memset (bits, 0, height * stride); + break; + + case 3: /* Filled with 0xFF */ + stride = width * PIXMAN_FORMAT_BPP (format) + prng_rand_n (17); + stride = (stride + 3) & (~3); + bits = fence_malloc (height * stride); + if (!bits) + return NULL; + memset (bits, 0xff, height * stride); + break; + + case 4: /* bits is a bad pointer, has read/write functions */ + stride = 232; + bits = (void *)0x01; + read_func = fake_reader; + write_func = fake_writer; + break; + + case 5: /* bits is a real pointer, has read/write functions */ + stride = width * PIXMAN_FORMAT_BPP (format) + prng_rand_n (17); + stride = (stride + 3) & (~3); + bits = fence_malloc (height * stride); + if (!bits) + return NULL; + memset (bits, 0xff, height * stride); + read_func = real_reader; + write_func = real_writer; + break; + + case 6: /* bits is a real pointer, stride is negative */ + stride = (width * PIXMAN_FORMAT_BPP (format) + prng_rand_n (17)); + stride = (stride + 3) & (~3); + bits = (uint32_t *)make_random_bytes (height * stride); + if (!bits) + return NULL; + bits += ((height - 1) * stride) / 4; + stride = - stride; + break; + } + + /* Filter */ + filter = filters[prng_rand_n (ARRAY_LENGTH (filters))]; + if (filter == PIXMAN_FILTER_CONVOLUTION) + { + int width = prng_rand_n (3); + int height = prng_rand_n (4); + + n_coefficients = width * height + 2; + coefficients = malloc (n_coefficients * sizeof (pixman_fixed_t)); + + if (coefficients) + { + int i; + + for (i = 0; i < width * height; ++i) + coefficients[i + 2] = prng_rand(); + + coefficients[0] = width << 16; + coefficients[1] = height << 16; + } + else + { + filter = PIXMAN_FILTER_BEST; + } + } + + /* Finally create the image */ + image = pixman_image_create_bits (format, width, height, bits, stride); + if (!image) + return NULL; + + pixman_image_set_indexed (image, indexed); + pixman_image_set_destroy_function (image, destroy, indexed); + pixman_image_set_accessors (image, read_func, write_func); + pixman_image_set_filter (image, filter, coefficients, n_coefficients); + + return image; +} + +static pixman_repeat_t repeats[] = +{ + PIXMAN_REPEAT_NONE, + PIXMAN_REPEAT_NORMAL, + PIXMAN_REPEAT_REFLECT, + PIXMAN_REPEAT_PAD +}; + +static uint32_t +absolute (int32_t i) +{ + return i < 0? -i : i; +} + +static void +set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map) +{ + pixman_repeat_t repeat; + + /* Set properties that are generic to all images */ + + /* Repeat */ + repeat = repeats[prng_rand_n (ARRAY_LENGTH (repeats))]; + pixman_image_set_repeat (image, repeat); + + /* Alpha map */ + if (allow_alpha_map && prng_rand_n (4) == 0) + { + pixman_image_t *alpha_map; + int16_t x, y; + + alpha_map = create_random_bits_image (DONT_CARE); + + if (alpha_map) + { + set_general_properties (alpha_map, FALSE); + + x = rand_x (image) - image->bits.width / 2; + y = rand_y (image) - image->bits.height / 2; + + pixman_image_set_alpha_map (image, alpha_map, x, y); + + pixman_image_unref (alpha_map); + } + } + + /* Component alpha */ + pixman_image_set_component_alpha (image, prng_rand_n (3) == 0); + + /* Clip region */ + if (prng_rand_n (8) < 2) + { + pixman_region32_t region; + int i, n_rects; + + pixman_region32_init (®ion); + + switch (prng_rand_n (12)) + { + case 0: + n_rects = 0; + break; + + case 1: case 2: case 3: + n_rects = 1; + break; + + case 4: case 5: + n_rects = 2; + break; + + case 6: case 7: + n_rects = 3; + break; + + default: + n_rects = prng_rand_n (100); + break; + } + + for (i = 0; i < n_rects; ++i) + { + uint32_t width, height; + int x, y; + + x = log_rand(); + y = log_rand(); + width = absolute (log_rand ()) + 1; + height = absolute (log_rand ()) + 1; + + pixman_region32_union_rect ( + ®ion, ®ion, x, y, width, height); + } + + if (image->type == BITS && prng_rand_n (8) != 0) + { + uint32_t width, height; + int x, y; + int i; + + /* Also add a couple of clip rectangles inside the image + * so that compositing will actually take place. + */ + for (i = 0; i < 5; ++i) + { + x = prng_rand_n (2 * image->bits.width) - image->bits.width; + y = prng_rand_n (2 * image->bits.height) - image->bits.height; + width = prng_rand_n (image->bits.width) - x + 10; + height = prng_rand_n (image->bits.height) - y + 10; + + if (width + x < x) + width = INT32_MAX - x; + if (height + y < y) + height = INT32_MAX - y; + + pixman_region32_union_rect ( + ®ion, ®ion, x, y, width, height); + } + } + + pixman_image_set_clip_region32 (image, ®ion); + + pixman_region32_fini (®ion); + } + + /* Whether source clipping is enabled */ + pixman_image_set_source_clipping (image, !!prng_rand_n (2)); + + /* Client clip */ + pixman_image_set_has_client_clip (image, !!prng_rand_n (2)); + + /* Transform */ + if (prng_rand_n (5) < 2) + { + pixman_transform_t xform; + int i, j, k; + uint32_t tx, ty, sx, sy; + uint32_t c, s; + + memset (&xform, 0, sizeof xform); + xform.matrix[0][0] = pixman_fixed_1; + xform.matrix[1][1] = pixman_fixed_1; + xform.matrix[2][2] = pixman_fixed_1; + + for (k = 0; k < 3; ++k) + { + switch (prng_rand_n (4)) + { + case 0: + /* rotation */ + c = prng_rand_n (2 * 65536) - 65536; + s = prng_rand_n (2 * 65536) - 65536; + pixman_transform_rotate (&xform, NULL, c, s); + break; + + case 1: + /* translation */ + tx = prng_rand(); + ty = prng_rand(); + pixman_transform_translate (&xform, NULL, tx, ty); + break; + + case 2: + /* scale */ + sx = prng_rand(); + sy = prng_rand(); + pixman_transform_scale (&xform, NULL, sx, sy); + break; + + case 3: + if (prng_rand_n (16) == 0) + { + /* random */ + for (i = 0; i < 3; ++i) + for (j = 0; j < 3; ++j) + xform.matrix[i][j] = prng_rand(); + break; + } + else if (prng_rand_n (16) == 0) + { + /* zero */ + memset (&xform, 0, sizeof xform); + } + break; + } + } + + pixman_image_set_transform (image, &xform); + } +} + +static pixman_color_t +random_color (void) +{ + pixman_color_t color = + { + prng_rand() & 0xffff, + prng_rand() & 0xffff, + prng_rand() & 0xffff, + prng_rand() & 0xffff, + }; + + return color; +} + + +static pixman_image_t * +create_random_solid_image (void) +{ + pixman_color_t color = random_color(); + pixman_image_t *image = pixman_image_create_solid_fill (&color); + + return image; +} + +static pixman_gradient_stop_t * +create_random_stops (int *n_stops) +{ + pixman_fixed_t step; + pixman_fixed_t s; + int i; + pixman_gradient_stop_t *stops; + + *n_stops = prng_rand_n (50) + 1; + + step = pixman_fixed_1 / *n_stops; + + stops = malloc (*n_stops * sizeof (pixman_gradient_stop_t)); + + s = 0; + for (i = 0; i < (*n_stops) - 1; ++i) + { + stops[i].x = s; + stops[i].color = random_color(); + + s += step; + } + + stops[*n_stops - 1].x = pixman_fixed_1; + stops[*n_stops - 1].color = random_color(); + + return stops; +} + +static pixman_point_fixed_t +create_random_point (void) +{ + pixman_point_fixed_t p; + + p.x = log_rand (); + p.y = log_rand (); + + return p; +} + +static pixman_image_t * +create_random_linear_image (void) +{ + int n_stops; + pixman_gradient_stop_t *stops; + pixman_point_fixed_t p1, p2; + pixman_image_t *result; + + stops = create_random_stops (&n_stops); + if (!stops) + return NULL; + + p1 = create_random_point (); + p2 = create_random_point (); + + result = pixman_image_create_linear_gradient (&p1, &p2, stops, n_stops); + + free (stops); + + return result; +} + +static pixman_image_t * +create_random_radial_image (void) +{ + int n_stops; + pixman_gradient_stop_t *stops; + pixman_point_fixed_t inner_c, outer_c; + pixman_fixed_t inner_r, outer_r; + pixman_image_t *result; + + inner_c = create_random_point(); + outer_c = create_random_point(); + inner_r = prng_rand(); + outer_r = prng_rand(); + + stops = create_random_stops (&n_stops); + + if (!stops) + return NULL; + + result = pixman_image_create_radial_gradient ( + &inner_c, &outer_c, inner_r, outer_r, stops, n_stops); + + free (stops); + + return result; +} + +static pixman_image_t * +create_random_conical_image (void) +{ + pixman_gradient_stop_t *stops; + int n_stops; + pixman_point_fixed_t c; + pixman_fixed_t angle; + pixman_image_t *result; + + c = create_random_point(); + angle = prng_rand(); + + stops = create_random_stops (&n_stops); + + if (!stops) + return NULL; + + result = pixman_image_create_conical_gradient (&c, angle, stops, n_stops); + + free (stops); + + return result; +} + +static pixman_image_t * +create_random_image (void) +{ + pixman_image_t *result; + + switch (prng_rand_n (5)) + { + default: + case 0: + result = create_random_bits_image (DONT_CARE); + break; + + case 1: + result = create_random_solid_image (); + break; + + case 2: + result = create_random_linear_image (); + break; + + case 3: + result = create_random_radial_image (); + break; + + case 4: + result = create_random_conical_image (); + break; + } + + if (result) + set_general_properties (result, TRUE); + + return result; +} + +static void +random_line (pixman_line_fixed_t *line, int width, int height) +{ + line->p1.x = prng_rand_n (width) << 16; + line->p1.y = prng_rand_n (height) << 16; + line->p2.x = prng_rand_n (width) << 16; + line->p2.y = prng_rand_n (height) << 16; +} + +static pixman_trapezoid_t * +create_random_trapezoids (int *n_traps, int height, int width) +{ + pixman_trapezoid_t *trapezoids; + int i; + + *n_traps = prng_rand_n (16) + 1; + + trapezoids = malloc (sizeof (pixman_trapezoid_t) * *n_traps); + + for (i = 0; i < *n_traps; ++i) + { + pixman_trapezoid_t *t = &(trapezoids[i]); + + t->top = prng_rand_n (height) << 16; + t->bottom = prng_rand_n (height) << 16; + + random_line (&t->left, height, width); + random_line (&t->right, height, width); + } + + return trapezoids; +} + +static const pixman_op_t op_list[] = +{ + PIXMAN_OP_SRC, + PIXMAN_OP_OVER, + PIXMAN_OP_ADD, + PIXMAN_OP_CLEAR, + PIXMAN_OP_SRC, + PIXMAN_OP_DST, + PIXMAN_OP_OVER, + PIXMAN_OP_OVER_REVERSE, + PIXMAN_OP_IN, + PIXMAN_OP_IN_REVERSE, + PIXMAN_OP_OUT, + PIXMAN_OP_OUT_REVERSE, + PIXMAN_OP_ATOP, + PIXMAN_OP_ATOP_REVERSE, + PIXMAN_OP_XOR, + PIXMAN_OP_ADD, + PIXMAN_OP_SATURATE, + PIXMAN_OP_DISJOINT_CLEAR, + PIXMAN_OP_DISJOINT_SRC, + PIXMAN_OP_DISJOINT_DST, + PIXMAN_OP_DISJOINT_OVER, + PIXMAN_OP_DISJOINT_OVER_REVERSE, + PIXMAN_OP_DISJOINT_IN, + PIXMAN_OP_DISJOINT_IN_REVERSE, + PIXMAN_OP_DISJOINT_OUT, + PIXMAN_OP_DISJOINT_OUT_REVERSE, + PIXMAN_OP_DISJOINT_ATOP, + PIXMAN_OP_DISJOINT_ATOP_REVERSE, + PIXMAN_OP_DISJOINT_XOR, + PIXMAN_OP_CONJOINT_CLEAR, + PIXMAN_OP_CONJOINT_SRC, + PIXMAN_OP_CONJOINT_DST, + PIXMAN_OP_CONJOINT_OVER, + PIXMAN_OP_CONJOINT_OVER_REVERSE, + PIXMAN_OP_CONJOINT_IN, + PIXMAN_OP_CONJOINT_IN_REVERSE, + PIXMAN_OP_CONJOINT_OUT, + PIXMAN_OP_CONJOINT_OUT_REVERSE, + PIXMAN_OP_CONJOINT_ATOP, + PIXMAN_OP_CONJOINT_ATOP_REVERSE, + PIXMAN_OP_CONJOINT_XOR, + PIXMAN_OP_MULTIPLY, + PIXMAN_OP_SCREEN, + PIXMAN_OP_OVERLAY, + PIXMAN_OP_DARKEN, + PIXMAN_OP_LIGHTEN, + PIXMAN_OP_COLOR_DODGE, + PIXMAN_OP_COLOR_BURN, + PIXMAN_OP_HARD_LIGHT, + PIXMAN_OP_DIFFERENCE, + PIXMAN_OP_EXCLUSION, + PIXMAN_OP_SOFT_LIGHT, + PIXMAN_OP_HSL_HUE, + PIXMAN_OP_HSL_SATURATION, + PIXMAN_OP_HSL_COLOR, + PIXMAN_OP_HSL_LUMINOSITY, +}; + +static void +run_test (uint32_t seed, pixman_bool_t verbose, uint32_t mod) +{ + pixman_image_t *source, *mask, *dest; + pixman_op_t op; + + if (verbose) + { + if (mod == 0 || (seed % mod) == 0) + printf ("Seed 0x%08x\n", seed); + } + + source = mask = dest = NULL; + + prng_srand (seed); + + if (prng_rand_n (8) == 0) + { + int n_traps; + pixman_trapezoid_t *trapezoids; + int p = prng_rand_n (3); + + if (p == 0) + dest = create_random_bits_image (DONT_CARE); + else + dest = create_random_bits_image (REQUIRE_ALPHA); + + if (!dest) + goto out; + + set_general_properties (dest, TRUE); + + if (!(trapezoids = create_random_trapezoids ( + &n_traps, dest->bits.width, dest->bits.height))) + { + goto out; + } + + switch (p) + { + case 0: + source = create_random_image (); + + if (source) + { + op = op_list [prng_rand_n (ARRAY_LENGTH (op_list))]; + + pixman_composite_trapezoids ( + op, source, dest, + random_format (REQUIRE_ALPHA), + rand_x (source), rand_y (source), + rand_x (dest), rand_y (dest), + n_traps, trapezoids); + } + break; + + case 1: + pixman_rasterize_trapezoid ( + dest, &trapezoids[prng_rand_n (n_traps)], + rand_x (dest), rand_y (dest)); + break; + + case 2: + pixman_add_trapezoids ( + dest, rand_x (dest), rand_y (dest), n_traps, trapezoids); + break; + } + + free (trapezoids); + } + else + { + dest = create_random_bits_image (DONT_CARE); + source = create_random_image (); + mask = create_random_image (); + + if (source && mask && dest) + { + set_general_properties (dest, TRUE); + + op = op_list [prng_rand_n (ARRAY_LENGTH (op_list))]; + + pixman_image_composite32 (op, + source, mask, dest, + rand_x (source), rand_y (source), + rand_x (mask), rand_y (mask), + 0, 0, + dest->bits.width, + dest->bits.height); + } + } + +out: + if (source) + pixman_image_unref (source); + if (mask) + pixman_image_unref (mask); + if (dest) + pixman_image_unref (dest); +} + +static pixman_bool_t +get_int (char *s, uint32_t *i) +{ + char *end; + int p; + + p = strtol (s, &end, 0); + + if (end != s && *end == 0) + { + *i = p; + return TRUE; + } + + return FALSE; +} + +int +main (int argc, char **argv) +{ + int verbose = FALSE; + uint32_t seed = 1; + uint32_t n_tests = 8000; + uint32_t mod = 0; + pixman_bool_t use_threads = TRUE; + int32_t i; + + pixman_disable_out_of_bounds_workaround (); + + enable_divbyzero_exceptions(); + + if (getenv ("VERBOSE") != NULL) + verbose = TRUE; + + for (i = 1; i < argc; ++i) + { + if (strcmp (argv[i], "-v") == 0) + { + verbose = TRUE; + + if (i + 1 < argc) + { + get_int (argv[i + 1], &mod); + i++; + } + } + else if (strcmp (argv[i], "-s") == 0 && i + 1 < argc) + { + get_int (argv[i + 1], &seed); + use_threads = FALSE; + i++; + } + else if (strcmp (argv[i], "-n") == 0 && i + 1 < argc) + { + get_int (argv[i + 1], &n_tests); + i++; + } + else + { + if (strcmp (argv[i], "-h") != 0) + printf ("Unknown option '%s'\n\n", argv[i]); + + printf ("Options:\n\n" + "-n <number> Number of tests to run\n" + "-s <seed> Seed of first test (ignored if PIXMAN_RANDOMIZE_TESTS is set)\n" + "-v Print out seeds\n" + "-v <n> Print out every n'th seed\n\n"); + + exit (-1); + } + } + + if (getenv ("PIXMAN_RANDOMIZE_TESTS")) + { + seed = get_random_seed(); + printf ("First seed: 0x%08x\n", seed); + } + + if (use_threads) + { +#ifdef USE_OPENMP +# pragma omp parallel for default(none) shared(verbose, n_tests, mod, seed) +#endif + for (i = 0; i < (int32_t)n_tests; ++i) + run_test (seed + i, verbose, mod); + } + else + { + for (i = 0; i < (int32_t)n_tests; ++i) + run_test (seed + i, verbose, mod); + } + + return 0; +} diff --git a/src/pixman/test/thread-test.c b/src/pixman/test/thread-test.c new file mode 100644 index 0000000..0b07b26 --- /dev/null +++ b/src/pixman/test/thread-test.c @@ -0,0 +1,199 @@ +#include "utils.h" + +#ifndef HAVE_PTHREADS + +int main () +{ + printf ("Skipped thread-test - pthreads not supported\n"); + return 0; +} + +#else + +#include <stdlib.h> +#include <pthread.h> + +typedef struct +{ + int thread_no; + uint32_t *dst_buf; + prng_t prng_state; +} info_t; + +static const pixman_op_t operators[] = +{ + PIXMAN_OP_SRC, + PIXMAN_OP_OVER, + PIXMAN_OP_ADD, + PIXMAN_OP_CLEAR, + PIXMAN_OP_SRC, + PIXMAN_OP_DST, + PIXMAN_OP_OVER, + PIXMAN_OP_OVER_REVERSE, + PIXMAN_OP_IN, + PIXMAN_OP_IN_REVERSE, + PIXMAN_OP_OUT, + PIXMAN_OP_OUT_REVERSE, + PIXMAN_OP_ATOP, + PIXMAN_OP_ATOP_REVERSE, + PIXMAN_OP_XOR, + PIXMAN_OP_ADD, + PIXMAN_OP_SATURATE, + PIXMAN_OP_DISJOINT_CLEAR, + PIXMAN_OP_DISJOINT_SRC, + PIXMAN_OP_DISJOINT_DST, + PIXMAN_OP_DISJOINT_OVER, + PIXMAN_OP_DISJOINT_OVER_REVERSE, + PIXMAN_OP_DISJOINT_IN, + PIXMAN_OP_DISJOINT_IN_REVERSE, + PIXMAN_OP_DISJOINT_OUT, + PIXMAN_OP_DISJOINT_OUT_REVERSE, + PIXMAN_OP_DISJOINT_ATOP, + PIXMAN_OP_DISJOINT_ATOP_REVERSE, + PIXMAN_OP_DISJOINT_XOR, + PIXMAN_OP_CONJOINT_CLEAR, + PIXMAN_OP_CONJOINT_SRC, + PIXMAN_OP_CONJOINT_DST, + PIXMAN_OP_CONJOINT_OVER, + PIXMAN_OP_CONJOINT_OVER_REVERSE, + PIXMAN_OP_CONJOINT_IN, + PIXMAN_OP_CONJOINT_IN_REVERSE, + PIXMAN_OP_CONJOINT_OUT, + PIXMAN_OP_CONJOINT_OUT_REVERSE, + PIXMAN_OP_CONJOINT_ATOP, + PIXMAN_OP_CONJOINT_ATOP_REVERSE, + PIXMAN_OP_CONJOINT_XOR, + PIXMAN_OP_MULTIPLY, + PIXMAN_OP_SCREEN, + PIXMAN_OP_OVERLAY, + PIXMAN_OP_DARKEN, + PIXMAN_OP_LIGHTEN, + PIXMAN_OP_COLOR_DODGE, + PIXMAN_OP_COLOR_BURN, + PIXMAN_OP_HARD_LIGHT, + PIXMAN_OP_DIFFERENCE, + PIXMAN_OP_EXCLUSION, +}; + +static const pixman_format_code_t formats[] = +{ + PIXMAN_a8r8g8b8, + PIXMAN_r5g6b5, + PIXMAN_a8, + PIXMAN_a4, + PIXMAN_a1, + PIXMAN_b5g6r5, + PIXMAN_r8g8b8a8, + PIXMAN_a4r4g4b4 +}; + +#define N_ROUNDS 8192 + +#define RAND_ELT(arr) \ + arr[prng_rand_r(&info->prng_state) % ARRAY_LENGTH (arr)] + +#define DEST_WIDTH (7) + +static void * +thread (void *data) +{ + info_t *info = data; + uint32_t crc32 = 0x0; + uint32_t src_buf[64]; + pixman_image_t *dst_img, *src_img; + int i; + + prng_srand_r (&info->prng_state, info->thread_no); + + for (i = 0; i < N_ROUNDS; ++i) + { + pixman_op_t op; + int rand1, rand2; + + prng_randmemset_r (&info->prng_state, info->dst_buf, + DEST_WIDTH * sizeof (uint32_t), 0); + prng_randmemset_r (&info->prng_state, src_buf, + sizeof (src_buf), 0); + + src_img = pixman_image_create_bits ( + RAND_ELT (formats), 4, 4, src_buf, 16); + dst_img = pixman_image_create_bits ( + RAND_ELT (formats), DEST_WIDTH, 1, info->dst_buf, + DEST_WIDTH * sizeof (uint32_t)); + + image_endian_swap (src_img); + image_endian_swap (dst_img); + + rand2 = prng_rand_r (&info->prng_state) % 4; + rand1 = prng_rand_r (&info->prng_state) % 4; + op = RAND_ELT (operators); + + pixman_image_composite32 ( + op, + src_img, NULL, dst_img, + rand1, rand2, 0, 0, 0, 0, DEST_WIDTH, 1); + + crc32 = compute_crc32_for_image (crc32, dst_img); + + pixman_image_unref (src_img); + pixman_image_unref (dst_img); + } + + return (void *)(uintptr_t)crc32; +} + +static inline uint32_t +byteswap32 (uint32_t x) +{ + return ((x & ((uint32_t)0xFF << 24)) >> 24) | + ((x & ((uint32_t)0xFF << 16)) >> 8) | + ((x & ((uint32_t)0xFF << 8)) << 8) | + ((x & ((uint32_t)0xFF << 0)) << 24); +} + +int +main (void) +{ + uint32_t dest[16 * DEST_WIDTH]; + info_t info[16] = { { 0 } }; + pthread_t threads[16]; + void *retvals[16]; + uint32_t crc32s[16], crc32; + int i; + + for (i = 0; i < 16; ++i) + { + info[i].thread_no = i; + info[i].dst_buf = &dest[i * DEST_WIDTH]; + } + + for (i = 0; i < 16; ++i) + pthread_create (&threads[i], NULL, thread, &info[i]); + + for (i = 0; i < 16; ++i) + pthread_join (threads[i], &retvals[i]); + + for (i = 0; i < 16; ++i) + { + crc32s[i] = (uintptr_t)retvals[i]; + + if (is_little_endian()) + crc32s[i] = byteswap32 (crc32s[i]); + } + + crc32 = compute_crc32 (0, crc32s, sizeof crc32s); + +#define EXPECTED 0xE299B18E + + if (crc32 != EXPECTED) + { + printf ("thread-test failed. Got checksum 0x%08X, expected 0x%08X\n", + crc32, EXPECTED); + return 1; + } + + return 0; +} + +#endif + diff --git a/src/pixman/test/trap-crasher.c b/src/pixman/test/trap-crasher.c new file mode 100644 index 0000000..77be1c9 --- /dev/null +++ b/src/pixman/test/trap-crasher.c @@ -0,0 +1,39 @@ +#include <stdlib.h> +#include "utils.h" + +int +main() +{ + pixman_image_t *dst; + pixman_trapezoid_t traps[] = { + { + 2147483646, + 2147483647, + { + { 0, 0 }, + { 0, 2147483647 } + }, + { + { 65536, 0 }, + { 0, 2147483647 } + } + }, + { + 32768, + - 2147483647, + { + { 0, 0 }, + { 0, 2147483647 } + }, + { + { 65536, 0 }, + { 0, 2147483647 } + } + }, + }; + + dst = pixman_image_create_bits (PIXMAN_a8, 1, 1, NULL, -1); + + pixman_add_trapezoids (dst, 0, 0, ARRAY_LENGTH (traps), traps); + return (0); +} diff --git a/src/pixman/test/utils-prng.c b/src/pixman/test/utils-prng.c new file mode 100644 index 0000000..c27b5be --- /dev/null +++ b/src/pixman/test/utils-prng.c @@ -0,0 +1,298 @@ +/* + * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com> + * + * Based on the public domain implementation of small noncryptographic PRNG + * authored by Bob Jenkins: http://burtleburtle.net/bob/rand/smallprng.html + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "utils.h" +#include "utils-prng.h" + +#if defined(HAVE_GCC_VECTOR_EXTENSIONS) && defined(__SSE2__) +#include <xmmintrin.h> +#endif + +void smallprng_srand_r (smallprng_t *x, uint32_t seed) +{ + uint32_t i; + x->a = 0xf1ea5eed, x->b = x->c = x->d = seed; + for (i = 0; i < 20; ++i) + smallprng_rand_r (x); +} + +/* + * Set a 32-bit seed for PRNG + * + * LCG is used here for generating independent seeds for different + * smallprng instances (in the case if smallprng is also used for + * generating these seeds, "Big Crush" test from TestU01 detects + * some problems in the glued 'prng_rand_128_r' output data). + * Actually we might be even better using some cryptographic + * hash for this purpose, but LCG seems to be also enough for + * passing "Big Crush". + */ +void prng_srand_r (prng_t *x, uint32_t seed) +{ +#ifdef HAVE_GCC_VECTOR_EXTENSIONS + int i; + prng_rand_128_data_t dummy; + smallprng_srand_r (&x->p0, seed); + x->a[0] = x->a[1] = x->a[2] = x->a[3] = 0xf1ea5eed; + x->b[0] = x->c[0] = x->d[0] = (seed = seed * 1103515245 + 12345); + x->b[1] = x->c[1] = x->d[1] = (seed = seed * 1103515245 + 12345); + x->b[2] = x->c[2] = x->d[2] = (seed = seed * 1103515245 + 12345); + x->b[3] = x->c[3] = x->d[3] = (seed = seed * 1103515245 + 12345); + for (i = 0; i < 20; ++i) + prng_rand_128_r (x, &dummy); +#else + smallprng_srand_r (&x->p0, seed); + smallprng_srand_r (&x->p1, (seed = seed * 1103515245 + 12345)); + smallprng_srand_r (&x->p2, (seed = seed * 1103515245 + 12345)); + smallprng_srand_r (&x->p3, (seed = seed * 1103515245 + 12345)); + smallprng_srand_r (&x->p4, (seed = seed * 1103515245 + 12345)); +#endif +} + +static force_inline void +store_rand_128_data (void *addr, prng_rand_128_data_t *d, int aligned) +{ +#ifdef HAVE_GCC_VECTOR_EXTENSIONS + if (aligned) + { + *(uint8x16 *)addr = d->vb; + return; + } + else + { +#ifdef __SSE2__ + /* workaround for http://gcc.gnu.org/PR55614 */ + _mm_storeu_si128 (addr, _mm_loadu_si128 ((__m128i *)d)); + return; +#endif + } +#endif + /* we could try something better for unaligned writes (packed attribute), + * but GCC is not very reliable: http://gcc.gnu.org/PR55454 */ + memcpy (addr, d, 16); +} + +/* + * Helper function and the actual code for "prng_randmemset_r" function + */ +static force_inline void +randmemset_internal (prng_t *prng, + uint8_t *buf, + size_t size, + prng_randmemset_flags_t flags, + int aligned) +{ + prng_t local_prng = *prng; + prng_rand_128_data_t randdata; + size_t i; + + while (size >= 16) + { + prng_rand_128_data_t t; + if (flags == 0) + { + prng_rand_128_r (&local_prng, &randdata); + } + else + { + prng_rand_128_r (&local_prng, &t); + prng_rand_128_r (&local_prng, &randdata); +#ifdef HAVE_GCC_VECTOR_EXTENSIONS + if (flags & RANDMEMSET_MORE_FF) + { + const uint8x16 const_C0 = + { + 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, + 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0 + }; + randdata.vb |= (t.vb >= const_C0); + } + if (flags & RANDMEMSET_MORE_00) + { + const uint8x16 const_40 = + { + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 + }; + randdata.vb &= (t.vb >= const_40); + } + if (flags & RANDMEMSET_MORE_FFFFFFFF) + { + const uint32x4 const_C0000000 = + { + 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000 + }; + randdata.vw |= ((t.vw << 30) >= const_C0000000); + } + if (flags & RANDMEMSET_MORE_00000000) + { + const uint32x4 const_40000000 = + { + 0x40000000, 0x40000000, 0x40000000, 0x40000000 + }; + randdata.vw &= ((t.vw << 30) >= const_40000000); + } +#else + #define PROCESS_ONE_LANE(i) \ + if (flags & RANDMEMSET_MORE_FF) \ + { \ + uint32_t mask_ff = (t.w[i] & (t.w[i] << 1)) & 0x80808080; \ + mask_ff |= mask_ff >> 1; \ + mask_ff |= mask_ff >> 2; \ + mask_ff |= mask_ff >> 4; \ + randdata.w[i] |= mask_ff; \ + } \ + if (flags & RANDMEMSET_MORE_00) \ + { \ + uint32_t mask_00 = (t.w[i] | (t.w[i] << 1)) & 0x80808080; \ + mask_00 |= mask_00 >> 1; \ + mask_00 |= mask_00 >> 2; \ + mask_00 |= mask_00 >> 4; \ + randdata.w[i] &= mask_00; \ + } \ + if (flags & RANDMEMSET_MORE_FFFFFFFF) \ + { \ + int32_t mask_ff = ((t.w[i] << 30) & (t.w[i] << 31)) & \ + 0x80000000; \ + randdata.w[i] |= mask_ff >> 31; \ + } \ + if (flags & RANDMEMSET_MORE_00000000) \ + { \ + int32_t mask_00 = ((t.w[i] << 30) | (t.w[i] << 31)) & \ + 0x80000000; \ + randdata.w[i] &= mask_00 >> 31; \ + } + + PROCESS_ONE_LANE (0) + PROCESS_ONE_LANE (1) + PROCESS_ONE_LANE (2) + PROCESS_ONE_LANE (3) +#endif + } + if (is_little_endian ()) + { + store_rand_128_data (buf, &randdata, aligned); + buf += 16; + } + else + { +#ifdef HAVE_GCC_VECTOR_EXTENSIONS + const uint8x16 bswap_shufflemask = + { + 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 + }; + randdata.vb = __builtin_shuffle (randdata.vb, bswap_shufflemask); + store_rand_128_data (buf, &randdata, aligned); + buf += 16; +#else + uint8_t t1, t2, t3, t4; + #define STORE_ONE_LANE(i) \ + t1 = randdata.b[i * 4 + 3]; \ + t2 = randdata.b[i * 4 + 2]; \ + t3 = randdata.b[i * 4 + 1]; \ + t4 = randdata.b[i * 4 + 0]; \ + *buf++ = t1; \ + *buf++ = t2; \ + *buf++ = t3; \ + *buf++ = t4; + + STORE_ONE_LANE (0) + STORE_ONE_LANE (1) + STORE_ONE_LANE (2) + STORE_ONE_LANE (3) +#endif + } + size -= 16; + } + i = 0; + while (i < size) + { + uint8_t randbyte = prng_rand_r (&local_prng) & 0xFF; + if (flags != 0) + { + uint8_t t = prng_rand_r (&local_prng) & 0xFF; + if ((flags & RANDMEMSET_MORE_FF) && (t >= 0xC0)) + randbyte = 0xFF; + if ((flags & RANDMEMSET_MORE_00) && (t < 0x40)) + randbyte = 0x00; + if (i % 4 == 0 && i + 4 <= size) + { + t = prng_rand_r (&local_prng) & 0xFF; + if ((flags & RANDMEMSET_MORE_FFFFFFFF) && (t >= 0xC0)) + { + memset(&buf[i], 0xFF, 4); + i += 4; + continue; + } + if ((flags & RANDMEMSET_MORE_00000000) && (t < 0x40)) + { + memset(&buf[i], 0x00, 4); + i += 4; + continue; + } + } + } + buf[i] = randbyte; + i++; + } + *prng = local_prng; +} + +/* + * Fill memory buffer with random data. Flags argument may be used + * to tweak some statistics properties: + * RANDMEMSET_MORE_00 - set ~25% of bytes to 0x00 + * RANDMEMSET_MORE_FF - set ~25% of bytes to 0xFF + * RANDMEMSET_MORE_00000000 - ~25% chance for 00000000 4-byte clusters + * RANDMEMSET_MORE_FFFFFFFF - ~25% chance for FFFFFFFF 4-byte clusters + */ +void prng_randmemset_r (prng_t *prng, + void *voidbuf, + size_t size, + prng_randmemset_flags_t flags) +{ + uint8_t *buf = (uint8_t *)voidbuf; + if ((uintptr_t)buf & 15) + { + /* unaligned buffer */ + if (flags == 0) + randmemset_internal (prng, buf, size, 0, 0); + else if (flags == RANDMEMSET_MORE_00_AND_FF) + randmemset_internal (prng, buf, size, RANDMEMSET_MORE_00_AND_FF, 0); + else + randmemset_internal (prng, buf, size, flags, 0); + } + else + { + /* aligned buffer */ + if (flags == 0) + randmemset_internal (prng, buf, size, 0, 1); + else if (flags == RANDMEMSET_MORE_00_AND_FF) + randmemset_internal (prng, buf, size, RANDMEMSET_MORE_00_AND_FF, 1); + else + randmemset_internal (prng, buf, size, flags, 1); + } +} diff --git a/src/pixman/test/utils-prng.h b/src/pixman/test/utils-prng.h new file mode 100644 index 0000000..f9ae8dd --- /dev/null +++ b/src/pixman/test/utils-prng.h @@ -0,0 +1,170 @@ +/* + * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com> + * + * Based on the public domain implementation of small noncryptographic PRNG + * authored by Bob Jenkins: http://burtleburtle.net/bob/rand/smallprng.html + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __UTILS_PRNG_H__ +#define __UTILS_PRNG_H__ + +/* + * This file provides a fast SIMD-optimized noncryptographic PRNG (pseudorandom + * number generator), with the output good enough to pass "Big Crush" tests + * from TestU01 (http://en.wikipedia.org/wiki/TestU01). + * + * SIMD code uses http://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html + * which is a GCC specific extension. There is also a slower alternative + * code path, which should work with any C compiler. + * + * The "prng_t" structure keeps the internal state of the random number + * generator. It is possible to have multiple instances of the random number + * generator active at the same time, in this case each of them needs to have + * its own "prng_t". All the functions take a pointer to "prng_t" + * as the first argument. + * + * Functions: + * + * ---------------------------------------------------------------------------- + * void prng_srand_r (prng_t *prng, uint32_t seed); + * + * Initialize the pseudorandom number generator. The sequence of preudorandom + * numbers is deterministic and only depends on "seed". Any two generators + * initialized with the same seed will produce exactly the same sequence. + * + * ---------------------------------------------------------------------------- + * uint32_t prng_rand_r (prng_t *prng); + * + * Generate a single uniformly distributed 32-bit pseudorandom value. + * + * ---------------------------------------------------------------------------- + * void prng_randmemset_r (prng_t *prng, + * void *buffer, + * size_t size, + * prng_randmemset_flags_t flags); + * + * Fills the memory buffer "buffer" with "size" bytes of pseudorandom data. + * The "flags" argument may be used to tweak some statistics properties: + * RANDMEMSET_MORE_00 - set ~25% of bytes to 0x00 + * RANDMEMSET_MORE_FF - set ~25% of bytes to 0xFF + * The flags can be combined. This allows a bit better simulation of typical + * pixel data, which normally contains a lot of fully transparent or fully + * opaque pixels. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "pixman-private.h" + +/*****************************************************************************/ + +#ifdef HAVE_GCC_VECTOR_EXTENSIONS +typedef uint32_t uint32x4 __attribute__ ((vector_size(16))); +typedef uint8_t uint8x16 __attribute__ ((vector_size(16))); +#endif + +typedef struct +{ + uint32_t a, b, c, d; +} smallprng_t; + +typedef struct +{ +#ifdef HAVE_GCC_VECTOR_EXTENSIONS + uint32x4 a, b, c, d; +#else + smallprng_t p1, p2, p3, p4; +#endif + smallprng_t p0; +} prng_t; + +typedef union +{ + uint8_t b[16]; + uint32_t w[4]; +#ifdef HAVE_GCC_VECTOR_EXTENSIONS + uint8x16 vb; + uint32x4 vw; +#endif +} prng_rand_128_data_t; + +/*****************************************************************************/ + +static force_inline uint32_t +smallprng_rand_r (smallprng_t *x) +{ + uint32_t e = x->a - ((x->b << 27) + (x->b >> (32 - 27))); + x->a = x->b ^ ((x->c << 17) ^ (x->c >> (32 - 17))); + x->b = x->c + x->d; + x->c = x->d + e; + x->d = e + x->a; + return x->d; +} + +/* Generate 4 bytes (32-bits) of random data */ +static force_inline uint32_t +prng_rand_r (prng_t *x) +{ + return smallprng_rand_r (&x->p0); +} + +/* Generate 16 bytes (128-bits) of random data */ +static force_inline void +prng_rand_128_r (prng_t *x, prng_rand_128_data_t *data) +{ +#ifdef HAVE_GCC_VECTOR_EXTENSIONS + uint32x4 e = x->a - ((x->b << 27) + (x->b >> (32 - 27))); + x->a = x->b ^ ((x->c << 17) ^ (x->c >> (32 - 17))); + x->b = x->c + x->d; + x->c = x->d + e; + x->d = e + x->a; + data->vw = x->d; +#else + data->w[0] = smallprng_rand_r (&x->p1); + data->w[1] = smallprng_rand_r (&x->p2); + data->w[2] = smallprng_rand_r (&x->p3); + data->w[3] = smallprng_rand_r (&x->p4); +#endif +} + +typedef enum +{ + RANDMEMSET_MORE_00 = 1, /* ~25% chance for 0x00 bytes */ + RANDMEMSET_MORE_FF = 2, /* ~25% chance for 0xFF bytes */ + RANDMEMSET_MORE_00000000 = 4, /* ~25% chance for 0x00000000 clusters */ + RANDMEMSET_MORE_FFFFFFFF = 8, /* ~25% chance for 0xFFFFFFFF clusters */ + RANDMEMSET_MORE_00_AND_FF = (RANDMEMSET_MORE_00 | RANDMEMSET_MORE_00000000 | + RANDMEMSET_MORE_FF | RANDMEMSET_MORE_FFFFFFFF) +} prng_randmemset_flags_t; + +/* Set the 32-bit seed for PRNG */ +void prng_srand_r (prng_t *prng, uint32_t seed); + +/* Fill memory buffer with random data */ +void prng_randmemset_r (prng_t *prng, + void *buffer, + size_t size, + prng_randmemset_flags_t flags); + +#endif diff --git a/src/pixman/test/utils.c b/src/pixman/test/utils.c new file mode 100644 index 0000000..ebe0ccc --- /dev/null +++ b/src/pixman/test/utils.c @@ -0,0 +1,1618 @@ +#define _GNU_SOURCE + +#include "utils.h" +#include <math.h> +#include <signal.h> +#include <stdlib.h> + +#ifdef HAVE_GETTIMEOFDAY +#include <sys/time.h> +#else +#include <time.h> +#endif + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#ifdef HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif + +#ifdef HAVE_FENV_H +#include <fenv.h> +#endif + +#ifdef HAVE_LIBPNG +#include <png.h> +#endif + +/* Random number generator state + */ + +prng_t prng_state_data; +prng_t *prng_state; + +/*----------------------------------------------------------------------------*\ + * CRC-32 version 2.0.0 by Craig Bruce, 2006-04-29. + * + * This program generates the CRC-32 values for the files named in the + * command-line arguments. These are the same CRC-32 values used by GZIP, + * PKZIP, and ZMODEM. The Crc32_ComputeBuf () can also be detached and + * used independently. + * + * THIS PROGRAM IS PUBLIC-DOMAIN SOFTWARE. + * + * Based on the byte-oriented implementation "File Verification Using CRC" + * by Mark R. Nelson in Dr. Dobb's Journal, May 1992, pp. 64-67. + * + * v1.0.0: original release. + * v1.0.1: fixed printf formats. + * v1.0.2: fixed something else. + * v1.0.3: replaced CRC constant table by generator function. + * v1.0.4: reformatted code, made ANSI C. 1994-12-05. + * v2.0.0: rewrote to use memory buffer & static table, 2006-04-29. +\*----------------------------------------------------------------------------*/ + +/*----------------------------------------------------------------------------*\ + * NAME: + * Crc32_ComputeBuf () - computes the CRC-32 value of a memory buffer + * DESCRIPTION: + * Computes or accumulates the CRC-32 value for a memory buffer. + * The 'inCrc32' gives a previously accumulated CRC-32 value to allow + * a CRC to be generated for multiple sequential buffer-fuls of data. + * The 'inCrc32' for the first buffer must be zero. + * ARGUMENTS: + * inCrc32 - accumulated CRC-32 value, must be 0 on first call + * buf - buffer to compute CRC-32 value for + * bufLen - number of bytes in buffer + * RETURNS: + * crc32 - computed CRC-32 value + * ERRORS: + * (no errors are possible) +\*----------------------------------------------------------------------------*/ + +uint32_t +compute_crc32 (uint32_t in_crc32, + const void *buf, + size_t buf_len) +{ + static const uint32_t crc_table[256] = { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, + 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, + 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, + 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, + 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, + 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, + 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, + 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, + 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, + 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, + 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, + 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, + 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, + 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, + 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, + 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, + 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, + 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, + 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, + 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, + 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, + 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }; + + uint32_t crc32; + unsigned char * byte_buf; + size_t i; + + /* accumulate crc32 for buffer */ + crc32 = in_crc32 ^ 0xFFFFFFFF; + byte_buf = (unsigned char*) buf; + + for (i = 0; i < buf_len; i++) + crc32 = (crc32 >> 8) ^ crc_table[(crc32 ^ byte_buf[i]) & 0xFF]; + + return (crc32 ^ 0xFFFFFFFF); +} + +static uint32_t +compute_crc32_for_image_internal (uint32_t crc32, + pixman_image_t *img, + pixman_bool_t remove_alpha, + pixman_bool_t remove_rgb) +{ + pixman_format_code_t fmt = pixman_image_get_format (img); + uint32_t *data = pixman_image_get_data (img); + int stride = pixman_image_get_stride (img); + int height = pixman_image_get_height (img); + uint32_t mask = 0xffffffff; + int i; + + if (stride < 0) + { + data += (stride / 4) * (height - 1); + stride = - stride; + } + + /* mask unused 'x' part */ + if (PIXMAN_FORMAT_BPP (fmt) - PIXMAN_FORMAT_DEPTH (fmt) && + PIXMAN_FORMAT_DEPTH (fmt) != 0) + { + uint32_t m = (1 << PIXMAN_FORMAT_DEPTH (fmt)) - 1; + + if (PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_BGRA || + PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_RGBA) + { + m <<= (PIXMAN_FORMAT_BPP (fmt) - PIXMAN_FORMAT_DEPTH (fmt)); + } + + mask &= m; + } + + /* mask alpha channel */ + if (remove_alpha && PIXMAN_FORMAT_A (fmt)) + { + uint32_t m; + + if (PIXMAN_FORMAT_BPP (fmt) == 32) + m = 0xffffffff; + else + m = (1 << PIXMAN_FORMAT_BPP (fmt)) - 1; + + m >>= PIXMAN_FORMAT_A (fmt); + + if (PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_BGRA || + PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_RGBA || + PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_A) + { + /* Alpha is at the bottom of the pixel */ + m <<= PIXMAN_FORMAT_A (fmt); + } + + mask &= m; + } + + /* mask rgb channels */ + if (remove_rgb && PIXMAN_FORMAT_RGB (fmt)) + { + uint32_t m = ((uint32_t)~0) >> (32 - PIXMAN_FORMAT_BPP (fmt)); + uint32_t size = PIXMAN_FORMAT_R (fmt) + PIXMAN_FORMAT_G (fmt) + PIXMAN_FORMAT_B (fmt); + + m &= ~((1 << size) - 1); + + if (PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_BGRA || + PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_RGBA) + { + /* RGB channels are at the top of the pixel */ + m >>= size; + } + + mask &= m; + } + + for (i = 0; i * PIXMAN_FORMAT_BPP (fmt) < 32; i++) + mask |= mask << (i * PIXMAN_FORMAT_BPP (fmt)); + + for (i = 0; i < stride * height / 4; i++) + data[i] &= mask; + + /* swap endiannes in order to provide identical results on both big + * and litte endian systems + */ + image_endian_swap (img); + + return compute_crc32 (crc32, data, stride * height); +} + +uint32_t +compute_crc32_for_image (uint32_t crc32, + pixman_image_t *img) +{ + if (img->common.alpha_map) + { + crc32 = compute_crc32_for_image_internal (crc32, img, TRUE, FALSE); + crc32 = compute_crc32_for_image_internal ( + crc32, (pixman_image_t *)img->common.alpha_map, FALSE, TRUE); + } + else + { + crc32 = compute_crc32_for_image_internal (crc32, img, FALSE, FALSE); + } + + return crc32; +} + +void +print_image (pixman_image_t *image) +{ + int i, j; + int width, height, stride; + pixman_format_code_t format; + uint8_t *buffer; + int s; + + width = pixman_image_get_width (image); + height = pixman_image_get_height (image); + stride = pixman_image_get_stride (image); + format = pixman_image_get_format (image); + buffer = (uint8_t *)pixman_image_get_data (image); + + s = (stride >= 0)? stride : - stride; + + printf ("---\n"); + for (i = 0; i < height; i++) + { + for (j = 0; j < s; j++) + { + if (j == (width * PIXMAN_FORMAT_BPP (format) + 7) / 8) + printf ("| "); + + printf ("%02X ", *((uint8_t *)buffer + i * stride + j)); + } + printf ("\n"); + } + printf ("---\n"); +} + +/* perform endian conversion of pixel data + */ +void +image_endian_swap (pixman_image_t *img) +{ + int stride = pixman_image_get_stride (img); + uint32_t *data = pixman_image_get_data (img); + int height = pixman_image_get_height (img); + int bpp = PIXMAN_FORMAT_BPP (pixman_image_get_format (img)); + int i, j; + + /* swap bytes only on big endian systems */ + if (is_little_endian()) + return; + + if (bpp == 8) + return; + + for (i = 0; i < height; i++) + { + uint8_t *line_data = (uint8_t *)data + stride * i; + int s = (stride >= 0)? stride : - stride; + + switch (bpp) + { + case 1: + for (j = 0; j < s; j++) + { + line_data[j] = + ((line_data[j] & 0x80) >> 7) | + ((line_data[j] & 0x40) >> 5) | + ((line_data[j] & 0x20) >> 3) | + ((line_data[j] & 0x10) >> 1) | + ((line_data[j] & 0x08) << 1) | + ((line_data[j] & 0x04) << 3) | + ((line_data[j] & 0x02) << 5) | + ((line_data[j] & 0x01) << 7); + } + break; + case 4: + for (j = 0; j < s; j++) + { + line_data[j] = (line_data[j] >> 4) | (line_data[j] << 4); + } + break; + case 16: + for (j = 0; j + 2 <= s; j += 2) + { + char t1 = line_data[j + 0]; + char t2 = line_data[j + 1]; + + line_data[j + 1] = t1; + line_data[j + 0] = t2; + } + break; + case 24: + for (j = 0; j + 3 <= s; j += 3) + { + char t1 = line_data[j + 0]; + char t2 = line_data[j + 1]; + char t3 = line_data[j + 2]; + + line_data[j + 2] = t1; + line_data[j + 1] = t2; + line_data[j + 0] = t3; + } + break; + case 32: + for (j = 0; j + 4 <= s; j += 4) + { + char t1 = line_data[j + 0]; + char t2 = line_data[j + 1]; + char t3 = line_data[j + 2]; + char t4 = line_data[j + 3]; + + line_data[j + 3] = t1; + line_data[j + 2] = t2; + line_data[j + 1] = t3; + line_data[j + 0] = t4; + } + break; + default: + assert (FALSE); + break; + } + } +} + +#define N_LEADING_PROTECTED 10 +#define N_TRAILING_PROTECTED 10 + +typedef struct +{ + void *addr; + uint32_t len; + uint8_t *trailing; + int n_bytes; +} info_t; + +#if defined(HAVE_MPROTECT) && defined(HAVE_GETPAGESIZE) && defined(HAVE_SYS_MMAN_H) && defined(HAVE_MMAP) + +/* This is apparently necessary on at least OS X */ +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +void * +fence_malloc (int64_t len) +{ + unsigned long page_size = getpagesize(); + unsigned long page_mask = page_size - 1; + uint32_t n_payload_bytes = (len + page_mask) & ~page_mask; + uint32_t n_bytes = + (page_size * (N_LEADING_PROTECTED + N_TRAILING_PROTECTED + 2) + + n_payload_bytes) & ~page_mask; + uint8_t *initial_page; + uint8_t *leading_protected; + uint8_t *trailing_protected; + uint8_t *payload; + uint8_t *addr; + + if (len < 0) + abort(); + + addr = mmap (NULL, n_bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + + if (addr == MAP_FAILED) + { + printf ("mmap failed on %lld %u\n", (long long int)len, n_bytes); + return NULL; + } + + initial_page = (uint8_t *)(((uintptr_t)addr + page_mask) & ~page_mask); + leading_protected = initial_page + page_size; + payload = leading_protected + N_LEADING_PROTECTED * page_size; + trailing_protected = payload + n_payload_bytes; + + ((info_t *)initial_page)->addr = addr; + ((info_t *)initial_page)->len = len; + ((info_t *)initial_page)->trailing = trailing_protected; + ((info_t *)initial_page)->n_bytes = n_bytes; + + if ((mprotect (leading_protected, N_LEADING_PROTECTED * page_size, + PROT_NONE) == -1) || + (mprotect (trailing_protected, N_TRAILING_PROTECTED * page_size, + PROT_NONE) == -1)) + { + munmap (addr, n_bytes); + return NULL; + } + + return payload; +} + +void +fence_free (void *data) +{ + uint32_t page_size = getpagesize(); + uint8_t *payload = data; + uint8_t *leading_protected = payload - N_LEADING_PROTECTED * page_size; + uint8_t *initial_page = leading_protected - page_size; + info_t *info = (info_t *)initial_page; + + munmap (info->addr, info->n_bytes); +} + +#else + +void * +fence_malloc (int64_t len) +{ + return malloc (len); +} + +void +fence_free (void *data) +{ + free (data); +} + +#endif + +uint8_t * +make_random_bytes (int n_bytes) +{ + uint8_t *bytes = fence_malloc (n_bytes); + + if (!bytes) + return NULL; + + prng_randmemset (bytes, n_bytes, 0); + + return bytes; +} + +void +a8r8g8b8_to_rgba_np (uint32_t *dst, uint32_t *src, int n_pixels) +{ + uint8_t *dst8 = (uint8_t *)dst; + int i; + + for (i = 0; i < n_pixels; ++i) + { + uint32_t p = src[i]; + uint8_t a, r, g, b; + + a = (p & 0xff000000) >> 24; + r = (p & 0x00ff0000) >> 16; + g = (p & 0x0000ff00) >> 8; + b = (p & 0x000000ff) >> 0; + + if (a != 0) + { +#define DIVIDE(c, a) \ + do \ + { \ + int t = ((c) * 255) / a; \ + (c) = t < 0? 0 : t > 255? 255 : t; \ + } while (0) + + DIVIDE (r, a); + DIVIDE (g, a); + DIVIDE (b, a); + } + + *dst8++ = r; + *dst8++ = g; + *dst8++ = b; + *dst8++ = a; + } +} + +#ifdef HAVE_LIBPNG + +pixman_bool_t +write_png (pixman_image_t *image, const char *filename) +{ + int width = pixman_image_get_width (image); + int height = pixman_image_get_height (image); + int stride = width * 4; + uint32_t *data = malloc (height * stride); + pixman_image_t *copy; + png_struct *write_struct; + png_info *info_struct; + pixman_bool_t result = FALSE; + FILE *f = fopen (filename, "wb"); + png_bytep *row_pointers; + int i; + + if (!f) + return FALSE; + + row_pointers = malloc (height * sizeof (png_bytep)); + + copy = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, width, height, data, stride); + + pixman_image_composite32 ( + PIXMAN_OP_SRC, image, NULL, copy, 0, 0, 0, 0, 0, 0, width, height); + + a8r8g8b8_to_rgba_np (data, data, height * width); + + for (i = 0; i < height; ++i) + row_pointers[i] = (png_bytep)(data + i * width); + + if (!(write_struct = png_create_write_struct ( + PNG_LIBPNG_VER_STRING, NULL, NULL, NULL))) + goto out1; + + if (!(info_struct = png_create_info_struct (write_struct))) + goto out2; + + png_init_io (write_struct, f); + + png_set_IHDR (write_struct, info_struct, width, height, + 8, PNG_COLOR_TYPE_RGB_ALPHA, + PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE, + PNG_FILTER_TYPE_BASE); + + png_write_info (write_struct, info_struct); + + png_write_image (write_struct, row_pointers); + + png_write_end (write_struct, NULL); + + result = TRUE; + +out2: + png_destroy_write_struct (&write_struct, &info_struct); + +out1: + if (fclose (f) != 0) + result = FALSE; + + pixman_image_unref (copy); + free (row_pointers); + free (data); + return result; +} + +#else /* no libpng */ + +pixman_bool_t +write_png (pixman_image_t *image, const char *filename) +{ + return FALSE; +} + +#endif + +static void +color8_to_color16 (uint32_t color8, pixman_color_t *color16) +{ + color16->alpha = ((color8 & 0xff000000) >> 24); + color16->red = ((color8 & 0x00ff0000) >> 16); + color16->green = ((color8 & 0x0000ff00) >> 8); + color16->blue = ((color8 & 0x000000ff) >> 0); + + color16->alpha |= color16->alpha << 8; + color16->red |= color16->red << 8; + color16->blue |= color16->blue << 8; + color16->green |= color16->green << 8; +} + +void +draw_checkerboard (pixman_image_t *image, + int check_size, + uint32_t color1, uint32_t color2) +{ + pixman_color_t check1, check2; + pixman_image_t *c1, *c2; + int n_checks_x, n_checks_y; + int i, j; + + color8_to_color16 (color1, &check1); + color8_to_color16 (color2, &check2); + + c1 = pixman_image_create_solid_fill (&check1); + c2 = pixman_image_create_solid_fill (&check2); + + n_checks_x = ( + pixman_image_get_width (image) + check_size - 1) / check_size; + n_checks_y = ( + pixman_image_get_height (image) + check_size - 1) / check_size; + + for (j = 0; j < n_checks_y; j++) + { + for (i = 0; i < n_checks_x; i++) + { + pixman_image_t *src; + + if (((i ^ j) & 1)) + src = c1; + else + src = c2; + + pixman_image_composite32 (PIXMAN_OP_SRC, src, NULL, image, + 0, 0, 0, 0, + i * check_size, j * check_size, + check_size, check_size); + } + } +} + +static uint32_t +call_test_function (uint32_t (*test_function)(int testnum, int verbose), + int testnum, + int verbose) +{ + uint32_t retval; + +#if defined (__GNUC__) && defined (_WIN32) && (defined (__i386) || defined (__i386__)) + __asm__ ( + /* Deliberately avoid aligning the stack to 16 bytes */ + "pushl %1\n\t" + "pushl %2\n\t" + "call *%3\n\t" + "addl $8, %%esp\n\t" + : "=a" (retval) + : "r" (verbose), + "r" (testnum), + "r" (test_function) + : "edx", "ecx"); /* caller save registers */ +#else + retval = test_function (testnum, verbose); +#endif + + return retval; +} + +/* + * A function, which can be used as a core part of the test programs, + * intended to detect various problems with the help of fuzzing input + * to pixman API (according to some templates, aka "smart" fuzzing). + * Some general information about such testing can be found here: + * http://en.wikipedia.org/wiki/Fuzz_testing + * + * It may help detecting: + * - crashes on bad handling of valid or reasonably invalid input to + * pixman API. + * - deviations from the behavior of older pixman releases. + * - deviations from the behavior of the same pixman release, but + * configured in a different way (for example with SIMD optimizations + * disabled), or running on a different OS or hardware. + * + * The test is performed by calling a callback function a huge number + * of times. The callback function is expected to run some snippet of + * pixman code with pseudorandom variations to the data feeded to + * pixman API. A result of running each callback function should be + * some deterministic value which depends on test number (test number + * can be used as a seed for PRNG). When 'verbose' argument is nonzero, + * callback function is expected to print to stdout some information + * about what it does. + * + * Return values from many small tests are accumulated together and + * used as final checksum, which can be compared to some expected + * value. Running the tests not individually, but in a batch helps + * to reduce process start overhead and also allows to parallelize + * testing and utilize multiple CPU cores. + * + * The resulting executable can be run without any arguments. In + * this case it runs a batch of tests starting from 1 and up to + * 'default_number_of_iterations'. The resulting checksum is + * compared with 'expected_checksum' and FAIL or PASS verdict + * depends on the result of this comparison. + * + * If the executable is run with 2 numbers provided as command line + * arguments, they specify the starting and ending numbers for a test + * batch. + * + * If the executable is run with only one number provided as a command + * line argument, then this number is used to call the callback function + * once, and also with verbose flag set. + */ +int +fuzzer_test_main (const char *test_name, + int default_number_of_iterations, + uint32_t expected_checksum, + uint32_t (*test_function)(int testnum, int verbose), + int argc, + const char *argv[]) +{ + int i, n1 = 1, n2 = 0; + uint32_t checksum = 0; + int verbose = getenv ("VERBOSE") != NULL; + + if (argc >= 3) + { + n1 = atoi (argv[1]); + n2 = atoi (argv[2]); + if (n2 < n1) + { + printf ("invalid test range\n"); + return 1; + } + } + else if (argc >= 2) + { + n2 = atoi (argv[1]); + + checksum = call_test_function (test_function, n2, 1); + + printf ("%d: checksum=%08X\n", n2, checksum); + return 0; + } + else + { + n1 = 1; + n2 = default_number_of_iterations; + } + +#ifdef USE_OPENMP + #pragma omp parallel for reduction(+:checksum) default(none) \ + shared(n1, n2, test_function, verbose) +#endif + for (i = n1; i <= n2; i++) + { + uint32_t crc = call_test_function (test_function, i, 0); + if (verbose) + printf ("%d: %08X\n", i, crc); + checksum += crc; + } + + if (n1 == 1 && n2 == default_number_of_iterations) + { + if (checksum == expected_checksum) + { + printf ("%s test passed (checksum=%08X)\n", + test_name, checksum); + } + else + { + printf ("%s test failed! (checksum=%08X, expected %08X)\n", + test_name, checksum, expected_checksum); + return 1; + } + } + else + { + printf ("%d-%d: checksum=%08X\n", n1, n2, checksum); + } + + return 0; +} + +/* Try to obtain current time in seconds */ +double +gettime (void) +{ +#ifdef HAVE_GETTIMEOFDAY + struct timeval tv; + + gettimeofday (&tv, NULL); + return (double)((int64_t)tv.tv_sec * 1000000 + tv.tv_usec) / 1000000.; +#else + return (double)clock() / (double)CLOCKS_PER_SEC; +#endif +} + +uint32_t +get_random_seed (void) +{ + union { double d; uint32_t u32; } t; + t.d = gettime(); + prng_srand (t.u32); + + return prng_rand (); +} + +#ifdef HAVE_SIGACTION +#ifdef HAVE_ALARM +static const char *global_msg; + +static void +on_alarm (int signo) +{ + printf ("%s\n", global_msg); + exit (1); +} +#endif +#endif + +void +fail_after (int seconds, const char *msg) +{ +#ifdef HAVE_SIGACTION +#ifdef HAVE_ALARM + struct sigaction action; + + global_msg = msg; + + memset (&action, 0, sizeof (action)); + action.sa_handler = on_alarm; + + alarm (seconds); + + sigaction (SIGALRM, &action, NULL); +#endif +#endif +} + +void +enable_divbyzero_exceptions (void) +{ +#ifdef HAVE_FENV_H +#ifdef HAVE_FEENABLEEXCEPT + feenableexcept (FE_DIVBYZERO); +#endif +#endif +} + +void * +aligned_malloc (size_t align, size_t size) +{ + void *result; + +#ifdef HAVE_POSIX_MEMALIGN + if (posix_memalign (&result, align, size) != 0) + result = NULL; +#else + result = malloc (size); +#endif + + return result; +} + +#define CONVERT_15(c, is_rgb) \ + (is_rgb? \ + ((((c) >> 3) & 0x001f) | \ + (((c) >> 6) & 0x03e0) | \ + (((c) >> 9) & 0x7c00)) : \ + (((((c) >> 16) & 0xff) * 153 + \ + (((c) >> 8) & 0xff) * 301 + \ + (((c) ) & 0xff) * 58) >> 2)) + +double +convert_srgb_to_linear (double c) +{ + if (c <= 0.04045) + return c / 12.92; + else + return pow ((c + 0.055) / 1.055, 2.4); +} + +double +convert_linear_to_srgb (double c) +{ + if (c <= 0.0031308) + return c * 12.92; + else + return 1.055 * pow (c, 1.0/2.4) - 0.055; +} + +void +initialize_palette (pixman_indexed_t *palette, uint32_t depth, int is_rgb) +{ + int i; + uint32_t mask = (1 << depth) - 1; + + for (i = 0; i < 32768; ++i) + palette->ent[i] = prng_rand() & mask; + + memset (palette->rgba, 0, sizeof (palette->rgba)); + + for (i = 0; i < mask + 1; ++i) + { + uint32_t rgba24; + pixman_bool_t retry; + uint32_t i15; + + /* We filled the rgb->index map with random numbers, but we + * do need the ability to round trip, that is if some indexed + * color expands to an argb24, then the 15 bit version of that + * color must map back to the index. Anything else, we don't + * care about too much. + */ + do + { + uint32_t old_idx; + + rgba24 = prng_rand(); + i15 = CONVERT_15 (rgba24, is_rgb); + + old_idx = palette->ent[i15]; + if (CONVERT_15 (palette->rgba[old_idx], is_rgb) == i15) + retry = 1; + else + retry = 0; + } while (retry); + + palette->rgba[i] = rgba24; + palette->ent[i15] = i; + } + + for (i = 0; i < mask + 1; ++i) + { + assert (palette->ent[CONVERT_15 (palette->rgba[i], is_rgb)] == i); + } +} + +const char * +operator_name (pixman_op_t op) +{ + switch (op) + { + case PIXMAN_OP_CLEAR: return "PIXMAN_OP_CLEAR"; + case PIXMAN_OP_SRC: return "PIXMAN_OP_SRC"; + case PIXMAN_OP_DST: return "PIXMAN_OP_DST"; + case PIXMAN_OP_OVER: return "PIXMAN_OP_OVER"; + case PIXMAN_OP_OVER_REVERSE: return "PIXMAN_OP_OVER_REVERSE"; + case PIXMAN_OP_IN: return "PIXMAN_OP_IN"; + case PIXMAN_OP_IN_REVERSE: return "PIXMAN_OP_IN_REVERSE"; + case PIXMAN_OP_OUT: return "PIXMAN_OP_OUT"; + case PIXMAN_OP_OUT_REVERSE: return "PIXMAN_OP_OUT_REVERSE"; + case PIXMAN_OP_ATOP: return "PIXMAN_OP_ATOP"; + case PIXMAN_OP_ATOP_REVERSE: return "PIXMAN_OP_ATOP_REVERSE"; + case PIXMAN_OP_XOR: return "PIXMAN_OP_XOR"; + case PIXMAN_OP_ADD: return "PIXMAN_OP_ADD"; + case PIXMAN_OP_SATURATE: return "PIXMAN_OP_SATURATE"; + + case PIXMAN_OP_DISJOINT_CLEAR: return "PIXMAN_OP_DISJOINT_CLEAR"; + case PIXMAN_OP_DISJOINT_SRC: return "PIXMAN_OP_DISJOINT_SRC"; + case PIXMAN_OP_DISJOINT_DST: return "PIXMAN_OP_DISJOINT_DST"; + case PIXMAN_OP_DISJOINT_OVER: return "PIXMAN_OP_DISJOINT_OVER"; + case PIXMAN_OP_DISJOINT_OVER_REVERSE: return "PIXMAN_OP_DISJOINT_OVER_REVERSE"; + case PIXMAN_OP_DISJOINT_IN: return "PIXMAN_OP_DISJOINT_IN"; + case PIXMAN_OP_DISJOINT_IN_REVERSE: return "PIXMAN_OP_DISJOINT_IN_REVERSE"; + case PIXMAN_OP_DISJOINT_OUT: return "PIXMAN_OP_DISJOINT_OUT"; + case PIXMAN_OP_DISJOINT_OUT_REVERSE: return "PIXMAN_OP_DISJOINT_OUT_REVERSE"; + case PIXMAN_OP_DISJOINT_ATOP: return "PIXMAN_OP_DISJOINT_ATOP"; + case PIXMAN_OP_DISJOINT_ATOP_REVERSE: return "PIXMAN_OP_DISJOINT_ATOP_REVERSE"; + case PIXMAN_OP_DISJOINT_XOR: return "PIXMAN_OP_DISJOINT_XOR"; + + case PIXMAN_OP_CONJOINT_CLEAR: return "PIXMAN_OP_CONJOINT_CLEAR"; + case PIXMAN_OP_CONJOINT_SRC: return "PIXMAN_OP_CONJOINT_SRC"; + case PIXMAN_OP_CONJOINT_DST: return "PIXMAN_OP_CONJOINT_DST"; + case PIXMAN_OP_CONJOINT_OVER: return "PIXMAN_OP_CONJOINT_OVER"; + case PIXMAN_OP_CONJOINT_OVER_REVERSE: return "PIXMAN_OP_CONJOINT_OVER_REVERSE"; + case PIXMAN_OP_CONJOINT_IN: return "PIXMAN_OP_CONJOINT_IN"; + case PIXMAN_OP_CONJOINT_IN_REVERSE: return "PIXMAN_OP_CONJOINT_IN_REVERSE"; + case PIXMAN_OP_CONJOINT_OUT: return "PIXMAN_OP_CONJOINT_OUT"; + case PIXMAN_OP_CONJOINT_OUT_REVERSE: return "PIXMAN_OP_CONJOINT_OUT_REVERSE"; + case PIXMAN_OP_CONJOINT_ATOP: return "PIXMAN_OP_CONJOINT_ATOP"; + case PIXMAN_OP_CONJOINT_ATOP_REVERSE: return "PIXMAN_OP_CONJOINT_ATOP_REVERSE"; + case PIXMAN_OP_CONJOINT_XOR: return "PIXMAN_OP_CONJOINT_XOR"; + + case PIXMAN_OP_MULTIPLY: return "PIXMAN_OP_MULTIPLY"; + case PIXMAN_OP_SCREEN: return "PIXMAN_OP_SCREEN"; + case PIXMAN_OP_OVERLAY: return "PIXMAN_OP_OVERLAY"; + case PIXMAN_OP_DARKEN: return "PIXMAN_OP_DARKEN"; + case PIXMAN_OP_LIGHTEN: return "PIXMAN_OP_LIGHTEN"; + case PIXMAN_OP_COLOR_DODGE: return "PIXMAN_OP_COLOR_DODGE"; + case PIXMAN_OP_COLOR_BURN: return "PIXMAN_OP_COLOR_BURN"; + case PIXMAN_OP_HARD_LIGHT: return "PIXMAN_OP_HARD_LIGHT"; + case PIXMAN_OP_SOFT_LIGHT: return "PIXMAN_OP_SOFT_LIGHT"; + case PIXMAN_OP_DIFFERENCE: return "PIXMAN_OP_DIFFERENCE"; + case PIXMAN_OP_EXCLUSION: return "PIXMAN_OP_EXCLUSION"; + case PIXMAN_OP_HSL_HUE: return "PIXMAN_OP_HSL_HUE"; + case PIXMAN_OP_HSL_SATURATION: return "PIXMAN_OP_HSL_SATURATION"; + case PIXMAN_OP_HSL_COLOR: return "PIXMAN_OP_HSL_COLOR"; + case PIXMAN_OP_HSL_LUMINOSITY: return "PIXMAN_OP_HSL_LUMINOSITY"; + + case PIXMAN_OP_NONE: + return "<invalid operator 'none'>"; + }; + + return "<unknown operator>"; +} + +const char * +format_name (pixman_format_code_t format) +{ + switch (format) + { +/* 32bpp formats */ + case PIXMAN_a8r8g8b8: return "a8r8g8b8"; + case PIXMAN_x8r8g8b8: return "x8r8g8b8"; + case PIXMAN_a8b8g8r8: return "a8b8g8r8"; + case PIXMAN_x8b8g8r8: return "x8b8g8r8"; + case PIXMAN_b8g8r8a8: return "b8g8r8a8"; + case PIXMAN_b8g8r8x8: return "b8g8r8x8"; + case PIXMAN_r8g8b8a8: return "r8g8b8a8"; + case PIXMAN_r8g8b8x8: return "r8g8b8x8"; + case PIXMAN_x14r6g6b6: return "x14r6g6b6"; + case PIXMAN_x2r10g10b10: return "x2r10g10b10"; + case PIXMAN_a2r10g10b10: return "a2r10g10b10"; + case PIXMAN_x2b10g10r10: return "x2b10g10r10"; + case PIXMAN_a2b10g10r10: return "a2b10g10r10"; + +/* sRGB formats */ + case PIXMAN_a8r8g8b8_sRGB: return "a8r8g8b8_sRGB"; + +/* 24bpp formats */ + case PIXMAN_r8g8b8: return "r8g8b8"; + case PIXMAN_b8g8r8: return "b8g8r8"; + +/* 16bpp formats */ + case PIXMAN_r5g6b5: return "r5g6b5"; + case PIXMAN_b5g6r5: return "b5g6r5"; + + case PIXMAN_a1r5g5b5: return "a1r5g5b5"; + case PIXMAN_x1r5g5b5: return "x1r5g5b5"; + case PIXMAN_a1b5g5r5: return "a1b5g5r5"; + case PIXMAN_x1b5g5r5: return "x1b5g5r5"; + case PIXMAN_a4r4g4b4: return "a4r4g4b4"; + case PIXMAN_x4r4g4b4: return "x4r4g4b4"; + case PIXMAN_a4b4g4r4: return "a4b4g4r4"; + case PIXMAN_x4b4g4r4: return "x4b4g4r4"; + +/* 8bpp formats */ + case PIXMAN_a8: return "a8"; + case PIXMAN_r3g3b2: return "r3g3b2"; + case PIXMAN_b2g3r3: return "b2g3r3"; + case PIXMAN_a2r2g2b2: return "a2r2g2b2"; + case PIXMAN_a2b2g2r2: return "a2b2g2r2"; + +#if 0 + case PIXMAN_x4c4: return "x4c4"; + case PIXMAN_g8: return "g8"; +#endif + case PIXMAN_c8: return "x4c4 / c8"; + case PIXMAN_x4g4: return "x4g4 / g8"; + + case PIXMAN_x4a4: return "x4a4"; + +/* 4bpp formats */ + case PIXMAN_a4: return "a4"; + case PIXMAN_r1g2b1: return "r1g2b1"; + case PIXMAN_b1g2r1: return "b1g2r1"; + case PIXMAN_a1r1g1b1: return "a1r1g1b1"; + case PIXMAN_a1b1g1r1: return "a1b1g1r1"; + + case PIXMAN_c4: return "c4"; + case PIXMAN_g4: return "g4"; + +/* 1bpp formats */ + case PIXMAN_a1: return "a1"; + + case PIXMAN_g1: return "g1"; + +/* YUV formats */ + case PIXMAN_yuy2: return "yuy2"; + case PIXMAN_yv12: return "yv12"; + }; + + /* Fake formats. + * + * This is separate switch to prevent GCC from complaining + * that the values are not in the pixman_format_code_t enum. + */ + switch ((uint32_t)format) + { + case PIXMAN_null: return "null"; + case PIXMAN_solid: return "solid"; + case PIXMAN_pixbuf: return "pixbuf"; + case PIXMAN_rpixbuf: return "rpixbuf"; + case PIXMAN_unknown: return "unknown"; + }; + + return "<unknown format>"; +}; + +static double +calc_op (pixman_op_t op, double src, double dst, double srca, double dsta) +{ +#define mult_chan(src, dst, Fa, Fb) MIN ((src) * (Fa) + (dst) * (Fb), 1.0) + + double Fa, Fb; + + switch (op) + { + case PIXMAN_OP_CLEAR: + case PIXMAN_OP_DISJOINT_CLEAR: + case PIXMAN_OP_CONJOINT_CLEAR: + return mult_chan (src, dst, 0.0, 0.0); + + case PIXMAN_OP_SRC: + case PIXMAN_OP_DISJOINT_SRC: + case PIXMAN_OP_CONJOINT_SRC: + return mult_chan (src, dst, 1.0, 0.0); + + case PIXMAN_OP_DST: + case PIXMAN_OP_DISJOINT_DST: + case PIXMAN_OP_CONJOINT_DST: + return mult_chan (src, dst, 0.0, 1.0); + + case PIXMAN_OP_OVER: + return mult_chan (src, dst, 1.0, 1.0 - srca); + + case PIXMAN_OP_OVER_REVERSE: + return mult_chan (src, dst, 1.0 - dsta, 1.0); + + case PIXMAN_OP_IN: + return mult_chan (src, dst, dsta, 0.0); + + case PIXMAN_OP_IN_REVERSE: + return mult_chan (src, dst, 0.0, srca); + + case PIXMAN_OP_OUT: + return mult_chan (src, dst, 1.0 - dsta, 0.0); + + case PIXMAN_OP_OUT_REVERSE: + return mult_chan (src, dst, 0.0, 1.0 - srca); + + case PIXMAN_OP_ATOP: + return mult_chan (src, dst, dsta, 1.0 - srca); + + case PIXMAN_OP_ATOP_REVERSE: + return mult_chan (src, dst, 1.0 - dsta, srca); + + case PIXMAN_OP_XOR: + return mult_chan (src, dst, 1.0 - dsta, 1.0 - srca); + + case PIXMAN_OP_ADD: + return mult_chan (src, dst, 1.0, 1.0); + + case PIXMAN_OP_SATURATE: + case PIXMAN_OP_DISJOINT_OVER_REVERSE: + if (srca == 0.0) + Fa = 1.0; + else + Fa = MIN (1.0, (1.0 - dsta) / srca); + return mult_chan (src, dst, Fa, 1.0); + + case PIXMAN_OP_DISJOINT_OVER: + if (dsta == 0.0) + Fb = 1.0; + else + Fb = MIN (1.0, (1.0 - srca) / dsta); + return mult_chan (src, dst, 1.0, Fb); + + case PIXMAN_OP_DISJOINT_IN: + if (srca == 0.0) + Fa = 0.0; + else + Fa = MAX (0.0, 1.0 - (1.0 - dsta) / srca); + return mult_chan (src, dst, Fa, 0.0); + + case PIXMAN_OP_DISJOINT_IN_REVERSE: + if (dsta == 0.0) + Fb = 0.0; + else + Fb = MAX (0.0, 1.0 - (1.0 - srca) / dsta); + return mult_chan (src, dst, 0.0, Fb); + + case PIXMAN_OP_DISJOINT_OUT: + if (srca == 0.0) + Fa = 1.0; + else + Fa = MIN (1.0, (1.0 - dsta) / srca); + return mult_chan (src, dst, Fa, 0.0); + + case PIXMAN_OP_DISJOINT_OUT_REVERSE: + if (dsta == 0.0) + Fb = 1.0; + else + Fb = MIN (1.0, (1.0 - srca) / dsta); + return mult_chan (src, dst, 0.0, Fb); + + case PIXMAN_OP_DISJOINT_ATOP: + if (srca == 0.0) + Fa = 0.0; + else + Fa = MAX (0.0, 1.0 - (1.0 - dsta) / srca); + if (dsta == 0.0) + Fb = 1.0; + else + Fb = MIN (1.0, (1.0 - srca) / dsta); + return mult_chan (src, dst, Fa, Fb); + + case PIXMAN_OP_DISJOINT_ATOP_REVERSE: + if (srca == 0.0) + Fa = 1.0; + else + Fa = MIN (1.0, (1.0 - dsta) / srca); + if (dsta == 0.0) + Fb = 0.0; + else + Fb = MAX (0.0, 1.0 - (1.0 - srca) / dsta); + return mult_chan (src, dst, Fa, Fb); + + case PIXMAN_OP_DISJOINT_XOR: + if (srca == 0.0) + Fa = 1.0; + else + Fa = MIN (1.0, (1.0 - dsta) / srca); + if (dsta == 0.0) + Fb = 1.0; + else + Fb = MIN (1.0, (1.0 - srca) / dsta); + return mult_chan (src, dst, Fa, Fb); + + case PIXMAN_OP_CONJOINT_OVER: + if (dsta == 0.0) + Fb = 0.0; + else + Fb = MAX (0.0, 1.0 - srca / dsta); + return mult_chan (src, dst, 1.0, Fb); + + case PIXMAN_OP_CONJOINT_OVER_REVERSE: + if (srca == 0.0) + Fa = 0.0; + else + Fa = MAX (0.0, 1.0 - dsta / srca); + return mult_chan (src, dst, Fa, 1.0); + + case PIXMAN_OP_CONJOINT_IN: + if (srca == 0.0) + Fa = 1.0; + else + Fa = MIN (1.0, dsta / srca); + return mult_chan (src, dst, Fa, 0.0); + + case PIXMAN_OP_CONJOINT_IN_REVERSE: + if (dsta == 0.0) + Fb = 1.0; + else + Fb = MIN (1.0, srca / dsta); + return mult_chan (src, dst, 0.0, Fb); + + case PIXMAN_OP_CONJOINT_OUT: + if (srca == 0.0) + Fa = 0.0; + else + Fa = MAX (0.0, 1.0 - dsta / srca); + return mult_chan (src, dst, Fa, 0.0); + + case PIXMAN_OP_CONJOINT_OUT_REVERSE: + if (dsta == 0.0) + Fb = 0.0; + else + Fb = MAX (0.0, 1.0 - srca / dsta); + return mult_chan (src, dst, 0.0, Fb); + + case PIXMAN_OP_CONJOINT_ATOP: + if (srca == 0.0) + Fa = 1.0; + else + Fa = MIN (1.0, dsta / srca); + if (dsta == 0.0) + Fb = 0.0; + else + Fb = MAX (0.0, 1.0 - srca / dsta); + return mult_chan (src, dst, Fa, Fb); + + case PIXMAN_OP_CONJOINT_ATOP_REVERSE: + if (srca == 0.0) + Fa = 0.0; + else + Fa = MAX (0.0, 1.0 - dsta / srca); + if (dsta == 0.0) + Fb = 1.0; + else + Fb = MIN (1.0, srca / dsta); + return mult_chan (src, dst, Fa, Fb); + + case PIXMAN_OP_CONJOINT_XOR: + if (srca == 0.0) + Fa = 0.0; + else + Fa = MAX (0.0, 1.0 - dsta / srca); + if (dsta == 0.0) + Fb = 0.0; + else + Fb = MAX (0.0, 1.0 - srca / dsta); + return mult_chan (src, dst, Fa, Fb); + + case PIXMAN_OP_MULTIPLY: + case PIXMAN_OP_SCREEN: + case PIXMAN_OP_OVERLAY: + case PIXMAN_OP_DARKEN: + case PIXMAN_OP_LIGHTEN: + case PIXMAN_OP_COLOR_DODGE: + case PIXMAN_OP_COLOR_BURN: + case PIXMAN_OP_HARD_LIGHT: + case PIXMAN_OP_SOFT_LIGHT: + case PIXMAN_OP_DIFFERENCE: + case PIXMAN_OP_EXCLUSION: + case PIXMAN_OP_HSL_HUE: + case PIXMAN_OP_HSL_SATURATION: + case PIXMAN_OP_HSL_COLOR: + case PIXMAN_OP_HSL_LUMINOSITY: + default: + abort(); + return 0; /* silence MSVC */ + } +#undef mult_chan +} + +void +do_composite (pixman_op_t op, + const color_t *src, + const color_t *mask, + const color_t *dst, + color_t *result, + pixman_bool_t component_alpha) +{ + color_t srcval, srcalpha; + + if (mask == NULL) + { + srcval = *src; + + srcalpha.r = src->a; + srcalpha.g = src->a; + srcalpha.b = src->a; + srcalpha.a = src->a; + } + else if (component_alpha) + { + srcval.r = src->r * mask->r; + srcval.g = src->g * mask->g; + srcval.b = src->b * mask->b; + srcval.a = src->a * mask->a; + + srcalpha.r = src->a * mask->r; + srcalpha.g = src->a * mask->g; + srcalpha.b = src->a * mask->b; + srcalpha.a = src->a * mask->a; + } + else + { + srcval.r = src->r * mask->a; + srcval.g = src->g * mask->a; + srcval.b = src->b * mask->a; + srcval.a = src->a * mask->a; + + srcalpha.r = src->a * mask->a; + srcalpha.g = src->a * mask->a; + srcalpha.b = src->a * mask->a; + srcalpha.a = src->a * mask->a; + } + + result->r = calc_op (op, srcval.r, dst->r, srcalpha.r, dst->a); + result->g = calc_op (op, srcval.g, dst->g, srcalpha.g, dst->a); + result->b = calc_op (op, srcval.b, dst->b, srcalpha.b, dst->a); + result->a = calc_op (op, srcval.a, dst->a, srcalpha.a, dst->a); +} + +static double +round_channel (double p, int m) +{ + int t; + double r; + + t = p * ((1 << m)); + t -= t >> m; + + r = t / (double)((1 << m) - 1); + + return r; +} + +void +round_color (pixman_format_code_t format, color_t *color) +{ + if (PIXMAN_FORMAT_R (format) == 0) + { + color->r = 0.0; + color->g = 0.0; + color->b = 0.0; + } + else + { + color->r = round_channel (color->r, PIXMAN_FORMAT_R (format)); + color->g = round_channel (color->g, PIXMAN_FORMAT_G (format)); + color->b = round_channel (color->b, PIXMAN_FORMAT_B (format)); + } + + if (PIXMAN_FORMAT_A (format) == 0) + color->a = 1; + else + color->a = round_channel (color->a, PIXMAN_FORMAT_A (format)); +} + +/* Check whether @pixel is a valid quantization of the a, r, g, b + * parameters. Some slack is permitted. + */ +void +pixel_checker_init (pixel_checker_t *checker, pixman_format_code_t format) +{ + assert (PIXMAN_FORMAT_VIS (format)); + + checker->format = format; + + switch (PIXMAN_FORMAT_TYPE (format)) + { + case PIXMAN_TYPE_A: + checker->bs = 0; + checker->gs = 0; + checker->rs = 0; + checker->as = 0; + break; + + case PIXMAN_TYPE_ARGB: + case PIXMAN_TYPE_ARGB_SRGB: + checker->bs = 0; + checker->gs = checker->bs + PIXMAN_FORMAT_B (format); + checker->rs = checker->gs + PIXMAN_FORMAT_G (format); + checker->as = checker->rs + PIXMAN_FORMAT_R (format); + break; + + case PIXMAN_TYPE_ABGR: + checker->rs = 0; + checker->gs = checker->rs + PIXMAN_FORMAT_R (format); + checker->bs = checker->gs + PIXMAN_FORMAT_G (format); + checker->as = checker->bs + PIXMAN_FORMAT_B (format); + break; + + case PIXMAN_TYPE_BGRA: + /* With BGRA formats we start counting at the high end of the pixel */ + checker->bs = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_B (format); + checker->gs = checker->bs - PIXMAN_FORMAT_B (format); + checker->rs = checker->gs - PIXMAN_FORMAT_G (format); + checker->as = checker->rs - PIXMAN_FORMAT_R (format); + break; + + case PIXMAN_TYPE_RGBA: + /* With BGRA formats we start counting at the high end of the pixel */ + checker->rs = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_R (format); + checker->gs = checker->rs - PIXMAN_FORMAT_R (format); + checker->bs = checker->gs - PIXMAN_FORMAT_G (format); + checker->as = checker->bs - PIXMAN_FORMAT_B (format); + break; + + default: + assert (0); + break; + } + + checker->am = ((1 << PIXMAN_FORMAT_A (format)) - 1) << checker->as; + checker->rm = ((1 << PIXMAN_FORMAT_R (format)) - 1) << checker->rs; + checker->gm = ((1 << PIXMAN_FORMAT_G (format)) - 1) << checker->gs; + checker->bm = ((1 << PIXMAN_FORMAT_B (format)) - 1) << checker->bs; + + checker->aw = PIXMAN_FORMAT_A (format); + checker->rw = PIXMAN_FORMAT_R (format); + checker->gw = PIXMAN_FORMAT_G (format); + checker->bw = PIXMAN_FORMAT_B (format); +} + +void +pixel_checker_split_pixel (const pixel_checker_t *checker, uint32_t pixel, + int *a, int *r, int *g, int *b) +{ + *a = (pixel & checker->am) >> checker->as; + *r = (pixel & checker->rm) >> checker->rs; + *g = (pixel & checker->gm) >> checker->gs; + *b = (pixel & checker->bm) >> checker->bs; +} + +void +pixel_checker_get_masks (const pixel_checker_t *checker, + uint32_t *am, + uint32_t *rm, + uint32_t *gm, + uint32_t *bm) +{ + if (am) + *am = checker->am; + if (rm) + *rm = checker->rm; + if (gm) + *gm = checker->gm; + if (bm) + *bm = checker->bm; +} + +void +pixel_checker_convert_pixel_to_color (const pixel_checker_t *checker, + uint32_t pixel, color_t *color) +{ + int a, r, g, b; + + pixel_checker_split_pixel (checker, pixel, &a, &r, &g, &b); + + if (checker->am == 0) + color->a = 1.0; + else + color->a = a / (double)(checker->am >> checker->as); + + if (checker->rm == 0) + color->r = 0.0; + else + color->r = r / (double)(checker->rm >> checker->rs); + + if (checker->gm == 0) + color->g = 0.0; + else + color->g = g / (double)(checker->gm >> checker->gs); + + if (checker->bm == 0) + color->b = 0.0; + else + color->b = b / (double)(checker->bm >> checker->bs); + + if (PIXMAN_FORMAT_TYPE (checker->format) == PIXMAN_TYPE_ARGB_SRGB) + { + color->r = convert_srgb_to_linear (color->r); + color->g = convert_srgb_to_linear (color->g); + color->b = convert_srgb_to_linear (color->b); + } +} + +static int32_t +convert (double v, uint32_t width, uint32_t mask, uint32_t shift, double def) +{ + int32_t r; + + if (!mask) + v = def; + + r = (v * ((mask >> shift) + 1)); + r -= r >> width; + + return r; +} + +static void +get_limits (const pixel_checker_t *checker, double limit, + color_t *color, + int *ao, int *ro, int *go, int *bo) +{ + color_t tmp; + + if (PIXMAN_FORMAT_TYPE (checker->format) == PIXMAN_TYPE_ARGB_SRGB) + { + tmp.a = color->a; + tmp.r = convert_linear_to_srgb (color->r); + tmp.g = convert_linear_to_srgb (color->g); + tmp.b = convert_linear_to_srgb (color->b); + + color = &tmp; + } + + *ao = convert (color->a + limit, checker->aw, checker->am, checker->as, 1.0); + *ro = convert (color->r + limit, checker->rw, checker->rm, checker->rs, 0.0); + *go = convert (color->g + limit, checker->gw, checker->gm, checker->gs, 0.0); + *bo = convert (color->b + limit, checker->bw, checker->bm, checker->bs, 0.0); +} + +/* The acceptable deviation in units of [0.0, 1.0] + */ +#define DEVIATION (0.0064) + +void +pixel_checker_get_max (const pixel_checker_t *checker, color_t *color, + int *am, int *rm, int *gm, int *bm) +{ + get_limits (checker, DEVIATION, color, am, rm, gm, bm); +} + +void +pixel_checker_get_min (const pixel_checker_t *checker, color_t *color, + int *am, int *rm, int *gm, int *bm) +{ + get_limits (checker, - DEVIATION, color, am, rm, gm, bm); +} + +pixman_bool_t +pixel_checker_check (const pixel_checker_t *checker, uint32_t pixel, + color_t *color) +{ + int32_t a_lo, a_hi, r_lo, r_hi, g_lo, g_hi, b_lo, b_hi; + int32_t ai, ri, gi, bi; + pixman_bool_t result; + + pixel_checker_get_min (checker, color, &a_lo, &r_lo, &g_lo, &b_lo); + pixel_checker_get_max (checker, color, &a_hi, &r_hi, &g_hi, &b_hi); + pixel_checker_split_pixel (checker, pixel, &ai, &ri, &gi, &bi); + + result = + a_lo <= ai && ai <= a_hi && + r_lo <= ri && ri <= r_hi && + g_lo <= gi && gi <= g_hi && + b_lo <= bi && bi <= b_hi; + + return result; +} diff --git a/src/pixman/test/utils.h b/src/pixman/test/utils.h new file mode 100644 index 0000000..ebb14d9 --- /dev/null +++ b/src/pixman/test/utils.h @@ -0,0 +1,247 @@ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <assert.h> +#include "pixman-private.h" /* For 'inline' definition */ +#include "utils-prng.h" + +#if defined(_MSC_VER) +#define snprintf _snprintf +#define strcasecmp _stricmp +#endif + +#define ARRAY_LENGTH(A) ((int) (sizeof (A) / sizeof ((A) [0]))) + +/* A primitive pseudorandom number generator, + * taken from POSIX.1-2001 example + */ + +extern prng_t prng_state_data; +extern prng_t *prng_state; +#ifdef USE_OPENMP +#pragma omp threadprivate(prng_state_data) +#pragma omp threadprivate(prng_state) +#endif + +static inline uint32_t +prng_rand (void) +{ + return prng_rand_r (prng_state); +} + +static inline void +prng_srand (uint32_t seed) +{ + if (!prng_state) + { + /* Without setting a seed, PRNG does not work properly (is just + * returning zeros). So we only initialize the pointer here to + * make sure that 'prng_srand' is always called before any + * other 'prng_*' function. The wrongdoers violating this order + * will get a segfault. */ + prng_state = &prng_state_data; + } + prng_srand_r (prng_state, seed); +} + +static inline uint32_t +prng_rand_n (int max) +{ + return prng_rand () % max; +} + +static inline void +prng_randmemset (void *buffer, size_t size, prng_randmemset_flags_t flags) +{ + prng_randmemset_r (prng_state, buffer, size, flags); +} + +/* CRC 32 computation + */ +uint32_t +compute_crc32 (uint32_t in_crc32, + const void *buf, + size_t buf_len); + +uint32_t +compute_crc32_for_image (uint32_t in_crc32, + pixman_image_t *image); + +/* Print the image in hexadecimal */ +void +print_image (pixman_image_t *image); + +/* Returns TRUE if running on a little endian system + */ +static force_inline pixman_bool_t +is_little_endian (void) +{ + unsigned long endian_check_var = 1; + return *(unsigned char *)&endian_check_var == 1; +} + +/* perform endian conversion of pixel data + */ +void +image_endian_swap (pixman_image_t *img); + +/* Allocate memory that is bounded by protected pages, + * so that out-of-bounds access will cause segfaults + */ +void * +fence_malloc (int64_t len); + +void +fence_free (void *data); + +/* Generate n_bytes random bytes in fence_malloced memory */ +uint8_t * +make_random_bytes (int n_bytes); + +/* Return current time in seconds */ +double +gettime (void); + +uint32_t +get_random_seed (void); + +/* main body of the fuzzer test */ +int +fuzzer_test_main (const char *test_name, + int default_number_of_iterations, + uint32_t expected_checksum, + uint32_t (*test_function)(int testnum, int verbose), + int argc, + const char *argv[]); + +void +fail_after (int seconds, const char *msg); + +/* If possible, enable traps for floating point exceptions */ +void enable_divbyzero_exceptions(void); + +/* Converts a8r8g8b8 pixels to pixels that + * - are not premultiplied, + * - are stored in this order in memory: R, G, B, A, regardless of + * the endianness of the computer. + * It is allowed for @src and @dst to point to the same memory buffer. + */ +void +a8r8g8b8_to_rgba_np (uint32_t *dst, uint32_t *src, int n_pixels); + +pixman_bool_t +write_png (pixman_image_t *image, const char *filename); + +void +draw_checkerboard (pixman_image_t *image, + int check_size, + uint32_t color1, uint32_t color2); + +/* A pair of macros which can help to detect corruption of + * floating point registers after a function call. This may + * happen if _mm_empty() call is forgotten in MMX/SSE2 fast + * path code, or ARM NEON assembly optimized function forgets + * to save/restore d8-d15 registers before use. + */ + +#define FLOAT_REGS_CORRUPTION_DETECTOR_START() \ + static volatile double frcd_volatile_constant1 = 123451; \ + static volatile double frcd_volatile_constant2 = 123452; \ + static volatile double frcd_volatile_constant3 = 123453; \ + static volatile double frcd_volatile_constant4 = 123454; \ + static volatile double frcd_volatile_constant5 = 123455; \ + static volatile double frcd_volatile_constant6 = 123456; \ + static volatile double frcd_volatile_constant7 = 123457; \ + static volatile double frcd_volatile_constant8 = 123458; \ + double frcd_canary_variable1 = frcd_volatile_constant1; \ + double frcd_canary_variable2 = frcd_volatile_constant2; \ + double frcd_canary_variable3 = frcd_volatile_constant3; \ + double frcd_canary_variable4 = frcd_volatile_constant4; \ + double frcd_canary_variable5 = frcd_volatile_constant5; \ + double frcd_canary_variable6 = frcd_volatile_constant6; \ + double frcd_canary_variable7 = frcd_volatile_constant7; \ + double frcd_canary_variable8 = frcd_volatile_constant8; + +#define FLOAT_REGS_CORRUPTION_DETECTOR_FINISH() \ + assert (frcd_canary_variable1 == frcd_volatile_constant1); \ + assert (frcd_canary_variable2 == frcd_volatile_constant2); \ + assert (frcd_canary_variable3 == frcd_volatile_constant3); \ + assert (frcd_canary_variable4 == frcd_volatile_constant4); \ + assert (frcd_canary_variable5 == frcd_volatile_constant5); \ + assert (frcd_canary_variable6 == frcd_volatile_constant6); \ + assert (frcd_canary_variable7 == frcd_volatile_constant7); \ + assert (frcd_canary_variable8 == frcd_volatile_constant8); + +/* Try to get an aligned memory chunk */ +void * +aligned_malloc (size_t align, size_t size); + +double +convert_srgb_to_linear (double component); + +double +convert_linear_to_srgb (double component); + +void +initialize_palette (pixman_indexed_t *palette, uint32_t depth, int is_rgb); + +const char * +operator_name (pixman_op_t op); + +const char * +format_name (pixman_format_code_t format); + +typedef struct +{ + double r, g, b, a; +} color_t; + +void +do_composite (pixman_op_t op, + const color_t *src, + const color_t *mask, + const color_t *dst, + color_t *result, + pixman_bool_t component_alpha); + +void +round_color (pixman_format_code_t format, color_t *color); + +typedef struct +{ + pixman_format_code_t format; + uint32_t am, rm, gm, bm; + uint32_t as, rs, gs, bs; + uint32_t aw, rw, gw, bw; +} pixel_checker_t; + +void +pixel_checker_init (pixel_checker_t *checker, pixman_format_code_t format); + +void +pixel_checker_split_pixel (const pixel_checker_t *checker, uint32_t pixel, + int *a, int *r, int *g, int *b); + +void +pixel_checker_get_max (const pixel_checker_t *checker, color_t *color, + int *a, int *r, int *g, int *b); + +void +pixel_checker_get_min (const pixel_checker_t *checker, color_t *color, + int *a, int *r, int *g, int *b); + +pixman_bool_t +pixel_checker_check (const pixel_checker_t *checker, + uint32_t pixel, color_t *color); + +void +pixel_checker_convert_pixel_to_color (const pixel_checker_t *checker, + uint32_t pixel, color_t *color); + +void +pixel_checker_get_masks (const pixel_checker_t *checker, + uint32_t *am, + uint32_t *rm, + uint32_t *gm, + uint32_t *bm); |